Fading Coder

One Final Commit for the Last Sprint

Home > Tech > Content

Support Vector Machine Implementation via Sequential Minimal Optimization

Tech 2

Kernel Function Definitions

The foundation of non-linear classification relies on mapping input data into higher-dimensional spaces through kernel transformations. The following module defines three common kernels using a functional factory pattern.

import numpy as np

def build_linear_kernel():
    """Return a kernel computing the plain inner product of two vectors."""
    def kernel(left, right):
        # Linear kernel: similarity is simply the raw dot product.
        return np.dot(left, right)
    return kernel

def build_polynomial_kernel(degree=3, intercept=1.0):
    """Return a polynomial kernel computing (<a, b> + intercept) ** degree."""
    def kernel(left, right):
        base = np.dot(left, right) + intercept
        return base ** degree
    return kernel

def build_gaussian_kernel(sigma=1.0):
    """Return an RBF kernel computing exp(-||a - b||^2 / (2 * sigma^2)).

    The returned function accepts either a single vector or a 2-D batch of
    row vectors for its first argument (the squared distance is then taken
    row-wise).
    """
    # Denominator is fixed once per factory call; sigma is captured here.
    two_sigma_sq = 2.0 * sigma ** 2

    def kernel(left, right):
        delta = np.asarray(left) - np.asarray(right)
        if delta.ndim > 1:
            # Batch of rows: one squared distance per row.
            sq_dist = np.sum(delta * delta, axis=1)
        else:
            sq_dist = np.dot(delta, delta)
        return np.exp(-sq_dist / two_sigma_sq)

    return kernel

SMO Optimization Engine

The core solver applies the Sequential Minimal Optimization strategy to solve the dual quadratic programming problem. It iteratively identifies pairs of Lagrange multipliers that violate the Karush-Kuhn-Tucker conditions and updates them analytically.

import copy
import random
import numpy as np
import matplotlib.pyplot as plt
from kernels import build_linear_kernel, build_polynomial_kernel, build_gaussian_kernel

class BinarySVM:
    def __init__(self, penalty=1.0, sigma=1.0, poly_deg=3, poly_c=1.0, 
                 k_type=None, eps=1e-3, alpha_tol=1e-5, iterations=500):
        self.penalty = penalty
        self.eps = eps
        self.alpha_tol = alpha_tol
        self.max_iters = iterations
        
        if k_type is None or k_type.lower() == 'linear':
            self.kernel = build_linear_kernel()
        elif k_type.lower() == 'poly':
            self.kernel = build_polynomial_kernel(poly_deg, poly_c)
        elif k_type.lower() == 'rbf':
            self.kernel = build_gaussian_kernel(sigma)
        else:
            self.kernel = build_linear_kernel()

        self.dual_vars = None
        self.error_cache = None
        self.weights = None
        self.intercept = 0.0
        self.support_idx = []
        self.loss_history = []

    def _compute_outputs(self, X):
        X = np.asarray(X)
        if not self.support_idx:
            return np.zeros(X.shape[0] if X.ndim > 1 else 1)
        
        sv_X = self.support_X
        sv_y = self.support_labels
        sv_alphas = self.support_alphas
        
        scores = np.zeros(len(X)) if X.ndim > 1 else 0.0
        for idx in range(len(self.support_idx)):
            scores += sv_alphas[idx] * sv_y[idx] * self.kernel(X, sv_X[idx])
        return scores + self.intercept

    def _check_violation(self, x_sample, y_label, alpha_val, weight):
        margin = y_label * self._compute_outputs(x_sample)
        if alpha_val < self.penalty * weight:
            return margin >= 1 - self.eps
        else:
            return margin <= 1 + self.eps

    def _select_partner(self, primary_idx):
        active = [j for j in range(len(self.dual_vars)) 
                  if self.dual_vars[j] > self.alpha_tol and j != primary_idx]
        if active:
            diffs = np.abs(self.error_cache[primary_idx] - self.error_cache[active])
            return active[np.argmax(diffs)]
        indices = list(range(len(self.dual_vars)))
        indices.remove(primary_idx)
        return random.choice(indices)

    def _bound_alpha(self, yi, yj, unc_j, old_i, old_j, weight_j):
        C_val = self.penalty * weight_j
        if yi == yj:
            low = max(0, old_i + old_j - C_val)
            high = min(C_val, old_i + old_j)
        else:
            low = max(0, old_j - old_i)
            high = min(C_val, C_val + old_j - old_i)
        return low if unc_j < low else high if unc_j > high else unc_j

    def _update_losses(self, X, true_y, mapped_y):
        preds = self._compute_outputs(X)
        self.error_cache = preds - mapped_y
        sig = lambda v: 1 / (1 + np.exp(-np.clip(v, -50, 30)))
        prob = sig(preds)
        entropy = -(true_y.T @ np.log(prob + 1e-12) + (1 - true_y).T @ np.log(1 - prob + 1e-12))
        self.loss_history.append(entropy / len(mapped_y))

    def train(self, X, y, sample_w=None):
        X, y = np.asarray(X), np.asarray(y)
        if sample_w is None:
            sample_w = np.ones(len(X))
        
        unique = np.unique(y)
        if len(unique) != 2 or sorted(unique) != [0, 1]:
            raise ValueError("Target labels must be exactly 0 or 1.")
            
        mapped_y = y.copy().astype(float)
        mapped_y[mapped_y == 0] = -1.0
        
        self.weights = np.zeros(X.shape[1])
        self.intercept = 0.0
        self.dual_vars = np.zeros(len(X))
        self.support_X = np.empty((0, X.shape[1]))
        self.support_labels = np.empty(0)
        self.support_alphas = np.empty(0)
        self.support_idx = []
        
        self._compute_outputs(X) # Initialize errors
        
        for epoch in range(self.max_iters):
            converged = True
            for i in range(len(X)):
                x_i, y_i = X[i], mapped_y[i]
                a_i_old, err_i = self.dual_vars[i], self.error_cache[i]
                
                if not self._check_violation(x_i, y_i, a_i_old, sample_w[i]):
                    converged = False
                    j = self._select_partner(i)
                    x_j, y_j = X[j], mapped_y[j]
                    a_j_old, err_j = self.dual_vars[j], self.error_cache[j]
                    
                    K_ii = self.kernel(x_i, x_i)
                    K_jj = self.kernel(x_j, x_j)
                    K_ij = self.kernel(x_i, x_j)
                    
                    eta = K_ii + K_jj - 2 * K_ij
                    if abs(eta) < 1e-6:
                        continue
                        
                    unc_a_j = a_j_old - y_j * (err_j - err_i) / eta
                    a_j_new = self._bound_alpha(y_i, y_j, unc_a_j, a_i_old, a_j_old, sample_w[j])
                    
                    if abs(a_j_new - a_j_old) < 1e-6:
                        continue
                        
                    a_i_new = a_i_old - y_i * y_j * (a_j_new - a_j_old)
                    self.dual_vars[i] = a_i_new
                    self.dual_vars[j] = a_j_new
                    
                    b1 = self.intercept - err_i - y_i * K_ii * (a_i_new - a_i_old) - y_j * K_ij * (a_j_new - a_j_old)
                    b2 = self.intercept - err_j - y_i * K_ij * (a_i_new - a_i_old) - y_j * K_jj * (a_j_new - a_j_old)
                    
                    if 0 < a_i_new < self.penalty * sample_w[i]:
                        self.intercept = b1
                    elif 0 < a_j_new < self.penalty * sample_w[j]:
                        self.intercept = b2
                    else:
                        self.intercept = (b1 + b2) / 2.0
                        
                    self._update_losses(X, y, mapped_y)
                    
                    sv_mask = self.dual_vars > self.alpha_tol
                    self.support_idx = np.where(sv_mask)[0]
                    self.support_X = X[sv_mask]
                    self.support_labels = mapped_y[sv_mask]
                    self.support_alphas = self.dual_vars[sv_mask]
                    
            if converged:
                break

    def predict(self, X_test):
        X_test = np.asarray(X_test)
        raw = self._compute_outputs(X_test)
        probs = 1 / (1 + np.exp(-np.clip(raw, -50, 30)))
        return (probs >= 0.5).astype(int)

    def plot_boundary(self, X, y, show=True, draw_margin=False):
        X, y = np.asarray(X), np.asarray(y)
        if X.shape[1] != 2:
            return
            
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
        Z = self.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        
        if show: plt.figure(figsize=(8, 6))
        plt.contourf(xx, yy, Z, alpha=0.2, cmap='winter')
        plt.scatter(X[y==1][:, 0], X[y==1][:, 1], marker='*', label='Class 1')
        plt.scatter(X[y==0][:, 0], X[y==0][:, 1], marker='x', label='Class 0')
        
        if self.support_idx.size > 0:
            plt.scatter(self.support_X[:, 0], self.support_X[:, 1], s=100, 
                        facecolors='none', edgecolors='k', linewidths=1.5, label='Support Vectors')
        if draw_margin and (self.kernel == build_linear_kernel()):
            xi = np.linspace(x_min, x_max, 100)
            boundary = -(self.weights[0] * xi + self.intercept) / self.weights[1]
            margin = 1 / np.linalg.norm(self.weights)
            plt.plot(xi, boundary, 'r-', label='Hyperplane')
            plt.plot(xi, boundary + margin/np.abs(self.weights[1]), 'k--')
            plt.plot(xi, boundary - margin/np.abs(self.weights[1]), 'k--')
            
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)
        plt.legend(frameon=False)
        if show: plt.show()

    def plot_convergence(self, show=True):
        if not self.loss_history: return
        if show: plt.figure(figsize=(8, 4))
        plt.plot(self.loss_history, linewidth=1.2)
        plt.xlabel('Iterations')
        plt.ylabel('Mean Cross-Entropy Loss')
        plt.title('Optimization Convergence')
        plt.grid(alpha=0.3)
        if show: plt.show()

Model Evaluation and Visualization

The following scripts demonstrate training scenarios with varying regularization strengths and kernel configurations. The first example contrasts hard-margin and soft-margin behavior on linearly separable data, while the second applies non-linear kernels to complex distributions.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification, make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from svm_engine import BinarySVM

# Linear separability and regularization comparison
features, targets = make_classification(n_samples=250, n_features=2, n_classes=2, 
                                        n_informative=1, class_sep=1.8, random_state=9)
train_X, test_X, train_y, test_y = train_test_split(features, targets, test_size=0.2, random_state=5)

# A very large penalty approximates a hard margin; unit penalty allows slack.
strict_svm = BinarySVM(penalty=1000.0)
strict_svm.train(train_X, train_y)
print(classification_report(test_y, strict_svm.predict(test_X)))

relaxed_svm = BinarySVM(penalty=1.0)
relaxed_svm.train(train_X, train_y)
print(classification_report(test_y, relaxed_svm.predict(test_X)))

plt.figure(figsize=(12, 5))
plt.subplot(121)
strict_svm.plot_boundary(train_X, train_y, show=False, draw_margin=True)
plt.title('Hard Margin SVM')
plt.subplot(122)
relaxed_svm.plot_boundary(train_X, train_y, show=False, draw_margin=True)
plt.title('Soft Margin SVM')
plt.tight_layout()
plt.show()

# Non-linear kernel evaluation on complex manifolds
moon_X, moon_y = make_moons(n_samples=300, noise=0.08, random_state=12)
moon_tr_X, moon_te_X, moon_tr_y, moon_te_y = train_test_split(moon_X, moon_y, test_size=0.25, random_state=7)

# Gaussian kernel: local similarity governed by sigma.
gauss_model = BinarySVM(penalty=15.0, sigma=0.6, k_type='rbf')
gauss_model.train(moon_tr_X, moon_tr_y)
print(classification_report(moon_te_y, gauss_model.predict(moon_te_X)))

# Cubic polynomial kernel for comparison on the same split.
cubic_model = BinarySVM(penalty=15.0, poly_deg=3, k_type='poly')
cubic_model.train(moon_tr_X, moon_tr_y)
print(classification_report(moon_te_y, cubic_model.predict(moon_te_X)))

plt.figure(figsize=(12, 5))
plt.subplot(121)
gauss_model.plot_boundary(moon_tr_X, moon_tr_y, show=False)
plt.title('RBF Kernel Decision Region')
plt.subplot(122)
cubic_model.plot_boundary(moon_tr_X, moon_tr_y, show=False)
plt.title('Polynomial Kernel Decision Region')
plt.tight_layout()
plt.show()

Related Articles

Understanding Strong and Weak References in Java

Strong References Strong references are the most prevalent type of object referencing in Java. When an object has a strong reference pointing to it, the garbage collector will not reclaim its memory. F...

Comprehensive Guide to SSTI Explained with Payload Bypass Techniques

Introduction Server-Side Template Injection (SSTI) is a vulnerability in web applications where user input is improperly handled within the template engine and executed on the server. This exploit can r...

Implement Image Upload Functionality for Django Integrated TinyMCE Editor

Django’s Admin panel is highly user-friendly, and pairing it with TinyMCE, an effective rich text editor, simplifies content management significantly. Combining the two is particularly useful for bloggi...

Leave a Comment

Anonymous

◎Feel free to join the discussion and share your thoughts.