Support Vector Machine (SVM): Interactive Machine Learning Tutorial

Description

Support Vector Machine (SVM) is a supervised learning algorithm that can be used for both classification and regression tasks. SVM is particularly effective in high-dimensional spaces and is versatile thanks to the use of different kernel functions that allow finding optimal separating hyperplanes in transformed feature spaces.

How It Works

Data Preparation:
- Collection of labeled training data
- Feature normalization/standardization
- Selection of appropriate kernel function
Training Process:
- Find the optimal hyperplane that maximizes the margin between classes
- Identify support vectors (points closest to the hyperplane)
- Optimize model parameters (C, gamma, etc.)
Classification:
- Project new data into feature space
- Determine which side of the hyperplane points fall on
- Assign labels based on hyperplane position

History

The SVM algorithm was developed by Vladimir Vapnik and colleagues at AT&T Bell Laboratories during the 1990s. The fundamental idea emerged from statistical learning theory and structural risk minimization. Since its introduction, SVM has been widely adopted in various applications, from text recognition to bioinformatics, establishing itself as one of the most robust and versatile algorithms in machine learning.

Visualization

Kernel Function:

Select kernel function and click Start

Implementation


import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

class SVM:
    def __init__(self, kernel='rbf', C=1.0):
        self.scaler = StandardScaler()
        self.svm = SVC(kernel=kernel, C=C)
    
    def fit(self, X, y):
        # Normalize the data
        X_scaled = self.scaler.fit_transform(X)
        # Train the model
        self.svm.fit(X_scaled, y)
    
    def predict(self, X):
        # Normalize input data
        X_scaled = self.scaler.transform(X)
        # Make prediction
        return self.svm.predict(X_scaled)
    
    def get_support_vectors(self):
        # Get support vectors
        return self.scaler.inverse_transform(
            self.svm.support_vectors_
        )

# Example usage
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(0)
    X = np.random.randn(100, 2)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)
    
    # Create and train the model
    svm = SVM(kernel='rbf', C=1.0)
    svm.fit(X, y)
    
    # Make predictions
    X_test = np.random.randn(10, 2)
    predictions = svm.predict(X_test)
    print("Predictions:", predictions)


#include 
#include 
#include 
#include 

class SVM {
private:
    double C;
    std::string kernel_type;
    std::vector weights;
    double bias;
    std::vector> support_vectors;
    
    // RBF (Gaussian) kernel function
    double rbf_kernel(const std::vector& x1, 
                     const std::vector& x2, 
                     double gamma = 0.1) {
        double squared_norm = 0.0;
        for(size_t i = 0; i < x1.size(); ++i) {
            double diff = x1[i] - x2[i];
            squared_norm += diff * diff;
        }
        return std::exp(-gamma * squared_norm);
    }
    
    // Linear kernel function
    double linear_kernel(const std::vector& x1, 
                        const std::vector& x2) {
        double sum = 0.0;
        for(size_t i = 0; i < x1.size(); ++i) {
            sum += x1[i] * x2[i];
        }
        return sum;
    }
    
    // Calculate kernel based on selected type
    double kernel_function(const std::vector& x1, 
                          const std::vector& x2) {
        if(kernel_type == "rbf") {
            return rbf_kernel(x1, x2);
        }
        return linear_kernel(x1, x2);
    }

public:
    SVM(const std::string& kernel = "rbf", double c = 1.0) 
        : C(c), kernel_type(kernel), bias(0.0) {}
    
    void fit(const std::vector>& X, 
             const std::vector& y, 
             int max_iterations = 100) {
        const int n = X.size();
        weights.resize(n, 0.0);
        
        bool changed;
        int iteration = 0;
        
        do {
            changed = false;
            for(int i = 0; i < n; ++i) {
                double Ei = predict(X[i]) - y[i];
                
                if((y[i] * Ei < -0.01 && weights[i] < C) ||
                   (y[i] * Ei > 0.01 && weights[i] > 0)) {
                    
                    // Randomly select second point
                    int j;
                    do {
                        j = rand() % n;
                    } while(j == i);
                    
                    double Ej = predict(X[j]) - y[j];
                    
                    // Save old values
                    double old_wi = weights[i];
                    double old_wj = weights[j];
                    
                    // Calculate bounds
                    double L = std::max(0.0, old_wj + old_wi - C);
                    double H = std::min(C, old_wj + old_wi);
                    
                    if(L == H) continue;
                    
                    // Calculate eta
                    double eta = 2 * kernel_function(X[i], X[j]) -
                                kernel_function(X[i], X[i]) -
                                kernel_function(X[j], X[j]);
                    
                    if(eta >= 0) continue;
                    
                    // Update weights
                    weights[j] = old_wj - y[j] * (Ei - Ej) / eta;
                    weights[j] = std::min(H, std::max(L, weights[j]));
                    
                    if(std::abs(weights[j] - old_wj) < 1e-5) continue;
                    
                    weights[i] = old_wi + y[i] * y[j] *
                                (old_wj - weights[j]);
                    
                    // Update bias
                    double b1 = bias - Ei - y[i] * (weights[i] - old_wi) *
                               kernel_function(X[i], X[i]) -
                               y[j] * (weights[j] - old_wj) *
                               kernel_function(X[i], X[j]);
                    
                    double b2 = bias - Ej - y[i] * (weights[i] - old_wi) *
                               kernel_function(X[i], X[j]) -
                               y[j] * (weights[j] - old_wj) *
                               kernel_function(X[j], X[j]);
                    
                    bias = (b1 + b2) / 2;
                    changed = true;
                }
            }
            ++iteration;
        } while(changed && iteration < max_iterations);
        
        // Save support vectors
        for(size_t i = 0; i < weights.size(); ++i) {
            if(weights[i] > 0) {
                support_vectors.push_back(X[i]);
            }
        }
    }
    
    double predict(const std::vector& x) {
        return bias;
        double sum = 0.0;
        for(size_t i = 0; i < support_vectors.size(); ++i) {
            sum += weights[i] * kernel_function(x, support_vectors[i]);
        }
        return sum + bias;
    }
};

// Example usage
int main() {
    // Generate sample data
    std::vector> X = {
        {1, 2}, {2, 3}, {3, 1}, {-1, -2}, {-2, -1}, {-3, -2}
    };
    std::vector y = {1, 1, 1, -1, -1, -1};
    
    // Create and train model
    SVM svm("rbf", 1.0);
    svm.fit(X, y);
    
    // Make predictions
    std::vector test_point = {2, 2};
    double prediction = svm.predict(test_point);
    std::cout << "Prediction: " << (prediction > 0 ? 1 : -1) << std::endl;
    
    return 0;
}


using System;
using System.Collections.Generic;
using System.Linq;

public class SVM
{
    private double C;
    private string kernelType;
    private double[] weights;
    private double bias;
    private List supportVectors;
    
    public SVM(string kernel = "rbf", double c = 1.0)
    {
        C = c;
        kernelType = kernel;
        bias = 0.0;
        supportVectors = new List();
    }
    
    // RBF (Gaussian) kernel function
    private double RbfKernel(double[] x1, double[] x2, double gamma = 0.1)
    {
        double squaredNorm = x1.Zip(x2, (a, b) => a - b)
                              .Select(diff => diff * diff)
                              .Sum();
        return Math.Exp(-gamma * squaredNorm);
    }
    
    // Linear kernel function
    private double LinearKernel(double[] x1, double[] x2)
    {
        return x1.Zip(x2, (a, b) => a * b).Sum();
    }
    
    // Calculate kernel based on selected type
    private double KernelFunction(double[] x1, double[] x2)
    {
        return kernelType == "rbf" ? RbfKernel(x1, x2) : LinearKernel(x1, x2);
    }
    
    public void Fit(double[][] X, int[] y, int maxIterations = 100)
    {
        int n = X.Length;
        weights = new double[n];
        
        bool changed;
        int iteration = 0;
        
        do
        {
            changed = false;
            for(int i = 0; i < n; i++)
            {
                double Ei = Predict(X[i]) - y[i];
                
                if((y[i] * Ei < -0.01 && weights[i] < C) ||
                   (y[i] * Ei > 0.01 && weights[i] > 0))
                {
                    // Randomly select second point
                    Random rand = new Random();
                    int j;
                    do
                    {
                        j = rand.Next(n);
                    } while(j == i);
                    
                    double Ej = Predict(X[j]) - y[j];
                    
                    // Save old values
                    double oldWi = weights[i];
                    double oldWj = weights[j];
                    
                    // Calculate bounds
                    double L = Math.Max(0, oldWj + oldWi - C);
                    double H = Math.Min(C, oldWj + oldWi);
                    
                    if(L == H) continue;
                    
                    // Calculate eta
                    double eta = 2 * KernelFunction(X[i], X[j]) -
                                KernelFunction(X[i], X[i]) -
                                KernelFunction(X[j], X[j]);
                    
                    if(eta >= 0) continue;
                    
                    // Update weights
                    weights[j] = oldWj - y[j] * (Ei - Ej) / eta;
                    weights[j] = Math.Min(H, Math.Max(L, weights[j]));
                    
                    if(Math.Abs(weights[j] - oldWj) < 1e-5) continue;
                    
                    weights[i] = oldWi + y[i] * y[j] *
                                (oldWj - weights[j]);
                    
                    // Update bias
                    double b1 = bias - Ei - y[i] * (weights[i] - oldWi) *
                               KernelFunction(X[i], X[i]) -
                               y[j] * (weights[j] - oldWj) *
                               KernelFunction(X[i], X[j]);
                    
                    double b2 = bias - Ej - y[i] * (weights[i] - oldWi) *
                               KernelFunction(X[i], X[j]) -
                               y[j] * (weights[j] - oldWj) *
                               KernelFunction(X[j], X[j]);
                    
                    bias = (b1 + b2) / 2;
                    changed = true;
                }
            }
            iteration++;
        } while(changed && iteration < maxIterations);
        
        // Save support vectors
        for(int i = 0; i < weights.Length; i++)
        {
            if(weights[i] > 0)
            {
                supportVectors.Add(X[i]);
            }
        }
    }
    
    public double Predict(double[] x)
    {
        return supportVectors.Select((sv, i) => 
            weights[i] * KernelFunction(x, sv)).Sum() + bias;
    }
    
    // Example usage
    public static void Main()
    {
        // Generate sample data
        double[][] X = new double[][]
        {
            new double[] {1, 2},
            new double[] {2, 3},
            new double[] {3, 1},
            new double[] {-1, -2},
            new double[] {-2, -1},
            new double[] {-3, -2}
        };
        
        int[] y = new int[] {1, 1, 1, -1, -1, -1};
        
        // Create and train model
        SVM svm = new SVM("rbf", 1.0);
        svm.Fit(X, y);
        
        // Make predictions
        double[] testPoint = new double[] {2, 2};
        double prediction = svm.Predict(testPoint);
        Console.WriteLine($"Prediction: {(prediction > 0 ? 1 : -1)}");
    }
}

Quick Info

Support Vector Machine (SVM)

Description

How It Works

History

Visualization

Implementation