Quick Info

Category: String Matching
Time Complexity: O(n + m)
Space Complexity: O(m)
Input Type: String

KMP (Knuth-Morris-Pratt) Algorithm

Description

The Knuth-Morris-Pratt (KMP) algorithm is an efficient string matching algorithm that finds occurrences of a pattern string within a main text string. It minimizes comparisons by utilizing information about previous matches.

How It Works

  1. Preprocessing Phase:
    • Build a Longest Proper Prefix which is also Suffix (LPS) array
    • LPS[i] contains length of longest proper prefix that is also a suffix for pattern[0..i]
  2. Searching Phase:
    • Use two pointers: one for text (i) and one for pattern (j)
    • If characters match, increment both pointers
    • If mismatch occurs, use LPS array to skip redundant comparisons

Visualization

Enter text and pattern to begin

Complexity Analysis

Time Complexity

  • Preprocessing: O(m)
    • Building LPS array for pattern of length m
  • Searching: O(n)
    • n is the length of the text
    • Each character in text is compared at most once
  • Overall: O(n + m)

Space Complexity

  • LPS Array: O(m)
    • Stores pattern prefix information
    • m is the length of the pattern

Advantages and Disadvantages

Advantages

  • Linear time complexity
  • No backtracking in text
  • Efficient for multiple pattern searches
  • Works well with repetitive patterns

Disadvantages

  • Requires preprocessing of pattern
  • Extra space for LPS array
  • Complex implementation
  • May be overkill for simple searches

def compute_lps(pattern):
    """Compute Longest Proper Prefix which is also Suffix array."""
    m = len(pattern)
    lps = [0] * m  # Initialize LPS array with zeros
    
    length = 0  # Length of previous longest prefix suffix
    i = 1       # Start from second character
    
    while i < m:
        if pattern[i] == pattern[length]:
            length += 1
            lps[i] = length
            i += 1
        else:
            if length != 0:
                # Try matching with shorter prefix
                length = lps[length - 1]
            else:
                # No matching prefix found
                lps[i] = 0
                i += 1
    
    return lps

def kmp_search(text, pattern):
    """Search for pattern in text using KMP algorithm."""
    if not pattern or not text:
        return []
    
    n = len(text)
    m = len(pattern)
    matches = []
    
    # Compute LPS array
    lps = compute_lps(pattern)
    
    i = 0  # Index for text
    j = 0  # Index for pattern
    
    while i < n:
        if pattern[j] == text[i]:
            i += 1
            j += 1
        
        if j == m:
            # Pattern found at index i-j
            matches.append(i - j)
            # Look for next occurrence
            j = lps[j - 1]
        
        elif i < n and pattern[j] != text[i]:
            if j != 0:
                # Use LPS array to skip comparisons
                j = lps[j - 1]
            else:
                # No match, move to next character in text
                i += 1
    
    return matches

# Example usage
if __name__ == "__main__":
    text = "ABABDABACDABABCABAB"
    pattern = "ABABCABAB"
    
    matches = kmp_search(text, pattern)
    
    if matches:
        print(f"Pattern found at indices: {matches}")
    else:
        print("Pattern not found")

#include <vector>
#include <string>
#include <iostream>

class KMP {
private:
    std::vector<int> computeLPS(const std::string& pattern) {
        int m = pattern.length();
        std::vector<int> lps(m, 0);
        
        int length = 0;
        int i = 1;
        
        while (i < m) {
            if (pattern[i] == pattern[length]) {
                length++;
                lps[i] = length;
                i++;
            } else {
                if (length != 0) {
                    length = lps[length - 1];
                } else {
                    lps[i] = 0;
                    i++;
                }
            }
        }
        
        return lps;
    }

public:
    std::vector<int> search(const std::string& text, const std::string& pattern) {
        std::vector<int> matches;
        if (pattern.empty() || text.empty()) {
            return matches;
        }
        
        int n = text.length();
        int m = pattern.length();
        
        // Compute LPS array
        std::vector<int> lps = computeLPS(pattern);
        
        int i = 0;  // Index for text
        int j = 0;  // Index for pattern
        
        while (i < n) {
            if (pattern[j] == text[i]) {
                i++;
                j++;
            }
            
            if (j == m) {
                matches.push_back(i - j);
                j = lps[j - 1];
            }
            else if (i < n && pattern[j] != text[i]) {
                if (j != 0) {
                    j = lps[j - 1];
                } else {
                    i++;
                }
            }
        }
        
        return matches;
    }
};

int main() {
    std::string text = "ABABDABACDABABCABAB";
    std::string pattern = "ABABCABAB";
    
    KMP kmp;
    std::vector<int> matches = kmp.search(text, pattern);
    
    if (!matches.empty()) {
        std::cout << "Pattern found at indices: ";
        for (int index : matches) {
            std::cout << index << " ";
        }
        std::cout << std::endl;
    } else {
        std::cout << "Pattern not found" << std::endl;
    }
    
    return 0;
}

public class KMP
{
    private int[] ComputeLPS(string pattern)
    {
        int m = pattern.Length;
        int[] lps = new int[m];
        
        int length = 0;
        int i = 1;
        
        while (i < m)
        {
            if (pattern[i] == pattern[length])
            {
                length++;
                lps[i] = length;
                i++;
            }
            else
            {
                if (length != 0)
                {
                    length = lps[length - 1];
                }
                else
                {
                    lps[i] = 0;
                    i++;
                }
            }
        }
        
        return lps;
    }
    
    public List<int> Search(string text, string pattern)
    {
        var matches = new List<int>();
        
        if (string.IsNullOrEmpty(pattern) || string.IsNullOrEmpty(text))
        {
            return matches;
        }
        
        int n = text.Length;
        int m = pattern.Length;
        
        // Compute LPS array
        int[] lps = ComputeLPS(pattern);
        
        int i = 0;  // Index for text
        int j = 0;  // Index for pattern
        
        while (i < n)
        {
            if (pattern[j] == text[i])
            {
                i++;
                j++;
            }
            
            if (j == m)
            {
                matches.Add(i - j);
                j = lps[j - 1];
            }
            else if (i < n && pattern[j] != text[i])
            {
                if (j != 0)
                {
                    j = lps[j - 1];
                }
                else
                {
                    i++;
                }
            }
        }
        
        return matches;
    }
    
    public static void Main()
    {
        string text = "ABABDABACDABABCABAB";
        string pattern = "ABABCABAB";
        
        var kmp = new KMP();
        var matches = kmp.Search(text, pattern);
        
        if (matches.Count > 0)
        {
            Console.WriteLine($"Pattern found at indices: {string.Join(" ", matches)}");
        }
        else
        {
            Console.WriteLine("Pattern not found");
        }
    }
}