KMP (Knuth-Morris-Pratt) Algorithm
Description
The Knuth-Morris-Pratt (KMP) algorithm is an efficient string matching algorithm that finds occurrences of a pattern string within a main text string. It minimizes comparisons by utilizing information about previous matches.
How It Works
- Preprocessing Phase:
- Build a Longest Proper Prefix which is also Suffix (LPS) array
- LPS[i] contains length of longest proper prefix that is also a suffix for pattern[0..i]
- Searching Phase:
- Use two pointers: one for text (i) and one for pattern (j)
- If characters match, increment both pointers
- If mismatch occurs, use LPS array to skip redundant comparisons
Visualization
Enter text and pattern to begin
Complexity Analysis
Time Complexity
-
Preprocessing: O(m)
- Building LPS array for pattern of length m
-
Searching: O(n)
- n is the length of the text
- Each character in text is compared at most once
- Overall: O(n + m)
Space Complexity
-
LPS Array: O(m)
- Stores pattern prefix information
- m is the length of the pattern
Advantages and Disadvantages
Advantages
- Linear time complexity
- No backtracking in text
- Efficient for multiple pattern searches
- Works well with repetitive patterns
Disadvantages
- Requires preprocessing of pattern
- Extra space for LPS array
- Complex implementation
- May be overkill for simple searches
def compute_lps(pattern):
"""Compute Longest Proper Prefix which is also Suffix array."""
m = len(pattern)
lps = [0] * m # Initialize LPS array with zeros
length = 0 # Length of previous longest prefix suffix
i = 1 # Start from second character
while i < m:
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
# Try matching with shorter prefix
length = lps[length - 1]
else:
# No matching prefix found
lps[i] = 0
i += 1
return lps
def kmp_search(text, pattern):
"""Search for pattern in text using KMP algorithm."""
if not pattern or not text:
return []
n = len(text)
m = len(pattern)
matches = []
# Compute LPS array
lps = compute_lps(pattern)
i = 0 # Index for text
j = 0 # Index for pattern
while i < n:
if pattern[j] == text[i]:
i += 1
j += 1
if j == m:
# Pattern found at index i-j
matches.append(i - j)
# Look for next occurrence
j = lps[j - 1]
elif i < n and pattern[j] != text[i]:
if j != 0:
# Use LPS array to skip comparisons
j = lps[j - 1]
else:
# No match, move to next character in text
i += 1
return matches
# Example usage
if __name__ == "__main__":
text = "ABABDABACDABABCABAB"
pattern = "ABABCABAB"
matches = kmp_search(text, pattern)
if matches:
print(f"Pattern found at indices: {matches}")
else:
print("Pattern not found")
#include <vector>
#include <string>
#include <iostream>
class KMP {
private:
std::vector<int> computeLPS(const std::string& pattern) {
int m = pattern.length();
std::vector<int> lps(m, 0);
int length = 0;
int i = 1;
while (i < m) {
if (pattern[i] == pattern[length]) {
length++;
lps[i] = length;
i++;
} else {
if (length != 0) {
length = lps[length - 1];
} else {
lps[i] = 0;
i++;
}
}
}
return lps;
}
public:
std::vector<int> search(const std::string& text, const std::string& pattern) {
std::vector<int> matches;
if (pattern.empty() || text.empty()) {
return matches;
}
int n = text.length();
int m = pattern.length();
// Compute LPS array
std::vector<int> lps = computeLPS(pattern);
int i = 0; // Index for text
int j = 0; // Index for pattern
while (i < n) {
if (pattern[j] == text[i]) {
i++;
j++;
}
if (j == m) {
matches.push_back(i - j);
j = lps[j - 1];
}
else if (i < n && pattern[j] != text[i]) {
if (j != 0) {
j = lps[j - 1];
} else {
i++;
}
}
}
return matches;
}
};
int main() {
std::string text = "ABABDABACDABABCABAB";
std::string pattern = "ABABCABAB";
KMP kmp;
std::vector<int> matches = kmp.search(text, pattern);
if (!matches.empty()) {
std::cout << "Pattern found at indices: ";
for (int index : matches) {
std::cout << index << " ";
}
std::cout << std::endl;
} else {
std::cout << "Pattern not found" << std::endl;
}
return 0;
}
public class KMP
{
private int[] ComputeLPS(string pattern)
{
int m = pattern.Length;
int[] lps = new int[m];
int length = 0;
int i = 1;
while (i < m)
{
if (pattern[i] == pattern[length])
{
length++;
lps[i] = length;
i++;
}
else
{
if (length != 0)
{
length = lps[length - 1];
}
else
{
lps[i] = 0;
i++;
}
}
}
return lps;
}
public List<int> Search(string text, string pattern)
{
var matches = new List<int>();
if (string.IsNullOrEmpty(pattern) || string.IsNullOrEmpty(text))
{
return matches;
}
int n = text.Length;
int m = pattern.Length;
// Compute LPS array
int[] lps = ComputeLPS(pattern);
int i = 0; // Index for text
int j = 0; // Index for pattern
while (i < n)
{
if (pattern[j] == text[i])
{
i++;
j++;
}
if (j == m)
{
matches.Add(i - j);
j = lps[j - 1];
}
else if (i < n && pattern[j] != text[i])
{
if (j != 0)
{
j = lps[j - 1];
}
else
{
i++;
}
}
}
return matches;
}
public static void Main()
{
string text = "ABABDABACDABABCABAB";
string pattern = "ABABCABAB";
var kmp = new KMP();
var matches = kmp.Search(text, pattern);
if (matches.Count > 0)
{
Console.WriteLine($"Pattern found at indices: {string.Join(" ", matches)}");
}
else
{
Console.WriteLine("Pattern not found");
}
}
}