The Gauss-Newton algorithm is a method for solving non-linear least squares problems. It is particularly useful for fitting a parameterized function to a set of measured data points by minimizing the sum of squared residuals between the data and the model. The algorithm combines Gauss's method of least squares with the iterative linearization at the heart of Newton's root-finding method; because it exploits the least-squares structure of the problem, it needs only first derivatives of the model and never computes second derivatives.
How It Works

1. Problem Setup:
   - Given m data points (xᵢ, yᵢ)
   - A model function f(x; β) with n parameters β
   - Residuals rᵢ = yᵢ - f(xᵢ; β)
2. Jacobian Matrix:
   - Compute J = ∂r/∂β (an m×n matrix)
   - Each element Jᵢⱼ = ∂rᵢ/∂βⱼ = -∂f(xᵢ; β)/∂βⱼ
3. Normal Equations:
   - Form JᵀJ δ = -Jᵀr
   - Solve for the update δ (a short derivation follows this list)
4. Parameter Update:
   - β ← β + δ
   - Repeat from step 2 until convergence
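Where do the normal equations come from? The following is a minimal sketch of the standard derivation: linearize the residual around the current β and minimize the resulting quadratic in δ.

$$
r(\beta + \delta) \approx r(\beta) + J\delta
\qquad\Longrightarrow\qquad
\min_{\delta}\ \lVert r(\beta) + J\delta \rVert_2^2 .
$$

Setting the gradient with respect to δ to zero gives

$$
2\,J^{\mathsf{T}}\bigl(r(\beta) + J\delta\bigr) = 0
\qquad\Longrightarrow\qquad
J^{\mathsf{T}}J\,\delta = -J^{\mathsf{T}}r .
$$

Equivalently, JᵀJ is the Hessian of the objective with the second-derivative term Σᵢ rᵢ∇²rᵢ dropped, which is why Gauss-Newton gets Newton-like steps from first derivatives alone.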
History
The Gauss-Newton algorithm combines two fundamental mathematical approaches: Gauss's method of least
squares (1795) and Newton's method for finding roots (1669). Carl Friedrich Gauss developed the method
of least squares while studying astronomical data, and this was later combined with Newton's iterative
approach to create a powerful tool for non-linear optimization. The algorithm was formally described
in the early 20th century and has since become a cornerstone of numerical optimization and data fitting.
Python Implementation

import numpy as np
from typing import Callable, Tuple
def gauss_newton(
f: Callable[[np.ndarray, np.ndarray], np.ndarray],
jacobian: Callable[[np.ndarray, np.ndarray], np.ndarray],
x: np.ndarray,
y: np.ndarray,
beta0: np.ndarray,
tolerance: float = 1e-6,
max_iterations: int = 100
) -> Tuple[np.ndarray, int]:
"""
Solve non-linear least squares using Gauss-Newton algorithm.
Args:
f: Model function f(x, β)
jacobian: Jacobian of the model f with respect to β (of f itself, not of the residuals; with this sign convention β + δ is the correct update)
x: Independent variable data
y: Dependent variable data
beta0: Initial parameter guess
tolerance: Convergence tolerance
max_iterations: Maximum number of iterations
Returns:
Tuple of (optimal parameters, number of iterations)
"""
beta = beta0.astype(float)  # copy with float dtype so integer guesses are not truncated
for iteration in range(max_iterations):
# Compute residuals
r = y - f(x, beta)
# Compute Jacobian
J = jacobian(x, beta)
# Solve normal equations
JtJ = J.T @ J
Jtr = J.T @ r
# Compute update
try:
delta = np.linalg.solve(JtJ, Jtr)
except np.linalg.LinAlgError:
print("Warning: Singular matrix")
break
# Update parameters
beta += delta
# Check convergence
if np.max(np.abs(delta)) < tolerance:
return beta, iteration + 1
return beta, max_iterations
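# A forward-difference Jacobian can stand in when an analytic one is
# inconvenient. This helper is an illustrative sketch added alongside the
# example (not part of the minimal algorithm above); the step size `eps`
# is an assumed default and may need tuning to the scale of the parameters.
def numerical_jacobian(
    f: Callable[[np.ndarray, np.ndarray], np.ndarray],
    x: np.ndarray,
    beta: np.ndarray,
    eps: float = 1e-7
) -> np.ndarray:
    """Forward-difference approximation of the model Jacobian ∂f/∂β."""
    f0 = f(x, beta)
    J = np.zeros((len(x), len(beta)))
    for j in range(len(beta)):
        step = np.zeros_like(beta, dtype=float)
        step[j] = eps  # perturb one parameter at a time
        J[:, j] = (f(x, beta + step) - f0) / eps
    return J
# Usage: gauss_newton(model, lambda x, b: numerical_jacobian(model, x, b),
#                     x_data, y_data, beta0)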
# Example usage
if __name__ == "__main__":
# Example: Fit y = a*exp(-b*x)
def model(x, beta):
return beta[0] * np.exp(-beta[1] * x)
def model_jacobian(x, beta):
J = np.zeros((len(x), 2))
J[:, 0] = np.exp(-beta[1] * x)
J[:, 1] = -beta[0] * x * np.exp(-beta[1] * x)
return J
# Generate sample data
x_data = np.linspace(0, 5, 50)
true_beta = np.array([2.0, 0.5])
y_true = model(x_data, true_beta)
y_data = y_true + np.random.normal(0, 0.1, len(x_data))
# Fit model
beta0 = np.array([1.0, 1.0])
beta_fit, iterations = gauss_newton(
model, model_jacobian, x_data, y_data, beta0
)
print(f"True parameters: {true_beta}")
print(f"Fitted parameters: {beta_fit}")
print(f"Iterations: {iterations}")
C++ Implementation

#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>  // std::rand, used for the synthetic noise below
// Gauss-Newton fitter; for simplicity this version is hardcoded to
// models with exactly two parameters.
class GaussNewton {
private:
double tolerance;
int maxIterations;
// Solve the 2x2 normal-equations system A*delta = b by Cramer's rule
// (assumes A is non-singular)
std::vector<double> solveLinearSystem(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b
) {
double det = A[0][0] * A[1][1] - A[0][1] * A[1][0];
return {
(b[0] * A[1][1] - b[1] * A[0][1]) / det,
(A[0][0] * b[1] - A[1][0] * b[0]) / det
};
}
public:
GaussNewton(double tol = 1e-6, int maxIter = 100)
: tolerance(tol), maxIterations(maxIter) {}
template<typename F, typename J>
std::vector<double> fit(
F model,
J jacobian,
const std::vector<double>& x,
const std::vector<double>& y,
std::vector<double> beta0
) {
std::vector<double> beta = beta0;
const int m = static_cast<int>(x.size());
for (int iter = 0; iter < maxIterations; iter++) {
// Compute residuals and Jacobian
std::vector<double> r(m);
std::vector<std::vector<double>> J(m, std::vector<double>(2));
for (int i = 0; i < m; i++) {
r[i] = y[i] - model(x[i], beta);
auto Ji = jacobian(x[i], beta);
J[i][0] = Ji[0];
J[i][1] = Ji[1];
}
// Compute J^T * J and J^T * r
std::vector<std::vector<double>> JtJ(2, std::vector<double>(2));
std::vector<double> Jtr(2);
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
double sum = 0;
for (int k = 0; k < m; k++) {
sum += J[k][i] * J[k][j];
}
JtJ[i][j] = sum;
}
double sum = 0;
for (int k = 0; k < m; k++) {
sum += J[k][i] * r[k];
}
Jtr[i] = sum;
}
// Solve normal equations
auto delta = solveLinearSystem(JtJ, Jtr);
// Update parameters
beta[0] += delta[0];
beta[1] += delta[1];
// Check convergence
if (std::abs(delta[0]) < tolerance &&
std::abs(delta[1]) < tolerance) {
break;
}
}
return beta;
}
};
int main() {
// Example: Fit y = a*exp(-b*x)
auto model = [](double x, const std::vector<double>& beta) {
return beta[0] * std::exp(-beta[1] * x);
};
auto jacobian = [](double x, const std::vector<double>& beta) {
return std::vector<double>{
std::exp(-beta[1] * x),
-beta[0] * x * std::exp(-beta[1] * x)
};
};
// Generate sample data
std::vector<double> x, y;
double true_a = 2.0, true_b = 0.5;
for (int i = 0; i < 50; i++) {
double xi = i * 0.1;
x.push_back(xi);
y.push_back(
true_a * std::exp(-true_b * xi) +
(std::rand() % 100 - 50) / 500.0
);
}
// Fit model
GaussNewton gn;
auto beta = gn.fit(model, jacobian, x, y, {1.0, 1.0});
std::cout << "True parameters: a = " << true_a
<< ", b = " << true_b << std::endl;
std::cout << "Fitted parameters: a = " << beta[0]
<< ", b = " << beta[1] << std::endl;
return 0;
}
C# Implementation

using System;

public class GaussNewton {
private readonly double tolerance;
private readonly int maxIterations;
public GaussNewton(double tolerance = 1e-6, int maxIterations = 100) {
this.tolerance = tolerance;
this.maxIterations = maxIterations;
}
private double[] SolveLinearSystem(double[,] A, double[] b) {
double det = A[0,0] * A[1,1] - A[0,1] * A[1,0];
return new[] {
(b[0] * A[1,1] - b[1] * A[0,1]) / det,
(A[0,0] * b[1] - A[1,0] * b[0]) / det
};
}
public double[] Fit(
Func<double, double[], double> model,
Func<double, double[], double[]> jacobian,
double[] x,
double[] y,
double[] beta0
) {
var beta = (double[])beta0.Clone();
int m = x.Length;
for (int iter = 0; iter < maxIterations; iter++) {
// Compute residuals and Jacobian
var r = new double[m];
var J = new double[m,2];
for (int i = 0; i < m; i++) {
r[i] = y[i] - model(x[i], beta);
var Ji = jacobian(x[i], beta);
J[i,0] = Ji[0];
J[i,1] = Ji[1];
}
// Compute J^T * J and J^T * r
var JtJ = new double[2,2];
var Jtr = new double[2];
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
double sum = 0;
for (int k = 0; k < m; k++) {
sum += J[k,i] * J[k,j];
}
JtJ[i,j] = sum;
}
double sum = 0;
for (int k = 0; k < m; k++) {
sum += J[k,i] * r[k];
}
Jtr[i] = sum;
}
// Solve normal equations
var delta = SolveLinearSystem(JtJ, Jtr);
// Update parameters
beta[0] += delta[0];
beta[1] += delta[1];
// Check convergence
if (Math.Abs(delta[0]) < tolerance &&
Math.Abs(delta[1]) < tolerance) {
break;
}
}
return beta;
}
public static void Main() {
// Example: Fit y = a*exp(-b*x)
double Model(double x, double[] beta) =>
beta[0] * Math.Exp(-beta[1] * x);
double[] Jacobian(double x, double[] beta) => new[] {
Math.Exp(-beta[1] * x),
-beta[0] * x * Math.Exp(-beta[1] * x)
};
// Generate sample data
var random = new Random();
var x = new double[50];
var y = new double[50];
double trueA = 2.0, trueB = 0.5;
for (int i = 0; i < 50; i++) {
x[i] = i * 0.1;
y[i] = trueA * Math.Exp(-trueB * x[i]) +
(random.NextDouble() - 0.5) * 0.2;
}
// Fit model
var gn = new GaussNewton();
var beta = gn.Fit(Model, Jacobian, x, y, new[] {1.0, 1.0});
Console.WriteLine($"True parameters: a = {trueA}, b = {trueB}");
Console.WriteLine($"Fitted parameters: a = {beta[0]:F4}, b = {beta[1]:F4}");
}
}
Complexity Analysis
Time Complexity
The time complexity per iteration is O(mn²), where: