#!/usr/bin/python3 '''Linear regression example ''' import numpy as np import os from matplotlib import pyplot as plt def data_set_linear(count = 10, sigma = 0.1): '''Return a linear dataset with normally distributed noise @param {int} count number of points @param {float} sigma std deviation of the added noise @return {np.ndarray} x data {np.ndarray} y data {np.ndarray} unperturned y data ''' # Make random values reproducible rstate = np.random.RandomState(0) x = np.linspace(0, 1, count) # Linear line with noise with no bias (offset) y = x + rstate.normal(0, sigma, len(x)) return x, y, x def basis(x, M): '''Polynomial basis function of M-th degree @param {float} location to evaluate the basis functions @param {int} M degree of the basis functions @return {np.1darray} basis function values at `x` ''' # Include bias N = M+1 ret = np.zeros(N, dtype=np.float64) for i in range(N): ret[i] = x**i return ret def train(x, t, M): '''Determine polynomial model coefficients Minimization of the L2 norm of the error function E = 1/2 || y(x, w) - t ||_2^2 with y = w0 + w1 * x + w2 * x^2 + .... @param {np.1darray} training observation points @param {np.1darray} training observed values @param {int} M polynomial degree (model complexity) @return {np.1darray} model coefficients [w0, w1, ...] ''' # Include bias N = M + 1 A = np.zeros(shape=(N, N), dtype=np.float64) b = np.zeros(N, dtype=np.float64) # Form LHS and RHS for i, x_ in enumerate(x): phi = basis(x_, M) # A is symmetric, so computation/storage can be improved A = A + np.outer(phi, phi) b += t[i] * phi return np.linalg.solve(A, b) def predict(x, w): ''' Predict values observation values at `x` @param {float|np.1darray} location to evaluate model @param {np.1darray} model coefficients @return {np.array} Predicted values at @x ''' x = iter(x) M = len(w) return np.array([basis(x_, M-1).dot(w) for x_ in x]) def main(): x, t, t_unperturbed = data_set_linear() # Prediction values x_fine = np.linspace(x[0], x[-1], len(x)*2) # stage 1: "train" using 1st degree polynomial model w = train(x, t, 1) # stage 2: "predicted" model values using training set y = predict(x_fine, w) fig, ax = plt.subplots() # "Training" dataset ax.plot(x, t, linestyle='', marker='o', markersize='4', label='Data set') # Target ax.plot(x, t_unperturbed, linestyle='--', label=r'Target') # Model graph ax.plot(x_fine, y, linestyle='-', label=r'Model $y(x, \mathbf{w})$') ax.set_ylabel('t') ax.set_xlabel('x') ax.grid(True) ax.legend(loc='best', fancybox=True, framealpha=0.5, fontsize='medium') __dirname = os.path.dirname(os.path.realpath(__file__)) fn = os.path.join(__dirname, '../img/linear-regression.svg') fig.patch.set_alpha(0.0) plt.savefig(fn, facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches=0) if __name__ == '__main__': main()