#!/usr/bin/python3
'''Gaussian-process regression demo.

Fits noisy samples of sin(x) with a Gaussian process using two kernels
(Gaussian and exponential), following Bishop, PR&ML, Section 6.4
(Eqs. 6.23, 6.55, 6.62, 6.65-6.67), and saves the resulting plot as SVG.
'''
import os
import functools

import numpy as np
from matplotlib import pyplot as plt


def create_data_set(max_x=2*np.pi, count=15):
    '''Generate a random data set resembling real observations

    Targets are sin(x) plus Gaussian noise; the RNG is seeded so the
    data set is reproducible across runs.

    @param {float} max_x maximum value on x axis (range is [-max_x, max_x])
    @param {int}   count number of sample points
    @return {np.array} x x values
            {np.array} y noisy target values
    '''
    NOISE_STD = 0.2
    rstate = np.random.RandomState(0)
    x = np.linspace(-max_x, max_x, count)
    target = np.sin(x)
    y = target + rstate.normal(0, NOISE_STD, len(x))
    return x, y


def gauss_kernel(x, xp, sigma=0.1):
    '''Gaussian Kernel

    See Eq. (6.23) in PR&ML book

    @param {float} x     first argument
    @param {float} xp    second argument
    @param {float} sigma kernel width (standard-deviation-like scale)
    @return {float} kernel value at specified points
    '''
    return np.exp(-np.linalg.norm(x - xp)**2/(2*sigma**2))


def exp_kernel(x, xp, theta=1):
    '''Exponential Kernel

    See Eq. (6.65) in PR&ML book

    @param {float} x     first argument
    @param {float} xp    second argument
    @param {float} theta decay rate
    @return {float} kernel value at specified points
    '''
    return np.exp(-theta*np.abs(x - xp))


def gram_matrix(x, kernel):
    '''Gram matrix

    See Eq. (6.55) in PR&ML book.  This is the covariance of the
    Gaussian process prior evaluated at the sample points.

    @param {array} x sample points
    @param {func}  kernel callable kernel
    @return {np.2darray} gram matrix with size (N, N) where N = len(x)
    '''
    return np.array([[kernel(x1, x2) for x2 in x] for x1 in x], np.float64)


def k_vector(x, xp, kernel):
    '''Evaluate the k vector

    See text following Eq. (6.65) in PR&ML book.

    @param {iterable} x training points
    @param {float}    xp prediction point
    @param {func}     kernel kernel function
    @return {np.1darray} k[i] = kernel(x[i], xp)
    '''
    return np.array([kernel(xi, xp) for xi in x], dtype=np.float64)


def c_matrix(x, kernel, beta=0.1):
    '''Covariance of the marginalized training data distribution

    Eq. (6.65) in PR&ML book:

        C_N[i, j] = k(x_i, x_j) + (1/beta) * delta_{ij}

    i.e. the Gram matrix plus isotropic observation noise on the diagonal.

    @param {np.1darray} x training points
    @param {func}       kernel kernel function
    @param {float}      beta precision of p(t|y)
    @return {np.2darray} covariance matrix of marginalized dist p(t_N)
    '''
    return gram_matrix(x, kernel) + np.eye(len(x), dtype=np.float64)/beta


def mean_std(x, x_train, t_train, kernel, beta):
    '''Return the mean and std. dev of the predictive model

    Eqs. (6.66), (6.67) in PR&ML book.

    @param {np.ndarray} x points where model is evaluated
    @param {np.ndarray} x_train training x-data
    @param {np.ndarray} t_train training target data
    @param {func}       kernel kernel function
    @param {float}      beta precision of the noise
    @return {np.ndarray} mean of the model
            {np.ndarray} standard deviation of the model
    '''
    N = len(x)
    var = np.zeros(N, np.float64)
    mean = np.zeros(N, np.float64)
    C = c_matrix(x_train, kernel, beta)
    C_inv = np.linalg.inv(C)
    # Coefficients a_n = C_N^{-1} t (used for the predictive mean)
    an = C_inv.dot(t_train)
    for i in range(N):
        k = k_vector(x_train, x[i], kernel)
        mean[i] = k.dot(an)                       # Eq. (6.66)
        # Eq. (6.67): c = k(x, x) + 1/beta.  The 1/beta noise term was
        # missing before, which understates the predictive variance and
        # can even drive it negative under the sqrt.
        c = kernel(x[i], x[i]) + 1.0/beta
        var[i] = c - k.dot(C_inv.dot(k))
    return mean, np.sqrt(var)


def main():
    BETA = 10
    SIGMA = 1.0
    THETA = 0.10

    x, t = create_data_set()
    # Points used for 'training' (hold out the last three samples)
    x_train = x[:-3]
    t_train = t[:-3]

    kernel_g = functools.partial(gauss_kernel, sigma=SIGMA)
    kernel_e = functools.partial(exp_kernel, theta=THETA)

    x_r = np.linspace(x[0], x[-1], 100)
    mean_g, std_g = mean_std(x_r, x_train, t_train, kernel_g, BETA)
    mean_e, std_e = mean_std(x_r, x_train, t_train, kernel_e, BETA)

    # Multiple of standard deviations to include on each side of mean
    STD_OFFSET = 2

    fig, ax = plt.subplots()
    # Full data set
    ax.plot(x, t, linestyle='', marker='o', markersize=5, label='Data')
    # Points used for model generation
    ax.plot(x_train, t_train, linestyle='', marker='x', color='red',
            markeredgewidth=2, label='Picked Points')
    # Target
    ax.plot(x_r, np.sin(x_r), label='Target', linestyle='--', color='m')

    # Mean and plus/minus std dev of model -- Gaussian kernel
    ax.plot(x_r, mean_g, label='$E[p(y)]$ (Gaussian)', color='blue')
    ax.fill_between(x_r,
                    mean_g - STD_OFFSET*std_g,
                    mean_g + STD_OFFSET*std_g,
                    color='red', alpha=0.30,
                    label=r'$E \pm {}\sigma$ (Gaussian)'.format(STD_OFFSET))

    # Mean and plus/minus std deviation -- Exponential kernel
    ax.plot(x_r, mean_e, label='$E[p(y)]$ (Exponential)', color='green')
    # BUG FIX: the band was previously centered on mean_g (the Gaussian
    # kernel's mean); it must be centered on mean_e.
    ax.fill_between(x_r,
                    mean_e - STD_OFFSET*std_e,
                    mean_e + STD_OFFSET*std_e,
                    color='green', alpha=0.30,
                    label=r'$E \pm {}\sigma$ (Exponential)'.format(STD_OFFSET))

    ax.grid(True)
    ax.legend(loc='best', fancybox=True, framealpha=0.5, fontsize='small')
    ax.set_title(r'$\beta$ = {}, $\sigma$ = {}, $\theta$ = {}'.format(
        BETA, SIGMA, THETA))
    ax.set_ylabel('y')
    ax.set_xlabel('x')
    fig.patch.set_alpha(0.0)

    __dirname = os.path.dirname(os.path.realpath(__file__))
    fn = os.path.join(__dirname, '../img/gprocess-regression.svg')
    # bbox_inches=0 is not a valid value (it was silently ignored because
    # 0 is falsy); 'tight' is the documented option for trimmed output.
    plt.savefig(fn, facecolor=fig.get_facecolor(), edgecolor='none',
                bbox_inches='tight')


if __name__ == '__main__':
    main()