%matplotlib inline
import numpy as np
import math
import matplotlib.pyplot as plt


def hidden_law(x):
    """Return the noise-free law sin(2*pi*x), evaluated element-wise.

    Accepts a scalar or any array-like. Uses NumPy's own sine instead of
    wrapping math.sin in np.vectorize: np.vectorize is a Python-level loop
    and refuses size-0 inputs, whereas this handles empty arrays too.
    """
    return np.sin(2 * np.pi * np.asarray(x))


# Dense, evenly spaced abscissas 0.00 ... 0.99 used to draw smooth curves.
x_all = np.arange(0, 1, 0.01)


# Noise-free reference curve sampled on the dense grid, previewed as a
# dotted green line.
y_hidden = hidden_law(x_all)
plt.plot(x_all, y_hidden, color='g', linestyle=':');


np.random.seed(42)  # fixed seed so every run draws the same sample
noiseLevel = 0.2  # standard deviation of the Gaussian noise added to the law
n_samples = 20  # total number of noisy observations; named once so both draws stay in sync
# Draw order matters for reproducibility: the abscissas first, then the noise.
x = np.random.rand(n_samples)
y = hidden_law(x) + noiseLevel*np.random.randn(n_samples)


# The first ten samples are used for fitting; everything after them is held
# out for validation.
x_train, x_valid = x[:10], x[10:]
y_train, y_valid = y[:10], y[10:]


# Overlay the noisy samples on the true curve: dots in the default colour for
# the training points, red dots for the validation points.
plt.plot(x_all, y_hidden, ':g', label='Hidden law')
plt.plot(x_train, y_train, '.', label='Train. data')
plt.plot(x_valid, y_valid, 'r.', label='Valid. data')
plt.legend();


# Despite its name, maxPolyDegree is the *number* of candidate models:
# range(maxPolyDegree) yields the degrees 0 through maxPolyDegree - 1.
maxPolyDegree = 10
polyDegrees = range(maxPolyDegree)


# Least-squares fit of one polynomial per candidate degree, using the
# training split only. polys[d] is the coefficient vector of the degree-d
# fit (np.polyfit orders coefficients from the highest power down).
polys = [np.polyfit(x_train, y_train, degree) for degree in polyDegrees]


# Peek at the coefficients of the three lowest-degree fits.
polys[:3]

[array([-0.21700967]),
 array([-2.1734858 ,  0.91350014]),
 array([ 2.95148537, -5.04705926,  1.34462422])]


def _rmse(coeffs, xs, ys):
    """Root-mean-square error of the polynomial `coeffs` on the samples (xs, ys)."""
    residuals = np.polyval(coeffs, xs) - ys
    return np.sqrt(np.mean(np.square(residuals)))


# One error value per fitted polynomial, in the same order as `polys`:
# the training error on the points used for fitting, the validation error
# on the held-out points.
trainRMSEs = np.array([_rmse(coeffs, x_train, y_train) for coeffs in polys])
validRMSEs = np.array([_rmse(coeffs, x_valid, y_valid) for coeffs in polys])


# Training (blue) versus validation (red) error as a function of model degree.
plt.plot(polyDegrees, trainRMSEs, 'b', label='Training')
plt.plot(polyDegrees, validRMSEs, 'r', label='Validation')
# Fixed window: x spans every fitted degree, y is capped at 10 so that very
# large errors do not flatten the rest of the curve.
plt.xlim(0, maxPolyDegree - 1)
plt.ylim(0, 10)
plt.xlabel('Polynomial degree')
plt.ylabel('RMSE')
plt.legend();


# Validation RMSE of the highest-degree fit. Indexing from the end keeps this
# pointing at the last model even if the number of fitted degrees changes.
validRMSEs[-1]

1350.8736116030939


plt.figure(figsize=(15, 6))
# One panel per fitted model on a 2x5 grid; each panel is titled with the
# polynomial degree it shows.
for polyDegree, coeffs in enumerate(polys):
    plt.subplot(2, 5, polyDegree + 1)
    # Same backdrop in every panel: true law, training dots, validation dots.
    plt.plot(x_all, y_hidden, ':g')
    plt.plot(x_train, y_train, '.')
    plt.plot(x_valid, y_valid, 'r.')
    # The fitted polynomial evaluated on the dense grid.
    plt.plot(x_all, np.polyval(coeffs, x_all), 'b')
    plt.title(polyDegree)
    # Identical window in all panels so the fits can be compared directly.
    plt.axis((0, 1, -1.5, 1.5))

# Polynomial curve (over)fitting demo

## Generate data

## Learn

## Evaluate

## Visualize

## Conclusion