# Petits exemples

In [None]:
import torch

In [None]:
# Starting value, kept as a plain Python float.
x0 = 5.0
# Leaf tensor tracked by autograd, so gradients with respect to x can be computed.
x = torch.tensor(x0).requires_grad_()

In [None]:
x

tensor(5., requires_grad=True)

In [None]:
# Square x; autograd records the operation because x requires grad.
y = x**2

In [None]:
y

tensor(25., grad_fn=<PowBackward0>)

In [None]:
# Backpropagate from y, which fills x.grad with dy/dx.
y.backward()

In [None]:
# dy/dx = 2x, i.e. 10 at x = 5.
dy_dx = x.grad
dy_dx

tensor(10.)

In [None]:
x

tensor(5., requires_grad=True)

In [None]:
# Four scalar leaf tensors, all tracked by autograd.
x, a, b, k = (
    torch.tensor(value, requires_grad=True) for value in (5., 2., 3., 2.)
)
# Affine function of x; k plays no part in it.
y = torch.add(torch.mul(a, x), b)
# Power function of x whose exponent is itself a differentiable tensor.
z = torch.pow(x, k)
y

tensor(13., grad_fn=<AddBackward0>)

In [None]:
# Backpropagate from y = a * x + b: fills the gradients of x, a and b.
y.backward()

In [None]:
x.grad  # dy/dx = a = 2

tensor(2.)

In [None]:
a.grad  # dy/da = x = 5

tensor(5.)

In [None]:
b.grad  # dy/db = 1

tensor(1.)

In [None]:
k.grad  # None: k does not appear in y, so y.backward() left it unset

In [None]:
# Backpropagate from z = x**k; the result is added to the gradients already stored.
z.backward()

In [None]:
x.grad # WRONG: not dz/dx = 10 but 2 + 10, because gradients accumulate across backward() calls

tensor(12.)

# Optimisation basique

Minimisation de $x^2$

In [None]:
lr = 0.1

x = torch.tensor(5., requires_grad=True)

for i in range(100):
    # Forward pass: the function being minimised.
    y = x**2

    # Backward pass: x is a fresh leaf at every iteration, so x.grad holds
    # exactly dy/dx = 2x (nothing has accumulated from earlier steps).
    y.backward()

    # Gradient step computed outside the autograd graph; the result is a new
    # tensor that does not require grad.
    with torch.no_grad():
        x = x - lr * x.grad
        # Optional learning-rate decay, left disabled:
        # if i % 10 == 0:
        #     lr = lr / 2

    # Make the updated value track gradients again for the next iteration.
    x.requires_grad_()

    # x is the updated value, y the value computed before the update.
    print(x, y)

# Régression linéaire

## Données toutes simples

In [None]:
# Inputs: evenly spaced points from 0 (inclusive) to 10 (exclusive) in steps of 0.1.
# Plain data, so no gradient tracking is requested.
x = torch.arange(0, 10, 0.1)
# Targets: the line 2x + 5 plus standard normal noise. The noise is sized from x
# rather than hard-coded, so both tensors always have matching lengths.
y = 2 * x + 5 + torch.randn_like(x)

In [None]:
y

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the noisy targets against the inputs, converting both tensors to NumPy arrays first.
plt.plot(x.detach().numpy(), y.detach().numpy())
plt.show()

## À la main

Les 3 variantes ci-dessous font la même chose.

Le point important est que lors de la mise à jour, on doit éviter de
créer une boucle dans le graphe de calcul.

In [None]:
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)
lr = 0.01

history = []
for epoch in range(1000):
    # Mean squared error of the current line a * x + b.
    loss = torch.sum((a * x + b - y)**2) / x.shape[0]
    # Record a plain float: storing the tensor itself would keep every
    # iteration's autograd graph alive, and tensors that require grad cannot
    # be converted to NumPy when the history is plotted.
    history.append(loss.item())
    loss.backward()

    with torch.no_grad():  # The update itself must not be recorded in the graph
        a = a - lr * a.grad
        b = b - lr * b.grad

    # a and b are now new tensors without gradient tracking; re-enable it for
    # the next iteration.
    a.requires_grad_()
    b.requires_grad_()

    print(epoch, loss, a.data, b.data)

# Loss per epoch.
plt.plot(history)
plt.show()

In [None]:
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)
lr = 0.01

for epoch in range(1000):
    # Mean squared error of the current line a * x + b.
    residual = a * x + b - y
    loss = (residual ** 2).sum() / x.shape[0]
    loss.backward()

    for param in (a, b):
        # Work on the raw values through .data, so nothing is added to the graph.
        param.data = param.data - lr * param.grad.data
        # Reset the gradient, otherwise the next backward() would add to it.
        param.grad.data.zero_()

    print(epoch, loss, a.data, b.data)

In [None]:
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)
lr = 0.01

for epoch in range(1000):
    # Mean squared error of the current line a * x + b.
    residual = a * x + b - y
    loss = (residual ** 2).sum() / x.shape[0]
    loss.backward()

    # Compute the updated values, copy them and cut them off from the graph,
    # then turn gradient tracking back on for the next iteration.
    a = (a - lr * a.grad).clone().detach().requires_grad_(True)
    b = (b - lr * b.grad).clone().detach().requires_grad_(True)

    print(epoch, loss, a.data, b.data)

## La vraie version pytorch

In [None]:
class linearRegression(torch.nn.Module):
    """Linear model made of a single fully connected layer.

    Args:
        inputSize: number of input features of the layer.
        outputSize: number of output features of the layer.
    """

    def __init__(self, inputSize, outputSize):
        super().__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        """Apply the linear layer to x and return the result."""
        return self.linear(x)

In [None]:
model = linearRegression(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Column vectors of shape (N, 1), matching the one-input / one-output model.
inputs = x.reshape((-1, 1))
targets = y.reshape((-1, 1))

for epoch in range(1000):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()

    # Let the optimizer update the model parameters from their gradients.
    optimizer.step()

    print(epoch, loss, model.linear.weight.data, model.linear.bias.data)