from fastcore.test import test_close
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray'
path_data = Path('data')
path_gz = path_data/'mnist.pkl.gz'
with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
x_train, y_train, x_valid, y_valid = map(tensor, [x_train, y_train, x_valid, y_valid])
This is not my content; it's part of fastai's From Deep Learning Foundations to Stable Diffusion course. I've added some notes to help me understand it better, that's all. For the source, check the fastai course page.
Everything from scratch to PyTorch.
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
from pathlib import Path
from torch import tensor,nn
import torch.nn.functional as F
:::
Initial setup
Data
n,m = x_train.shape
c = y_train.max()+1
nh = 50
class Model(nn.Module):
def __init__(self, n_in, nh, n_out):
super().__init__()
self.layers = [nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)]
def __call__(self, x):
for l in self.layers: x = l(x)
return x
model = Model(m, nh, 10)
pred = model(x_train)
pred.shape
torch.Size([50000, 10])
Cross entropy loss
First, we will need to compute the softmax of our activations. This is defined by:
\[\hbox{softmax(x)}_{i} = \frac{e^{x_{i}}}{e^{x_{0}} + e^{x_{1}} + \cdots + e^{x_{n-1}}}\]
or more concisely:
\[\hbox{softmax(x)}_{i} = \frac{e^{x_{i}}}{\sum\limits_{0 \leq j \lt n} e^{x_{j}}}\]
In practice, we will need the log of the softmax when we calculate the loss.
def log_softmax(x): return (x.exp()/(x.exp().sum(-1,keepdim=True))).log()
log_softmax(pred)
tensor([[-2.37, -2.49, -2.36, ..., -2.31, -2.28, -2.22],
[-2.37, -2.44, -2.44, ..., -2.27, -2.26, -2.16],
[-2.48, -2.33, -2.28, ..., -2.30, -2.30, -2.27],
...,
[-2.33, -2.52, -2.34, ..., -2.31, -2.21, -2.16],
[-2.38, -2.38, -2.33, ..., -2.29, -2.26, -2.17],
[-2.33, -2.55, -2.36, ..., -2.29, -2.27, -2.16]], grad_fn=<LogBackward0>)
Note that the formula
\[\log \left ( \frac{a}{b} \right ) = \log(a) - \log(b)\]
gives a simplification when we compute the log softmax:
def log_softmax(x): return x - x.exp().sum(-1,keepdim=True).log()
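Written out, the one-liner above computes:
\[\log \left( \hbox{softmax(x)}_{i} \right) = x_{i} - \log \left( \sum\limits_{0 \leq j \lt n} e^{x_{j}} \right)\]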
Then, there is a way to compute the log of the sum of exponentials in a more stable way, called the LogSumExp trick. The idea is to use the following formula:
\[\log \left ( \sum_{j=1}^{n} e^{x_{j}} \right ) = \log \left ( e^{a} \sum_{j=1}^{n} e^{x_{j}-a} \right ) = a + \log \left ( \sum_{j=1}^{n} e^{x_{j}-a} \right )\]
where a is the maximum of the \(x_{j}\).
def logsumexp(x):
    m = x.max(-1)[0]
    return m + (x-m[:,None]).exp().sum(-1).log()
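To see why this matters, here is a quick sanity check with made-up activations that are large enough to overflow float32: the naive sum of exponentials becomes infinite, while the stabilised version stays finite.

```python
big = tensor([[100., 101., 102.]])   # illustrative values, big enough to overflow exp() in float32
big.exp().sum(-1).log()              # naive version: tensor([inf])
logsumexp(big)                       # stable version: roughly tensor([102.41])
```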
This way, we will avoid an overflow when taking the exponential of a big activation. In PyTorch, this is already implemented for us.
def log_softmax(x): return x - x.logsumexp(-1,keepdim=True)
test_close(logsumexp(pred), pred.logsumexp(-1))
sm_pred = log_softmax(pred)
sm_pred
tensor([[-2.37, -2.49, -2.36, ..., -2.31, -2.28, -2.22],
[-2.37, -2.44, -2.44, ..., -2.27, -2.26, -2.16],
[-2.48, -2.33, -2.28, ..., -2.30, -2.30, -2.27],
...,
[-2.33, -2.52, -2.34, ..., -2.31, -2.21, -2.16],
[-2.38, -2.38, -2.33, ..., -2.29, -2.26, -2.17],
[-2.33, -2.55, -2.36, ..., -2.29, -2.27, -2.16]], grad_fn=<SubBackward0>)
The cross entropy loss for some target \(x\) and some prediction \(p(x)\) is given by:
\[ -\sum x\, \log p(x) \]
But since our \(x\)s are 1-hot encoded (actually, they’re just the integer indices), this can be rewritten as \(-\log(p_{i})\) where i is the index of the desired target.
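Spelled out, since \(x_{j}\) is 1 at the target index \(i\) and 0 everywhere else:
\[-\sum_{j} x_{j}\, \log p_{j} = -\log p_{i}\]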
This can be done using numpy-style integer array indexing. Note that PyTorch supports all the tricks described in NumPy's advanced indexing documentation.
See https://www.youtube.com/watch?v=vGdB4eI4KBs at 1:38.
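As a toy illustration of integer array indexing (the tensor below is made up purely for demonstration), passing one index list per dimension picks out one element per pair of indices:

```python
t = torch.arange(12).view(3,4)
t[[0,1,2], [3,0,1]]   # picks t[0,3], t[1,0], t[2,1] -> tensor([3, 4, 9])
```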
y_train[:3]
tensor([5, 0, 4])
sm_pred[0,5],sm_pred[1,0],sm_pred[2,4]
(tensor(-2.20, grad_fn=<SelectBackward0>),
tensor(-2.37, grad_fn=<SelectBackward0>),
tensor(-2.36, grad_fn=<SelectBackward0>))
sm_pred[[0,1,2], y_train[:3]]
tensor([-2.20, -2.37, -2.36], grad_fn=<IndexBackward0>)
def nll(input, target): return -input[range(target.shape[0]), target].mean()
loss = nll(sm_pred, y_train)
loss
tensor(2.30, grad_fn=<NegBackward0>)
Then use PyTorch’s implementation.
test_close(F.nll_loss(F.log_softmax(pred, -1), y_train), loss, 1e-3)
In PyTorch, F.log_softmax and F.nll_loss are combined in one optimized function, F.cross_entropy.
test_close(F.cross_entropy(pred, y_train), loss, 1e-3)
Basic training loop
Basically the training loop repeats over the following steps:

- get the output of the model on a batch of inputs
- compare the output to the labels we have and compute a loss
- calculate the gradients of the loss with respect to every parameter of the model
- update said parameters with those gradients to make them a little bit better
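As a minimal, self-contained sketch of those four steps (using a throwaway toy model and random data, not our MNIST model; the real loop follows below), one update looks like this:

```python
toy_model = nn.Linear(4, 2)                                   # hypothetical tiny model
toy_x, toy_y = torch.randn(8, 4), torch.randint(0, 2, (8,))   # made-up batch
toy_loss = F.cross_entropy(toy_model(toy_x), toy_y)           # steps 1-2: predictions + loss
toy_loss.backward()                                           # step 3: gradients for every parameter
with torch.no_grad():
    for p in toy_model.parameters():
        p -= 0.1 * p.grad                                     # step 4: update with the gradients
        p.grad.zero_()                                        # reset for the next step
```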
loss_func = F.cross_entropy
bs = 50                  # batch size
xb = x_train[0:bs]       # a mini-batch from x
preds = model(xb)        # predictions
preds[0], preds.shape
(tensor([-0.09, -0.21, -0.08, 0.10, -0.04, 0.08, -0.04, -0.03, 0.01, 0.06], grad_fn=<SelectBackward0>),
torch.Size([50, 10]))
yb = y_train[0:bs]
yb
tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1, 1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9,
3, 9, 8, 5, 9, 3])
loss_func(preds, yb)
tensor(2.30, grad_fn=<NllLossBackward0>)
preds.argmax(dim=1)
tensor([3, 9, 3, 8, 5, 9, 3, 9, 3, 9, 5, 3, 9, 9, 3, 9, 9, 5, 8, 7, 9, 5, 3, 8, 9, 5, 9, 5, 5, 9, 3, 5, 9, 7, 5, 7, 9, 9, 3, 9, 3, 5, 3, 8,
3, 5, 9, 5, 9, 5])
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
def accuracy(out, yb): return (out.argmax(dim=1)==yb).float().mean()
:::
accuracy(preds, yb)
tensor(0.08)
lr = 0.5      # learning rate
epochs = 3    # how many epochs to train for
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
def report(loss, preds, yb): print(f'{loss:.2f}, {accuracy(preds, yb):.2f}')
:::
xb,yb = x_train[:bs],y_train[:bs]
preds = model(xb)
report(loss_func(preds, yb), preds, yb)
2.30, 0.08
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(n,i+bs))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        with torch.no_grad():
            for l in model.layers:
                if hasattr(l, 'weight'):
                    l.weight -= l.weight.grad * lr
                    l.bias   -= l.bias.grad * lr
                    l.weight.grad.zero_()
                    l.bias  .grad.zero_()
    report(loss, preds, yb)
0.12, 0.98
0.12, 0.94
0.08, 0.96
Using parameters and optim
Parameters
m1 = nn.Module()
m1.foo = nn.Linear(3,4)
m1
Module(
(foo): Linear(in_features=3, out_features=4, bias=True)
)
list(m1.named_children())
[('foo', Linear(in_features=3, out_features=4, bias=True))]
m1.named_children()
<generator object Module.named_children>
list(m1.parameters())
[Parameter containing:
tensor([[ 0.57, 0.43, -0.30],
[ 0.13, -0.32, -0.24],
[ 0.51, 0.04, 0.22],
[ 0.13, -0.17, -0.24]], requires_grad=True),
Parameter containing:
tensor([-0.01, -0.51, -0.39, 0.56], requires_grad=True)]
class MLP(nn.Module):
def __init__(self, n_in, nh, n_out):
super().__init__()
self.l1 = nn.Linear(n_in,nh)
self.l2 = nn.Linear(nh,n_out)
self.relu = nn.ReLU()
def forward(self, x): return self.l2(self.relu(self.l1(x)))
model = MLP(m, nh, 10)
model.l1
Linear(in_features=784, out_features=50, bias=True)
model
MLP(
(l1): Linear(in_features=784, out_features=50, bias=True)
(l2): Linear(in_features=50, out_features=10, bias=True)
(relu): ReLU()
)
for name,l in model.named_children(): print(f"{name}: {l}")
l1: Linear(in_features=784, out_features=50, bias=True)
l2: Linear(in_features=50, out_features=10, bias=True)
relu: ReLU()
for p in model.parameters(): print(p.shape)
torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])
def fit():
    for epoch in range(epochs):
        for i in range(0, n, bs):
            s = slice(i, min(n,i+bs))
            xb,yb = x_train[s],y_train[s]
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            with torch.no_grad():
                for p in model.parameters(): p -= p.grad * lr
                model.zero_grad()
        report(loss, preds, yb)
fit()
0.19, 0.96
0.11, 0.96
0.04, 1.00
Behind the scenes, PyTorch overrides the __setattr__ function in nn.Module so that the submodules you define are properly registered as parameters of the model.
Here's how to create such a module from scratch (keyword: create module):
class MyModule:
def __init__(self, n_in, nh, n_out):
self._modules = {}
self.l1 = nn.Linear(n_in,nh)
self.l2 = nn.Linear(nh,n_out)
def __setattr__(self,k,v):
if not k.startswith("_"): self._modules[k] = v
super().__setattr__(k,v)
def __repr__(self): return f'{self._modules}'
def parameters(self):
for l in self._modules.values(): yield from l.parameters()
mdl = MyModule(m,nh,10)
mdl
{'l1': Linear(in_features=784, out_features=50, bias=True), 'l2': Linear(in_features=50, out_features=10, bias=True)}
for p in mdl.parameters(): print(p.shape)
torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])
Registering modules
from functools import reduce
We can use the original layers approach, but we have to register the modules.
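To see why registration matters, here's a quick check (the variable name is just illustrative): a plain Python list of layers is stored on the module, but its parameters are invisible to parameters().

```python
unregistered = nn.Module()
unregistered.layers = [nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10)]  # stored, but not registered
len(list(unregistered.parameters()))                                  # 0
```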
layers = [nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10)]
reduce(lambda val,layer: layer(val), self.layers, x) does the same thing as for l in self.layers: x = l(x); check the class below.
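Before that, a tiny throwaway check of the equivalence itself, folding a value through a list of functions (the lambdas stand in for layers):

```python
fs = [lambda v: v+1, lambda v: v*10]      # made-up functions standing in for layers
reduce(lambda val,f: f(val), fs, 3)       # ((3+1)*10) = 40
out = 3
for f in fs: out = f(out)                 # the explicit loop gives the same 40
out
```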
class Model(nn.Module):
def __init__(self, layers):
super().__init__()
self.layers = layers
for i,l in enumerate(self.layers): self.add_module(f'layer_{i}', l)
def forward(self, x): return reduce(lambda val,layer: layer(val), self.layers, x)
model = Model(layers)
model
Model(
(layer_0): Linear(in_features=784, out_features=50, bias=True)
(layer_1): ReLU()
(layer_2): Linear(in_features=50, out_features=10, bias=True)
)
model(xb).shape
torch.Size([50, 10])
nn.ModuleList
nn.ModuleList does this for us.
class SequentialModel(nn.Module):
def __init__(self, layers):
super().__init__()
self.layers = nn.ModuleList(layers)
def forward(self, x):
for l in self.layers: x = l(x)
return x
model = SequentialModel(layers)
model
SequentialModel(
(layers): ModuleList(
(0): Linear(in_features=784, out_features=50, bias=True)
(1): ReLU()
(2): Linear(in_features=50, out_features=10, bias=True)
)
)
fit()
0.12, 0.96
0.11, 0.96
0.07, 0.98
nn.Sequential
nn.Sequential is a convenient class which does the same as the above:
model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
fit()
loss_func(model(xb), yb), accuracy(model(xb), yb)
0.16, 0.94
0.13, 0.96
0.08, 0.96
(tensor(0.03, grad_fn=<NllLossBackward0>), tensor(1.))
model
Sequential(
(0): Linear(in_features=784, out_features=50, bias=True)
(1): ReLU()
(2): Linear(in_features=50, out_features=10, bias=True)
)
optim
class Optimizer():
def __init__(self, params, lr=0.5): self.params,self.lr=list(params),lr
def step(self):
with torch.no_grad():
for p in self.params: p -= p.grad * self.lr
def zero_grad(self):
for p in self.params: p.grad.data.zero_()
model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
opt = Optimizer(model.parameters())
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(n,i+bs))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
0.18, 0.94
0.13, 0.96
0.11, 0.94
PyTorch already provides this exact functionality in optim.SGD (it also handles stuff like momentum, which we'll look at later).
from torch import optim
def get_model():
    model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
    return model, optim.SGD(model.parameters(), lr=lr)
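As a side note on the momentum mentioned above: it's just an extra keyword argument to optim.SGD. A minimal sketch (the names here are hypothetical and 0.9 is an arbitrary illustrative value):

```python
model_sgd = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
opt_sgd   = optim.SGD(model_sgd.parameters(), lr=lr, momentum=0.9)  # same step()/zero_grad() API
```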
model,opt = get_model()
loss_func(model(xb), yb)
tensor(2.33, grad_fn=<NllLossBackward0>)
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(n,i+bs))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
0.12, 0.98
0.09, 0.98
0.07, 0.98
Dataset and DataLoader
Dataset
It’s clunky to iterate through minibatches of x and y values separately:
xb = x_train[s]
yb = y_train[s]
Instead, let's do these two steps together, by introducing a Dataset class:
xb,yb = train_ds[s]
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
class Dataset():
def __init__(self, x, y): self.x,self.y = x,y
def __len__(self): return len(self.x)
def __getitem__(self, i): return self.x[i],self.y[i]
:::
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)
assert len(train_ds)==len(x_train)
assert len(valid_ds)==len(x_valid)
xb,yb = train_ds[0:5]
assert xb.shape==(5,28*28)
assert yb.shape==(5,)
xb,yb
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
tensor([5, 0, 4, 1, 9]))
model,opt = get_model()
for epoch in range(epochs):
    for i in range(0, n, bs):
        xb,yb = train_ds[i:min(n,i+bs)]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
0.17, 0.96
0.11, 0.94
0.09, 0.96
DataLoader
Previously, our loop iterated over batches (xb, yb) like this:
for i in range(0, n, bs):
    xb,yb = train_ds[i:min(n,i+bs)]
    ...
Let’s make our loop much cleaner, using a data loader:
for xb,yb in train_dl:
...
class DataLoader():
def __init__(self, ds, bs): self.ds,self.bs = ds,bs
def __iter__(self):
for i in range(0, len(self.ds), self.bs): yield self.ds[i:i+self.bs]
train_dl = DataLoader(train_ds, bs)
valid_dl = DataLoader(valid_ds, bs)
xb,yb = next(iter(valid_dl))
xb.shape
torch.Size([50, 784])
yb
tensor([3, 8, 6, 9, 6, 4, 5, 3, 8, 4, 5, 2, 3, 8, 4, 8, 1, 5, 0, 5, 9, 7, 4, 1, 0, 3, 0, 6, 2, 9, 9, 4, 1, 3, 6, 8, 0, 7, 7, 6, 8, 9, 0, 3,
8, 3, 7, 7, 8, 4])
plt.imshow(xb[0].view(28,28))
yb[0]
tensor(3)
model,opt = get_model()
def fit():
    for epoch in range(epochs):
        for xb,yb in train_dl:
            pred = model(xb)
            loss = loss_func(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        report(loss, preds, yb)
fit()
loss_func(model(xb), yb), accuracy(model(xb), yb)
0.11, 0.96
0.09, 0.96
0.06, 0.96
(tensor(0.03, grad_fn=<NllLossBackward0>), tensor(1.))
Random sampling
We want our training set to be in a random order, and that order should differ each iteration. But the validation set shouldn’t be randomized.
import random
class Sampler():
    def __init__(self, ds, shuffle=False): self.n,self.shuffle = len(ds),shuffle
    def __iter__(self):
        res = list(range(self.n))
        if self.shuffle: random.shuffle(res)
        return iter(res)
from itertools import islice
ss = Sampler(train_ds)
it = iter(ss)
for o in range(5): print(next(it))
0
1
2
3
4
list(islice(ss, 5))
[0, 1, 2, 3, 4]
ss = Sampler(train_ds, shuffle=True)
list(islice(ss, 5))
[11815, 32941, 21760, 21778, 35233]
import fastcore.all as fc
class BatchSampler():
def __init__(self, sampler, bs, drop_last=False): fc.store_attr()
def __iter__(self): yield from fc.chunked(iter(self.sampler), self.bs, drop_last=self.drop_last)
batchs = BatchSampler(ss, 4)
list(islice(batchs, 5))
[[30214, 5339, 461, 9948],
[8032, 20805, 16282, 13099],
[26751, 2761, 552, 12897],
[16714, 7294, 34658, 24330],
[13836, 28629, 16552, 32028]]
def collate(b):
    xs,ys = zip(*b)
    return torch.stack(xs),torch.stack(ys)
class DataLoader():
def __init__(self, ds, batchs, collate_fn=collate): fc.store_attr()
def __iter__(self): yield from (self.collate_fn(self.ds[i] for i in b) for b in self.batchs)
train_samp = BatchSampler(Sampler(train_ds, shuffle=True ), bs)
valid_samp = BatchSampler(Sampler(valid_ds, shuffle=False), bs)
train_dl = DataLoader(train_ds, batchs=train_samp)
valid_dl = DataLoader(valid_ds, batchs=valid_samp)
xb,yb = next(iter(valid_dl))
plt.imshow(xb[0].view(28,28))
yb[0]
tensor(3)
xb.shape,yb.shape
(torch.Size([50, 784]), torch.Size([50]))
model,opt = get_model()
fit()
0.16, 0.08
0.08, 0.04
0.06, 0.08
Multiprocessing DataLoader
import torch.multiprocessing as mp
from fastcore.basics import store_attr
train_ds[[3,6,8,1]]
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
tensor([1, 1, 1, 0]))
train_ds.__getitem__([3,6,8,1])
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
tensor([1, 1, 1, 0]))
for o in map(train_ds.__getitem__, ([3,6],[8,1])): print(o)
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]), tensor([1, 1]))
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]), tensor([1, 0]))
class DataLoader():
def __init__(self, ds, batchs, n_workers=1, collate_fn=collate): fc.store_attr()
def __iter__(self):
with mp.Pool(self.n_workers) as ex: yield from ex.map(self.ds.__getitem__, iter(self.batchs))
train_dl = DataLoader(train_ds, batchs=train_samp, n_workers=2)
it = iter(train_dl)
xb,yb = next(it)
xb.shape,yb.shape
(torch.Size([50, 784]), torch.Size([50]))
PyTorch DataLoader
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler, BatchSampler
:::
train_samp = BatchSampler(RandomSampler(train_ds), bs, drop_last=False)
valid_samp = BatchSampler(SequentialSampler(valid_ds), bs, drop_last=False)
train_dl = DataLoader(train_ds, batch_sampler=train_samp, collate_fn=collate)
valid_dl = DataLoader(valid_ds, batch_sampler=valid_samp, collate_fn=collate)
model,opt = get_model()
fit()
loss_func(model(xb), yb), accuracy(model(xb), yb)
0.10, 0.06
0.10, 0.04
0.27, 0.06
(tensor(0.25, grad_fn=<NllLossBackward0>), tensor(0.94))
PyTorch can auto-generate the BatchSampler for us:
train_dl = DataLoader(train_ds, bs, sampler=RandomSampler(train_ds), collate_fn=collate)
valid_dl = DataLoader(valid_ds, bs, sampler=SequentialSampler(valid_ds), collate_fn=collate)
PyTorch can also generate the Sequential/RandomSamplers too:
train_dl = DataLoader(train_ds, bs, shuffle=True, drop_last=True, num_workers=2)
valid_dl = DataLoader(valid_ds, bs, shuffle=False, num_workers=2)
model,opt = get_model()
fit()
loss_func(model(xb), yb), accuracy(model(xb), yb)
0.21, 0.14
0.15, 0.16
0.05, 0.10
(tensor(0.22, grad_fn=<NllLossBackward0>), tensor(0.96))
Our dataset actually already knows how to sample a batch of indices all at once:
train_ds[[4,6,7]]
(tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
tensor([9, 1, 3]))
…that means that we can actually skip the batch_sampler and collate_fn entirely:
train_dl = DataLoader(train_ds, sampler=train_samp)
valid_dl = DataLoader(valid_ds, sampler=valid_samp)
xb,yb = next(iter(train_dl))
xb.shape,yb.shape
(torch.Size([1, 50, 784]), torch.Size([1, 50]))
Validation
You should always also have a validation set, in order to identify whether you are overfitting.
We will calculate and print the validation loss at the end of each epoch.
(Note that we always call model.train() before training, and model.eval() before inference, because these are used by layers such as nn.BatchNorm2d and nn.Dropout to ensure appropriate behaviour for these different phases.)
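As a quick illustration of why this matters (toy tensor, values chosen arbitrarily): nn.Dropout zeroes activations and rescales the survivors during training, but is the identity in eval mode.

```python
drop = nn.Dropout(0.5)
x = torch.ones(6)
drop.train(); print(drop(x))   # roughly half the entries zeroed, survivors scaled to 2.0
drop.eval();  print(drop(x))   # tensor([1., 1., 1., 1., 1., 1.])
```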
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb,yb in train_dl:
            loss = loss_func(model(xb), yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

        model.eval()
        with torch.no_grad():
            tot_loss,tot_acc,count = 0.,0.,0
            for xb,yb in valid_dl:
                pred = model(xb)
                n = len(xb)
                count += n
                tot_loss += loss_func(pred,yb).item()*n
                tot_acc  += accuracy (pred,yb).item()*n
        print(epoch, tot_loss/count, tot_acc/count)
    return tot_loss/count, tot_acc/count
:::
::: {.cell 0=‘e’ 1=‘x’ 2=‘p’ 3=‘o’ 4=‘r’ 5=‘t’}
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))
:::
Now, our whole process of obtaining the data loaders and fitting the model can be run in 3 lines of code:
train_dl,valid_dl = get_dls(train_ds, valid_ds, bs)
model,opt = get_model()
%time loss,acc = fit(5, model, loss_func, opt, train_dl, valid_dl)
0 0.14236384611576797 0.958100004196167
1 0.12564025789499284 0.9632000041007995
2 0.1306914868950844 0.9645000052452087
3 0.10988455526065082 0.9670000064373017
4 0.11636362857650966 0.9678000068664551
CPU times: user 10.5 s, sys: 16.3 s, total: 26.8 s
Wall time: 1.68 s
Export
import nbdev; nbdev.nbdev_export()
:::{.callout-warning}
How to install the MINIAI module: pip install -e '.[dev]'
:::