vggTrainer.py
import torch
from torch import cuda, no_grad
from torchvision.models import VGG


def train(
    model,
    lossFunction,
    optimizer,
    trainingDataLoader,
    validationDataLoader,
    learningRates,
) -> VGG:
"""Train the model using the given optimizer and estimating the
errors and accuracies on the validation set and training set
using the given loss function.
The model will be trained using for each epoch one of the learning rates
given, until no more will be available.
Args:
model (vgg11): vgg11 model to train
lossFunction (CrossEntropy): cross entropy loss function.
optimizer (AdamW): weighted adam optimizer.
trainingDataLoader (DataLoader): training data loader.
validationDataLoader (DataLoader): validation data loader.
learningRates (list[float]): list of learning rates.
Returns:
vgg11: trained model.
"""
    trainingDataLoaderLength = len(trainingDataLoader.dataset)
    validationDataLoaderLength = len(validationDataLoader.dataset)
    trainingLenForPrint = len(trainingDataLoader)
    # CUDA AMP: the GradScaler works together with autocast to run eligible
    # ops in 16-bit floating point instead of 32-bit, scaling the loss to
    # avoid gradient underflow. Created once so the scale persists across
    # epochs.
    scaler = cuda.amp.GradScaler()
    # Epochs: there are as many as given learning rates.
    for epochsIndex, epochsLearningRate in enumerate(learningRates):
        # Initialize accuracies and losses to 0.
        train_loss, valid_loss, train_acc, valid_acc = 0, 0, 0, 0
        # Set the learning rate of the current epoch on every parameter
        # group of the optimizer.
        for param_group in optimizer.param_groups:
            param_group["lr"] = epochsLearningRate
        # Store the learning rate of the current epoch for the report below.
        actual_lr = epochsLearningRate
        # Set the model in training mode.
        model.train()
        # Keep count of the loaded batches: used to print the current
        # epoch progress.
        loadedBatches = 0
        # Iterate through the training batches.
        for data, label in trainingDataLoader:
            loadedBatches += 1
            # Load the current batch on the GPU.
            data, label = data.cuda(), label.cuda()
            optimizer.zero_grad(set_to_none=True)
            with cuda.amp.autocast():
                # Compute the output and the loss over the current batch.
                output = model(data)
                loss = lossFunction(output, label)
            # Scale the loss, backpropagate, step the optimizer through the
            # scaler, and update the scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # Accumulate the training loss: multiply the batch-average loss
            # by the number of examples in the batch.
            train_loss += loss.item() * data.size(0)
            # Compute accuracy: the prediction is the class with the
            # highest score.
            _, pred = torch.max(output, dim=1)
            train_acc += torch.mean(
                pred.eq(label.view_as(pred)).float()
            ).item() * data.size(0)
            # Every 10 batches print the current epoch progress.
            if loadedBatches % 10 == 0:
                print(
                    f"Epoch: {epochsIndex}\t"
                    f"{100 * loadedBatches / trainingLenForPrint:.2f}% complete."
                )
        # Validation: disable gradient tracking and switch to eval mode.
        with no_grad():
            model.eval()
            # Iterate through the validation batches.
            for data, label in validationDataLoader:
                data, label = data.cuda(), label.cuda()
                with cuda.amp.autocast():
                    output = model(data)
                    loss = lossFunction(output, label)
                valid_loss += loss.item() * data.size(0)
                _, pred = torch.max(output, dim=1)
                valid_acc += torch.mean(
                    pred.eq(label.view_as(pred)).float()
                ).item() * data.size(0)
        # Average the accumulated losses and accuracies over the datasets.
        train_loss /= trainingDataLoaderLength
        valid_loss /= validationDataLoaderLength
        train_acc /= trainingDataLoaderLength
        valid_acc /= validationDataLoaderLength
        # Report the statistics of the finished epoch.
        print(
            f"EPOCH N°: {epochsIndex}\n"
            f" Training Loss: {train_loss:.4f}\n"
            f" Validation Loss: {valid_loss:.4f}\n"
            f" Training Accuracy: {100 * train_acc:.2f}%\n"
            f" Validation Accuracy: {100 * valid_acc:.2f}%\n"
            f" Learning Rate: {actual_lr:.9f}"
        )
    # Attach the optimizer to the model so its state travels with it.
    model.optimizer = optimizer
    print(
        f"\nAchieved validation loss: {valid_loss:.2f} "
        f"and accuracy: {100 * valid_acc:.2f}%"
    )
    return model
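

# A minimal usage sketch, not part of the original module: the dataset
# paths, batch size, and learning-rate schedule below are illustrative
# assumptions, not values prescribed by this trainer. It requires a
# CUDA-capable GPU, since train() moves every batch with .cuda().
if __name__ == "__main__":
    from torch.nn import CrossEntropyLoss
    from torch.optim import AdamW
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms
    from torchvision.models import vgg11

    transform = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()]
    )
    # Hypothetical dataset location; any ImageFolder-style dataset works.
    trainingSet = datasets.ImageFolder("data/train", transform=transform)
    validationSet = datasets.ImageFolder("data/valid", transform=transform)
    trainingDataLoader = DataLoader(trainingSet, batch_size=32, shuffle=True)
    validationDataLoader = DataLoader(validationSet, batch_size=32)

    model = vgg11(num_classes=len(trainingSet.classes)).cuda()
    # One learning rate per epoch: three epochs with a decaying schedule.
    trainedModel = train(
        model,
        CrossEntropyLoss(),
        AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2),
        trainingDataLoader,
        validationDataLoader,
        learningRates=[1e-3, 5e-4, 1e-4],
    )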