machine-learning/dropout-in-pytorch/dropoutregularizationpytorch_pythoncodetutorial.py

# -*- coding: utf-8 -*-
"""DropoutRegularizationPyTorch_PythonCodeTutorial.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/19NQnpsxp29J12nTTH8zaTXzkoZ-b_mfj
"""

import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# defining our device, 'cuda:0' if CUDA is available, 'cpu' otherwise
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

# make the transform pipeline, converting to tensor and normalizing
transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# the batch size during training
batch_size = 64

train_dataset = torchvision.datasets.CIFAR10(root="./data", train=True, 
                                             download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=2)

test_dataset = torchvision.datasets.CIFAR10(root="./data", train=False, 
                                             download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                           shuffle=False, num_workers=2)

# the MNIST classes
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
# switch to GPU if available
net.to(device)

import torch.optim as optim

# defining the loss and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

def get_test_loss(net, criterion, data_loader):
  """A simple function that iterates over `data_loader` to calculate the overall loss"""
  testing_loss = []
  with torch.no_grad():
    for data in data_loader:
      inputs, labels = data
      # get the data to GPU (if available)
      inputs, labels = inputs.to(device), labels.to(device)
      outputs = net(inputs)
      # calculate the loss for this batch
      loss = criterion(outputs, labels)
      # add the loss of this batch to the list
      testing_loss.append(loss.item())
  # calculate the average loss
  return sum(testing_loss) / len(testing_loss)

training_loss, testing_loss = [], []
running_loss = []
i = 0
for epoch in range(150): # 150 epochs
  for data in train_loader:
    inputs, labels = data
    # get the data to GPU (if available)
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    # forward pass
    outputs = net(inputs)
    # backward pass
    loss = criterion(outputs, labels)
    loss.backward()
    # update gradients
    optimizer.step()
    running_loss.append(loss.item())
    i += 1
    if i % 1000 == 0:
      avg_train_loss = sum(running_loss) / len(running_loss)
      avg_test_loss = get_test_loss(net, criterion, test_loader)
      # clear the list
      running_loss.clear()
      # for logging & plotting later
      training_loss.append(avg_train_loss)
      testing_loss.append(avg_test_loss)
      print(f"[{epoch:2d}] [it={i:5d}] Train Loss: {avg_train_loss:.3f}, Test Loss: {avg_test_loss:.3f}")

  
print("Done training.")

class NetDropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.do1 = nn.Dropout(0.2)  # 20% Probability
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.do2 = nn.Dropout(0.2)  # 20% Probability
        self.fc2 = nn.Linear(120, 84)
        self.do3 = nn.Dropout(0.1)  # 10% Probability
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.do1(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = self.do2(x)
        x = F.relu(self.fc2(x))
        x = self.do3(x)
        x = self.fc3(x)
        return x


net_dropout = NetDropout()
net_dropout.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net_dropout.parameters(), lr=0.001, momentum=0.9)

training_loss_d, testing_loss_d = [], []
running_loss = []
i = 0
for epoch in range(150): # 10 epochs
  for data in train_loader:
    inputs, labels = data
    # get the data to GPU (if available)
    inputs, labels = inputs.to(device), labels.to(device)

    optimizer.zero_grad()
    # forward pass
    outputs = net_dropout(inputs)
    # backward pass
    loss = criterion(outputs, labels)
    loss.backward()
    # update gradients
    optimizer.step()
    running_loss.append(loss.item())
    i += 1
    if i % 1000 == 0:
      avg_train_loss = sum(running_loss) / len(running_loss)
      avg_test_loss = get_test_loss(net_dropout, criterion, test_loader)
      # clear the list
      running_loss.clear()
      # for logging & plotting later
      training_loss_d.append(avg_train_loss)
      testing_loss_d.append(avg_test_loss)
      print(f"[{epoch:2d}] [it={i:5d}] Train Loss: {avg_train_loss:.3f}, Test Loss: {avg_test_loss:.3f}")

  
print("Done training.")

import matplotlib.pyplot as plt

# plot both benchmarks
plt.plot(testing_loss, label="no dropout")
plt.plot(testing_loss_d, label="with dropout")
# make the legend on the plot
plt.legend()
plt.title("The Cross-entropy loss of the MNIST test data with and w/o Dropout")
plt.show()

import time

while True:
  time.sleep(30)