Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 67 additions & 20 deletions labs/06/svhn_competition.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import datetime
import os
import re

os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise

import keras
import numpy as np
from torchvision.transforms import Resize
import torch

import bboxes_utils
Expand Down Expand Up @@ -66,17 +68,58 @@ def main(args: argparse.Namespace) -> None:
# - "classes", a `[num_digits]` vector with classes of image digits,
# - "bboxes", a `[num_digits, 4]` vector with bounding boxes of image digits.
svhn = SVHN()
def get_inputs(dataset=None):
    """Collect a dataset's examples into stacked NumPy arrays.

    Args:
        dataset: Iterable of example dicts with "image", "classes" and
            "bboxes" keys. Defaults to `svhn.train`, resolved lazily at
            call time (the original default `dataset=svhn.train` was
            evaluated once at definition time).

    Returns:
        A tuple `(images, classes, bboxes)` of NumPy arrays with one entry
        per example. NOTE(review): if examples contain a varying number of
        digits, the classes/bboxes arrays come out ragged (dtype=object) —
        same as the original behavior.
    """
    if dataset is None:
        dataset = svhn.train  # resolve the project default lazily
    images, classes, bboxes = [], [], []
    for example in dataset:
        images.append(example["image"])
        classes.append(example["classes"])
        bboxes.append(example["bboxes"])
    return np.array(images), np.array(classes), np.array(bboxes)
# Materialize all three splits as in-memory arrays; gold labels are
# unavailable for the test split, so only its images are kept.
train_imgs, train_clsses, train_bboxes = get_inputs()
dev_imgs, dev_clsses, dev_bboxes = get_inputs(svhn.dev)
test_imgs, _, _ = get_inputs(svhn.test)

def transform_data(example):
    """Convert one SVHN example into an (input, targets, weights) triple.

    Args:
        example: Dict with "image" (HWC image tensor), "classes" (gold digit
            classes, `[num_digits]`) and "bboxes" (gold boxes,
            `[num_digits, 4]` as top, left, bottom, right).

    Returns:
        A triple of
        - the image resized to 224x224,
        - a pair `(one-hot anchor classes, anchor regression targets)`,
        - per-anchor weights: 1.0 for background anchors (class 0), else 0.0.
    """
    image = example["image"]
    gold_classes = example["classes"]
    gold_bboxes = example["bboxes"]

    iou_threshold = 0.5

    # Resize every input image to the backbone's expected 224x224 resolution.
    image = Resize((224, 224))(image)

    # One axis-aligned anchor per cell of a 7x7 grid.
    # NOTE(review): 2**(2/3) * 4 * 2**7 ~= 813, so this grid spans far beyond
    # a 224x224 image — 224 / 7 = 32 was likely intended. Kept as-is; confirm
    # against the coordinate scale bboxes_utils.bboxes_training expects.
    anchor_size = 2 ** (2 / 3) * 4 * (2**7)
    anchors = np.array(
        [
            # top, left, bottom, right
            [
                row * anchor_size,
                col * anchor_size,
                (row + 1) * anchor_size,
                (col + 1) * anchor_size,
            ]
            for row in range(7)
            for col in range(7)
        ]
    )

    anchor_classes, anchor_bboxes = bboxes_utils.bboxes_training(
        anchors, gold_bboxes, gold_classes, iou_threshold
    )
    onehot_encoded_anchor_classes = keras.ops.one_hot(anchor_classes, svhn.LABELS)

    # Weight 1 for background anchors (class 0), 0 otherwise. The original
    # `map(lambda x: x / svhn.LABELS, anchor_classes)` was a bug: it produced
    # a lazy iterator of scaled class ids, which neither implemented this
    # comment nor survived DataLoader batching.
    # NOTE(review): a bbox-regression loss usually wants the *inverse* mask
    # (foreground only) — confirm which loss consumes these weights.
    is_background_weights = (np.asarray(anchor_classes) == 0).astype(np.float32)

    return (
        image,
        (onehot_encoded_anchor_classes, anchor_bboxes),
        is_background_weights,
    )

# Wrap each split in a DataLoader that applies `transform_data` per example
# and batches the resulting (image, targets, weights) triples.
# Only the training split is reshuffled each epoch; dev/test keep their
# order so predictions line up with the gold annotations.
transformed_train_dataset = torch.utils.data.DataLoader(
svhn.train.transform(transform_data), batch_size=args.batch_size, shuffle=True
)
transformed_dev_dataset = torch.utils.data.DataLoader(
svhn.dev.transform(transform_data), batch_size=args.batch_size, shuffle=False
)
transformed_test_dataset = torch.utils.data.DataLoader(
svhn.test.transform(transform_data), batch_size=args.batch_size, shuffle=False
)

# Load the EfficientNetV2-B0 model. It assumes the input images are
# represented in the [0-255] range.
Expand All @@ -92,15 +135,15 @@ def get_inputs(dataset=svhn.train):
)

# TODO: Create the model and train it
backbone.trainable = False
inputs = keras.layers.Input(shape=train_imgs.shape)
backbone.trainable = False
inputs = keras.layers.Input(shape=(224,224,3))
# backbone outputs bottom to up: block1a, block2b, block3b, block5e, top
# shapes: 7x7x1280, 14x14x112, 28x28x40, 56x56x24, 112x112x16
# top, block5e, block3b, block2b, block1a = backbone(inputs)

# FPN: feature pyramid network
# 1. First build feature pyramid to extract features, using 3 layers (layer 3-5)
def fpn(backbone=backbone, inputs=inputs, layers=(3,4,5)):
def fpn(backbone=backbone, inputs=inputs, layers=(3, 4, 5)):
conv3_11 = keras.layers.Conv2D(256, 1, 1, "same")
conv4_11 = keras.layers.Conv2D(256, 1, 1, "same")
conv5_11 = keras.layers.Conv2D(256, 1, 1, "same")
Expand All @@ -123,6 +166,7 @@ def fpn(backbone=backbone, inputs=inputs, layers=(3,4,5)):
p6_output = conv6_33(c5_output)
p7_output = conv7_33(keras.activations.relu(p6_output))
return p3_output, p4_output, p5_output, p6_output, p7_output

### classification and bbox regression head
### 9 is the anchor number
def heads(input_feature, type="classification"):
Expand All @@ -139,15 +183,15 @@ def heads(input_feature, type="classification"):
conv4 = keras.layers.ReLU()(keras.layers.Conv2D(256, 3, 1, "same")(conv3))
outputs = keras.layers.Conv2D(output_size, 3, 1, "same", activation=activ)(conv4)
return outputs

# Run the classification and regression heads over every FPN level and
# flatten each level's spatial grid into a per-anchor axis, so all levels
# can be concatenated into one `[batch, total_anchors, ...]` tensor.
fpn_features = fpn()
cls_outputs, reg_outputs = [], []
for feature in fpn_features:
    cls_output = heads(feature)
    reg_output = heads(feature, "regression")
    # NOTE(review): reshaping with the fixed `args.batch_size` breaks on a
    # final partial batch — consider -1 for the batch dim or drop_last=True.
    cls_outputs.append(keras.ops.reshape(cls_output, (args.batch_size, -1, svhn.LABELS)))
    reg_outputs.append(keras.ops.reshape(reg_output, (args.batch_size, -1, 4)))

# Per-level outputs are joined along the anchor axis; the final model output
# packs regression targets first, then class scores, on the last axis.
cls_outputs = keras.ops.concatenate(cls_outputs, axis=1)
reg_outputs = keras.ops.concatenate(reg_outputs, axis=1)
model_outputs = keras.ops.concatenate([reg_outputs, cls_outputs], axis=-1)
Expand All @@ -156,12 +200,15 @@ def heads(input_feature, type="classification"):

model.compile(
optimizer=keras.optimizers.Adam(learning_rate=args.learning_rate),
loss=(
keras.losses.BinaryFocalCrossEntropy(),
keras.losses.Huber()),
loss=(keras.losses.BinaryFocalCrossentropy(), keras.losses.Huber()),
)

model.fit()
model.fit(
transformed_train_dataset,
epochs=args.epochs,
validation_data=transformed_dev_dataset,
callbacks=[TorchTensorBoardCallback(args.logdir)],
)

# Generate test set annotations, but in `args.logdir` to allow parallel execution.
os.makedirs(args.logdir, exist_ok=True)
Expand Down