nimrossum · nimrossum · Mar 23, 2024 · Mar 23, 2024 · Mar 23, 2024 · Mar 23, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+.venv
+logs
+*.tfrecord
+*.npz
+*.zip
+*.pkl
diff --git a/exam/questions.md b/exam/questions.md
@@ -108,6 +108,8 @@
 
 - Compare Cutout and DropBlock. [5]
 
+- Describe in detail how CutMix is performed. [5]
+
 - Describe Squeeze and Excitation applied to a ResNet block. [5]
 
 - Draw the Mobile inverted bottleneck block (including explanation of separable
@@ -119,3 +121,91 @@
   channels. Write down (or derive) the equation of transposed convolution
   (or equivalently backpropagation through a convolution to its inputs). [5]
 
+#### Questions@:, Lecture 6 Questions
+- Describe the differences among semantic segmentation, image classification,
+  object detection, and instance segmentation, and write down which metrics
+  are used for these tasks. [5]
+
+- Write down how $\mathit{AP}_{50}$ is computed. [5]
+
+- Considering a Fast-RCNN architecture, draw the overall network architecture,
+  explain what a RoI-pooling layer is, show how the network parametrizes
+  bounding boxes and write down the loss. Finally, describe non-maximum
+  suppression and how the Fast-RCNN prediction is performed. [10]
+
+- Considering a Faster-RCNN architecture, describe the region proposal network
+  (what are anchors, architecture including both heads, how are the coordinates
+  of proposals parametrized, what does the loss look like). [10]
+
+- Considering a Mask-RCNN architecture, describe the additions to a Faster-RCNN
+  architecture (the RoI-Align layer, the new mask-producing head). [5]
+
+- Write down the focal loss with class weighting, including the commonly used
+  hyperparameter values. [5]
+
+- Draw the overall architecture of RetinaNet (the computation of
+  $C_1, \ldots, C_7$, the FPN architecture computing $P_1, \ldots, P_7$
+  including the block combining feature maps of different resolutions; the
+  classification and bounding box generation heads, including their output
+  size). Write down the losses for both heads. [10]
+
+- Describe GroupNorm, and compare it to BatchNorm and LayerNorm. [5]
+
+#### Questions@:, Lecture 8 Questions
+- Write down how the Long Short-Term Memory (LSTM) cell operates, including
+  the explicit formulas. Also mention the forget gate bias. [10]
+
+- Write down how the Gated Recurrent Unit (GRU) operates, including
+  the explicit formulas. [10]
+
+- Describe Highway network computation. [5]
+
+- Why can the usual dropout not be used on the recurrent state? Describe
+  how the problem can be alleviated with variational dropout. [5]
+
+- Describe layer normalization including all its parameters, and write down how
+  it is computed (be sure to explicitly state over what is being normalized in
+  case of fully connected layers and convolutional layers). [5]
+
+- Draw a tagger architecture utilizing word embeddings, recurrent
+  character-level word embeddings (including how these are computed from
+  individual characters), and two sentence-level bidirectional RNNs (explaining
+  the bidirectionality) with a residual connection. Where would you put the
+  dropout layers? [10]
+
+#### Questions@:, Lecture 9 Questions
+- In the context of named entity recognition, describe what the BIO encoding
+  is and why it is used. [5]
+
+- Write down the dynamic programming algorithm for decoding a BIO-tag sequence,
+  including its asymptotic complexity. [10]
+
+- In the context of CTC loss, describe regular and extended labelings and
+  write down the algorithm for computing the log probability of a gold label
+  sequence $\boldsymbol y$. [10]
+
+- Describe how CTC predictions are performed using a beam-search. [5]
+
+- Draw the CBOW architecture from `word2vec`, including the sizes of the inputs
+  and the sizes of the outputs and used non-linearities. Also make sure to
+  indicate where the embeddings are being trained. [5]
+
+- Draw the SkipGram architecture from `word2vec`, including the sizes of the
+  inputs and the sizes of the outputs and used non-linearities. Also make sure
+  to indicate where the embeddings are being trained. [5]
+
+- Describe the hierarchical softmax used in `word2vec`. [5]
+
+- Describe the negative sampling proposed in `word2vec`, including
+  the choice of distribution of negative samples. [5]
+
+#### Questions@:, Lecture 10 Questions
+- Write down why subword units are used in text processing, and describe the BPE
+  algorithm for constructing a subword dictionary from a large corpus. [5]
+
+- Write down why subword units are used in text processing, and describe the
+  WordPieces algorithm for constructing a subword dictionary from a large
+  corpus. [5]
+
+- Pinpoint the differences between the BPE and WordPieces algorithms, both
+  during dictionary construction and during inference. [5]
diff --git a/labs/.gitignore b/labs/.gitignore
@@ -3,5 +3,5 @@ logs/
 *.h5
 *.keras
 *.npz
-*.pickle
+*.tfrecord
 *.zip
diff --git a/labs/04/cifar10.py b/labs/04/cifar10.py
@@ -33,7 +33,8 @@ def dataset(self, transform: Callable[[dict[str, np.ndarray]], Any] | None = Non
             return CIFAR10.TorchDataset(self, transform)
 
     class TorchDataset(torch.utils.data.Dataset):
-        def __init__(self, dataset: "Dataset", transform: Callable[[dict[str, np.ndarray]], Any] | None) -> None:
+        def __init__(self, dataset: "CIFAR10.Dataset",
+                     transform: Callable[[dict[str, np.ndarray]], Any] | None) -> None:
             self._dataset = dataset
             self._transform = transform
 

diff --git a/labs/04/cifar10_v2.py b/labs/04/cifar10_v2.py
@@ -0,0 +1,99 @@
+import os
+import sys
+from typing import Any, Callable, Sequence, TextIO, TypedDict
+import urllib.request
+
+import numpy as np
+import torch
+
+
+class CIFAR10:
+    """Loader and evaluator for the CIFAR-10 archive referenced by `_URL`.
+
+    Constructing an instance makes sure the archive exists in the current
+    working directory (downloading it when it is missing) and exposes the
+    `train`, `dev` and `test` splits as `CIFAR10.Dataset` attributes.
+    """
+    # Dimensions of a single image (height, width, channels).
+    H: int = 32
+    W: int = 32
+    C: int = 3
+    # Class names; presumably indexed by the integer labels -- confirm against the data.
+    LABELS: list[str] = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
+
+    # A single example and a whole collection of examples, respectively.
+    Element = TypedDict("Element", {"image": np.ndarray, "label": np.ndarray})
+    Elements = TypedDict("Elements", {"images": np.ndarray, "labels": np.ndarray})
+
+    _URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2324/datasets/cifar10_competition.npz"
+
+    class Dataset(torch.utils.data.Dataset):
+        """Map-style dataset over a dictionary of equally indexed arrays."""
+
+        def __init__(self, data: "CIFAR10.Elements") -> None:
+            """Wrap `data`, storing its labels flattened to one dimension.
+
+            A shallow copy is kept, so the dictionary passed by the caller is
+            left unmodified.
+            """
+            self._data = {**data, "labels": data["labels"].ravel()}
+
+        @property
+        def data(self) -> "CIFAR10.Elements":
+            """The underlying dictionary of arrays."""
+            return self._data
+
+        def __len__(self) -> int:
+            """Return the number of images."""
+            return len(self._data["images"])
+
+        def __getitem__(self, index: int) -> "CIFAR10.Element":
+            """Return the `index`-th example.
+
+            The keys are the keys of the underlying dictionary with a trailing
+            "s" removed (so `images` becomes `image`).
+            """
+            return {key.removesuffix("s"): value[index] for key, value in self._data.items()}
+
+        def transform(self, transform: Callable[["CIFAR10.Element"], Any]) -> "CIFAR10.TransformedDataset":
+            """Return a dataset applying `transform` to every example on access."""
+            return CIFAR10.TransformedDataset(self, transform)
+
+    class TransformedDataset(torch.utils.data.Dataset):
+        """Dataset applying a function to the items of another dataset on access."""
+
+        def __init__(self, dataset: torch.utils.data.Dataset, transform: Callable[..., Any]) -> None:
+            self._dataset = dataset
+            self._transform = transform
+
+        def __len__(self) -> int:
+            """Return the length of the wrapped dataset."""
+            return len(self._dataset)
+
+        def __getitem__(self, index: int) -> Any:
+            """Return the transformed `index`-th item of the wrapped dataset.
+
+            A tuple item is unpacked into positional arguments of the
+            transformation; any other item is passed as a single argument.
+            """
+            item = self._dataset[index]
+            return self._transform(*item) if isinstance(item, tuple) else self._transform(item)
+
+        def transform(self, transform: Callable[..., Any]) -> "CIFAR10.TransformedDataset":
+            """Return a dataset applying `transform` on top of this one."""
+            return CIFAR10.TransformedDataset(self, transform)
+
+    def __init__(self, size: "dict[str, int] | None" = None) -> None:
+        """Load the dataset, downloading the archive first when it is missing.
+
+        Arguments:
+          size: optional mapping from a split name (`train`, `dev`, `test`) to
+            the number of leading examples to keep from that split; splits
+            which are not mentioned are kept whole.
+        """
+        # A `None` default avoids sharing one mutable dictionary among calls.
+        limits = {} if size is None else size
+
+        path = os.path.basename(self._URL)
+        if not os.path.exists(path):
+            print("Downloading CIFAR-10 dataset...", file=sys.stderr)
+            # Download under a temporary name, so that an interrupted transfer
+            # does not leave a truncated file under the final name.
+            tmp_path = "{}.tmp".format(path)
+            urllib.request.urlretrieve(self._URL, filename=tmp_path)
+            os.rename(tmp_path, path)
+
+        # The context manager closes the archive once the arrays are extracted.
+        with np.load(path) as cifar:
+            for dataset in ["train", "dev", "test"]:
+                # Keep the arrays of this split, dropping the split name and the
+                # single separator character following it from every key.
+                data = {key[len(dataset) + 1:]: cifar[key][:limits.get(dataset)]
+                        for key in cifar if key.startswith(dataset)}
+                setattr(self, dataset, self.Dataset(data))
+
+    train: Dataset
+    dev: Dataset
+    test: Dataset
+
+    # Evaluation infrastructure.
+    @staticmethod
+    def evaluate(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
+        """Return the accuracy of `predictions` on `gold_dataset` in percent.
+
+        Raises:
+          RuntimeError: if the number of predictions differs from the number
+            of gold labels.
+        """
+        gold = gold_dataset.data["labels"]
+
+        if len(predictions) != len(gold):
+            raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
+                len(predictions), len(gold)))
+
+        # Converting every comparison to `int` yields a plain Python float below.
+        correct = sum(int(label == prediction) for label, prediction in zip(gold, predictions))
+        return 100 * correct / len(gold)
+
+    @staticmethod
+    def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
+        """Evaluate predictions stored one integer per line in `predictions_file`."""
+        predictions = [int(line) for line in predictions_file]
+        return CIFAR10.evaluate(gold_dataset, predictions)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    # Command-line entry point: score a file of predictions against a gold split.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--evaluate", default=None, type=str, help="Prediction file to evaluate")
+    cli.add_argument("--dataset", default="dev", type=str, help="Gold dataset to evaluate")
+    options = cli.parse_args()
+
+    predictions_path = options.evaluate
+    if predictions_path:
+        # The "utf-8-sig" codec skips a leading byte-order mark, if there is one.
+        with open(predictions_path, "r", encoding="utf-8-sig") as handle:
+            # The dataset is loaded only after the prediction file has been opened.
+            gold_split = getattr(CIFAR10(), options.dataset)
+            score = CIFAR10.evaluate_file(gold_split, handle)
+        print("CIFAR10 accuracy: {:.2f}%".format(score))
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,5 +3,5 @@ logs/ @@
     *.h5
     *.keras
     *.npz
-    *.pickle
+    *.tfrecord
     *.zip