diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..917c1db --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +**/.venv/ +logs/ +mnist.npz +*.zip diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg new file mode 100644 index 0000000..e129fd0 --- /dev/null +++ b/.venv/pyvenv.cfg @@ -0,0 +1,3 @@ +home = C:\Python310 +include-system-site-packages = false +version = 3.10.7 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..dc3f727 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "basic" +} diff --git a/labs/01/expected.txt b/labs/01/expected.txt new file mode 100644 index 0000000..fdaf786 --- /dev/null +++ b/labs/01/expected.txt @@ -0,0 +1,39 @@ +python3 mnist_layers_activations.py --hidden_layers=0 --activation=none +Epoch 1/10 accuracy: 0.7801 - loss: 0.8405 - val_accuracy: 0.9300 - val_loss: 0.2716 +Epoch 5/10 accuracy: 0.9222 - loss: 0.2792 - val_accuracy: 0.9406 - val_loss: 0.2203 +Epoch 10/10 accuracy: 0.9304 - loss: 0.2515 - val_accuracy: 0.9432 - val_loss: 0.2159 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=none +Epoch 1/10 accuracy: 0.8483 - loss: 0.5230 - val_accuracy: 0.9352 - val_loss: 0.2422 +Epoch 5/10 accuracy: 0.9236 - loss: 0.2758 - val_accuracy: 0.9360 - val_loss: 0.2325 +Epoch 10/10 accuracy: 0.9298 - loss: 0.2517 - val_accuracy: 0.9354 - val_loss: 0.2439 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu +Epoch 1/10 accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 +Epoch 5/10 accuracy: 0.9824 - loss: 0.0613 - val_accuracy: 0.9808 - val_loss: 0.0740 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0202 - val_accuracy: 0.9788 - val_loss: 0.0821 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh +Epoch 1/10 accuracy: 0.8529 - loss: 0.5183 - val_accuracy: 0.9564 - val_loss: 0.1632 +Epoch 5/10 accuracy: 0.9800 - loss: 0.0728 - val_accuracy: 0.9740 - val_loss: 0.0853 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0244 - val_accuracy: 0.9782 - val_loss: 0.0772 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +Epoch 1/10 accuracy: 0.7851 - loss: 0.8650 - val_accuracy: 0.9414 - val_loss: 0.2196 +Epoch 5/10 accuracy: 0.9647 - loss: 0.1270 - val_accuracy: 0.9704 - val_loss: 0.1079 +Epoch 10/10 accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9756 - val_loss: 0.0837 + +python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu +Epoch 1/10 accuracy: 0.8497 - loss: 0.5011 - val_accuracy: 0.9664 - val_loss: 0.1225 +Epoch 5/10 accuracy: 0.9862 - loss: 0.0438 - val_accuracy: 0.9734 - val_loss: 0.1026 +Epoch 10/10 accuracy: 0.9932 - loss: 0.0202 - val_accuracy: 0.9818 - val_loss: 0.0865 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu +Epoch 1/10 accuracy: 0.7710 - loss: 0.6793 - val_accuracy: 0.9570 - val_loss: 0.1479 +Epoch 5/10 accuracy: 0.9780 - loss: 0.0783 - val_accuracy: 0.9786 - val_loss: 0.0808 +Epoch 10/10 accuracy: 0.9869 - loss: 0.0481 - val_accuracy: 0.9724 - val_loss: 0.1163 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +Epoch 1/10 accuracy: 0.1072 - loss: 2.3068 - val_accuracy: 0.1784 - val_loss: 2.1247 +Epoch 5/10 accuracy: 0.8825 - loss: 0.4776 - val_accuracy: 0.9164 - val_loss: 0.3686 +Epoch 10/10 accuracy: 0.9294 - loss: 0.2994 - val_accuracy: 0.9386 - val_loss: 0.2671 diff --git a/labs/01/mnist.ps1 b/labs/01/mnist.ps1 new file mode 100644 index 0000000..a274269 --- /dev/null +++ 
b/labs/01/mnist.ps1 @@ -0,0 +1,24 @@ +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=0 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=0 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=tanh +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=3 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +# Write-Output "" diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py index d58b796..bf78be2 100644 --- a/labs/01/mnist_layers_activations.py +++ b/labs/01/mnist_layers_activations.py @@ -10,6 +10,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--activation", default="none", choices=["none", "relu", "tanh", "sigmoid"], help="Activation.") @@ -68,7 +73,7 @@ def main(args: argparse.Namespace) -> dict[str, float]: # Create the model model = keras.Sequential() model.add(keras.Input([MNIST.H, MNIST.W, MNIST.C])) - # TODO: Finish the model. Namely: + # Finish the model. Namely: # - start by adding a `keras.layers.Rescaling(1 / 255)` layer; # - then add a `keras.layers.Flatten()` layer; # - add `args.hidden_layers` number of fully connected hidden layers @@ -76,6 +81,14 @@ def main(args: argparse.Namespace) -> dict[str, float]: # from `args.activation`, allowing "none", "relu", "tanh", "sigmoid"; # - finally, add an output fully connected layer with `MNIST.LABELS` units # and `softmax` activation. 
+ model.add(keras.layers.Rescaling(1 / 255)) + model.add(keras.layers.Flatten()) + + activation = None if args.activation == "none" else args.activation + for _ in range(args.hidden_layers): + model.add(keras.layers.Dense(args.hidden_layer, activation=activation)) + + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) model.compile( optimizer=keras.optimizers.Adam(), diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 8e86bff..819b6b0 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -1,4 +1,10 @@ #!/usr/bin/env python3 + +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + import argparse import numpy as np @@ -12,42 +18,51 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: - # TODO: Load data distribution, each line containing a datapoint -- a string. - with open(args.data_path, "r") as data: + # Load data distribution, each line containing a datapoint -- a string. + data_map = {} + + with open(args.data_path, "r", encoding="utf-8") as data: for line in data: line = line.rstrip("\n") - # TODO: Process the line, aggregating data with built-in Python + + # Process the line, aggregating data with built-in Python # data structures (not NumPy, which is not suitable for incremental # addition and string mapping). + if line in data_map: + data_map[line] += 1 + else: + data_map[line] = 1 - # TODO: Create a NumPy array containing the data distribution. The + # Create a NumPy array containing the data distribution. The + # NumPy array should contain only data, not any mapping. Alternatively, # the NumPy array might be created after loading the model distribution. + data_dist = np.array(list(data_map.values())) / sum(data_map.values()) + + # Load model distribution, each line `string \t probability`. + model_map = {} - # TODO: Load model distribution, each line `string \t probability`. - with open(args.model_path, "r") as model: + with open(args.model_path, "r", encoding="utf-8") as model: for line in model: line = line.rstrip("\n") - # TODO: Process the line, aggregating using Python data structures. + key, value = line.split("\t") + model_map[key] = float(value) - # TODO: Create a NumPy array containing the model distribution. + # Create a NumPy array containing the model distribution; a datapoint + # missing from the model distribution gets probability 0. + model_dist = np.array([model_map.get(key, 0.0) for key in data_map]) - # TODO: Compute the entropy H(data distribution). You should not use - # manual for/while cycles, but instead use the fact that most NumPy methods - # operate on all elements (for example `*` is vector element-wise multiplication). - entropy = ... + # Compute the entropy H(data distribution). + entropy = -np.sum(data_dist * np.log(data_dist)) - # TODO: Compute cross-entropy H(data distribution, model distribution). - # When some data distribution elements are missing in the model distribution, - # return `np.inf`. - crossentropy = ... + # Compute cross-entropy H(data distribution, model distribution); `np.log(0)` + # is `-np.inf`, so a datapoint missing from the model correctly yields `np.inf`. + with np.errstate(divide="ignore"): + crossentropy = -np.sum(data_dist * np.log(model_dist)) - # TODO: Compute KL-divergence D_KL(data distribution, model_distribution), - # again using `np.inf` when needed. - kl_divergence = ... + # Compute KL-divergence D_KL(data distribution, model distribution). + kl_divergence = crossentropy - entropy # Return the computed values for ReCodEx to validate.
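+ # Toy check (hypothetical numbers, not from the assignment data): for data "a a b b" and model lines "a\t0.5" and "b\t0.5", both distributions are [0.5, 0.5], so entropy = crossentropy = ln 2 ≈ 0.6931 and kl_divergence = 0; had "b" been missing from the model, crossentropy and kl_divergence would both be np.inf.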
- return entropy, crossentropy, kl_divergence + return entropy, crossentropy if np.isfinite(crossentropy) else np.inf, kl_divergence if np.isfinite(kl_divergence) else np.inf if __name__ == "__main__": diff --git a/labs/01/output.txt b/labs/01/output.txt new file mode 100644 index 0000000..916c534 --- /dev/null +++ b/labs/01/output.txt @@ -0,0 +1,167 @@ +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.7761 - loss: 0.8442 - val_accuracy: 0.9298 - val_loss: 0.2730 +Epoch 2/10 +1100/1100 12s 11ms/step - accuracy: 0.9057 - loss: 0.3428 - val_accuracy: 0.9336 - val_loss: 0.2418 +Epoch 3/10 +1100/1100 11s 10ms/step - accuracy: 0.9177 - loss: 0.2945 - val_accuracy: 0.9366 - val_loss: 0.2284 +Epoch 4/10 +1100/1100 12s 10ms/step - accuracy: 0.9193 - loss: 0.2839 - val_accuracy: 0.9384 - val_loss: 0.2267 +Epoch 5/10 +1100/1100 11s 10ms/step - accuracy: 0.9228 - loss: 0.2790 - val_accuracy: 0.9392 - val_loss: 0.2208 +Epoch 6/10 +1100/1100 12s 11ms/step - accuracy: 0.9244 - loss: 0.2713 - val_accuracy: 0.9440 - val_loss: 0.2162 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9252 - loss: 0.2662 - val_accuracy: 0.9398 - val_loss: 0.2178 +Epoch 8/10 +1100/1100 14s 12ms/step - accuracy: 0.9269 - loss: 0.2626 - val_accuracy: 0.9398 - val_loss: 0.2169 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9286 - loss: 0.2612 - val_accuracy: 0.9458 - val_loss: 0.2128 +Epoch 10/10 +1100/1100 13s 12ms/step - accuracy: 0.9307 - loss: 0.2515 - val_accuracy: 0.9438 - val_loss: 0.2161 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8422 - loss: 0.5383 - val_accuracy: 0.9346 - val_loss: 0.2400 +Epoch 2/10 +1100/1100 18s 17ms/step - accuracy: 0.9120 - loss: 0.3102 - val_accuracy: 0.9364 - val_loss: 0.2372 +Epoch 3/10 +1100/1100 16s 15ms/step - accuracy: 0.9233 - loss: 0.2774 - val_accuracy: 0.9352 - val_loss: 0.2342 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9225 - loss: 0.2736 - val_accuracy: 0.9366 - val_loss: 0.2336 +Epoch 5/10 +1100/1100 15s 13ms/step - accuracy: 0.9233 - loss: 0.2760 - val_accuracy: 0.9344 - val_loss: 0.2331 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9251 - loss: 0.2683 - val_accuracy: 0.9382 - val_loss: 0.2247 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9261 - loss: 0.2658 - val_accuracy: 0.9356 - val_loss: 0.2367 +Epoch 8/10 +1100/1100 15s 14ms/step - accuracy: 0.9256 - loss: 0.2635 - val_accuracy: 0.9364 - val_loss: 0.2308 +Epoch 9/10 +1100/1100 15s 13ms/step - accuracy: 0.9253 - loss: 0.2625 - val_accuracy: 0.9386 - val_loss: 0.2277 +Epoch 10/10 +1100/1100 15s 13ms/step - accuracy: 0.9301 - loss: 0.2515 - val_accuracy: 0.9358 - val_loss: 0.2441 + +Epoch 1/10 +1100/1100 16s 13ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +Epoch 2/10 +1100/1100 15s 13ms/step - accuracy: 0.9517 - loss: 0.1637 - val_accuracy: 0.9682 - val_loss: 0.1153 +Epoch 3/10 +1100/1100 14s 13ms/step - accuracy: 0.9700 - loss: 0.1021 - val_accuracy: 0.9730 - val_loss: 0.0897 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9774 - loss: 0.0757 - val_accuracy: 0.9754 - val_loss: 0.0835 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9824 - loss: 0.0603 - val_accuracy: 0.9772 - val_loss: 0.0766 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9855 - loss: 0.0486 - val_accuracy: 0.9762 - val_loss: 0.0850 +Epoch 7/10 +1100/1100 14s 13ms/step - accuracy: 0.9889 - loss: 0.0374 - val_accuracy: 0.9776 - val_loss: 0.0774 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9901 - loss: 0.0318 - val_accuracy: 0.9786 - val_loss: 0.0765 +Epoch 9/10 +1100/1100 13s 
12ms/step - accuracy: 0.9928 - loss: 0.0267 - val_accuracy: 0.9804 - val_loss: 0.0766 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9944 - loss: 0.0208 - val_accuracy: 0.9792 - val_loss: 0.0801 + +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.8468 - loss: 0.5308 - val_accuracy: 0.9594 - val_loss: 0.1591 +Epoch 2/10 +1100/1100 13s 12ms/step - accuracy: 0.9433 - loss: 0.1909 - val_accuracy: 0.9646 - val_loss: 0.1300 +Epoch 3/10 +1100/1100 13s 12ms/step - accuracy: 0.9658 - loss: 0.1235 - val_accuracy: 0.9726 - val_loss: 0.0973 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9744 - loss: 0.0909 - val_accuracy: 0.9732 - val_loss: 0.0876 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9798 - loss: 0.0747 - val_accuracy: 0.9788 - val_loss: 0.0770 +Epoch 6/10 +1100/1100 13s 12ms/step - accuracy: 0.9832 - loss: 0.0606 - val_accuracy: 0.9766 - val_loss: 0.0801 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9881 - loss: 0.0460 - val_accuracy: 0.9792 - val_loss: 0.0714 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9894 - loss: 0.0397 - val_accuracy: 0.9768 - val_loss: 0.0741 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9923 - loss: 0.0312 - val_accuracy: 0.9796 - val_loss: 0.0709 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9940 - loss: 0.0257 - val_accuracy: 0.9802 - val_loss: 0.0720 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8072 - loss: 0.8138 - val_accuracy: 0.9452 - val_loss: 0.2121 +Epoch 2/10 +1100/1100 15s 14ms/step - accuracy: 0.9241 - loss: 0.2602 - val_accuracy: 0.9570 - val_loss: 0.1663 +Epoch 3/10 +1100/1100 15s 14ms/step - accuracy: 0.9476 - loss: 0.1863 - val_accuracy: 0.9648 - val_loss: 0.1322 +Epoch 4/10 +1100/1100 14s 13ms/step - accuracy: 0.9583 - loss: 0.1490 - val_accuracy: 0.9670 - val_loss: 0.1168 +Epoch 5/10 +1100/1100 14s 13ms/step - accuracy: 0.9658 - loss: 0.1243 - val_accuracy: 0.9696 - val_loss: 0.1047 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9706 - loss: 0.1065 - val_accuracy: 0.9718 - val_loss: 0.0975 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9758 - loss: 0.0891 - val_accuracy: 0.9740 - val_loss: 0.0918 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9779 - loss: 0.0792 - val_accuracy: 0.9758 - val_loss: 0.0885 +Epoch 9/10 +1100/1100 14s 13ms/step - accuracy: 0.9816 - loss: 0.0681 - val_accuracy: 0.9776 - val_loss: 0.0825 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9766 - val_loss: 0.0831 + +Epoch 1/10 +1100/1100 16s 14ms/step - accuracy: 0.8483 - loss: 0.5002 - val_accuracy: 0.9650 - val_loss: 0.1189 +Epoch 2/10 +1100/1100 16s 14ms/step - accuracy: 0.9609 - loss: 0.1262 - val_accuracy: 0.9718 - val_loss: 0.0971 +Epoch 3/10 +1100/1100 16s 14ms/step - accuracy: 0.9759 - loss: 0.0783 - val_accuracy: 0.9772 - val_loss: 0.0690 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9810 - loss: 0.0597 - val_accuracy: 0.9788 - val_loss: 0.0752 +Epoch 5/10 +1100/1100 15s 14ms/step - accuracy: 0.9855 - loss: 0.0468 - val_accuracy: 0.9748 - val_loss: 0.0817 +Epoch 6/10 +1100/1100 16s 14ms/step - accuracy: 0.9884 - loss: 0.0398 - val_accuracy: 0.9758 - val_loss: 0.0909 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9898 - loss: 0.0318 - val_accuracy: 0.9724 - val_loss: 0.0998 +Epoch 8/10 +1100/1100 16s 14ms/step - accuracy: 0.9892 - loss: 0.0305 - val_accuracy: 0.9778 - val_loss: 0.0952 +Epoch 9/10 +1100/1100 16s 14ms/step - accuracy: 0.9914 - loss: 0.0267 - val_accuracy: 0.9756 - val_loss: 0.0878 +Epoch 10/10 +1100/1100 16s 15ms/step - accuracy: 0.9935 - loss: 
0.0203 - val_accuracy: 0.9770 - val_loss: 0.0974 + +Epoch 1/10 +1100/1100 24s 21ms/step - accuracy: 0.7772 - loss: 0.6657 - val_accuracy: 0.9524 - val_loss: 0.1752 +Epoch 2/10 +1100/1100 24s 22ms/step - accuracy: 0.9525 - loss: 0.1705 - val_accuracy: 0.9682 - val_loss: 0.1261 +Epoch 3/10 +1100/1100 22s 20ms/step - accuracy: 0.9675 - loss: 0.1162 - val_accuracy: 0.9750 - val_loss: 0.0945 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.9735 - loss: 0.0929 - val_accuracy: 0.9720 - val_loss: 0.1018 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.9789 - loss: 0.0794 - val_accuracy: 0.9762 - val_loss: 0.0888 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9806 - loss: 0.0729 - val_accuracy: 0.9760 - val_loss: 0.0961 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9847 - loss: 0.0578 - val_accuracy: 0.9810 - val_loss: 0.0932 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9824 - loss: 0.0643 - val_accuracy: 0.9786 - val_loss: 0.0854 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0487 - val_accuracy: 0.9764 - val_loss: 0.1054 +Epoch 10/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0493 - val_accuracy: 0.9780 - val_loss: 0.1108 + +Epoch 1/10 +1100/1100 23s 20ms/step - accuracy: 0.1052 - loss: 2.3130 - val_accuracy: 0.1808 - val_loss: 1.9383 +Epoch 2/10 +1100/1100 22s 20ms/step - accuracy: 0.2002 - loss: 1.9364 - val_accuracy: 0.2168 - val_loss: 1.8587 +Epoch 3/10 +1100/1100 23s 20ms/step - accuracy: 0.2161 - loss: 1.8392 - val_accuracy: 0.5588 - val_loss: 1.2106 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.5594 - loss: 1.1159 - val_accuracy: 0.8168 - val_loss: 0.7119 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.8359 - loss: 0.6312 - val_accuracy: 0.8994 - val_loss: 0.4360 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.8827 - loss: 0.4854 - val_accuracy: 0.9066 - val_loss: 0.4053 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9007 - loss: 0.4218 - val_accuracy: 0.9166 - val_loss: 0.3660 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9075 - loss: 0.3940 - val_accuracy: 0.9204 - val_loss: 0.3552 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9090 - loss: 0.3922 - val_accuracy: 0.9242 - val_loss: 0.3356 +Epoch 10/10 +1100/1100 24s 22ms/step - accuracy: 0.9191 - loss: 0.3534 - val_accuracy: 0.9270 - val_loss: 0.3286 diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py index 1f99e21..0632b22 100644 --- a/labs/01/pca_first.keras.py +++ b/labs/01/pca_first.keras.py @@ -9,6 +9,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") @@ -32,39 +37,43 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = keras.ops.convert_to_tensor(mnist.train.data["images"][data_indices] / 255, dtype="float32") - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `keras.ops.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... 
+ data = keras.ops.reshape(data, [data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]) - # TODO: Now compute mean of every feature. Use `keras.ops.mean`, and set + # Now compute mean of every feature. Use `keras.ops.mean`, and set # `axis` to zero -- therefore, the mean will be computed across the first # dimension, so across examples. - mean = ... + mean = keras.ops.mean(data, axis=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `keras.ops.transpose` and # matrix multiplication using either Python operator @ or `keras.ops.matmul`. - cov = ... + cov = keras.ops.transpose(data - mean) @ (data - mean) / data.shape[0] - # TODO: Compute the total variance, which is the sum of the diagonal + # Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `keras.ops.diagonal`, # and to sum a tensor use `keras.ops.sum`. - total_variance = ... + total_variance = keras.ops.sum(keras.ops.diagonal(cov)) - # TODO: Now run `args.iterations` of the power iteration algorithm. + # Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`. - v = ... + v = keras.ops.ones(cov.shape[0], dtype="float32") for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute + # In the power iteration algorithm, we compute # 1. v = cov v # The matrix-vector multiplication can be computed as regular matrix multiplication. + v = keras.ops.matmul(cov, v) + # 2. s = l2_norm(v) # The l2_norm can be computed using for example `keras.ops.norm`. + s = keras.ops.norm(v, 2) + # 3. v = v / s - pass + v = v / s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 2e4ef10..deecf06 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -7,6 +7,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") @@ -30,43 +35,46 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = torch.tensor(mnist.train.data["images"][data_indices] / 255, dtype=torch.float32) - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... + data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) - # TODO: Now compute mean of every feature. Use `torch.mean`, and set + # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be # computed across the first dimension, so across examples.
# # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = ... + mean = torch.mean(data, axis=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = ... + cov = torch.matmul(torch.t(data - mean), data - mean) / data.shape[0] - # TODO: Compute the total variance, which is the sum of the diagonal + # Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = ... + total_variance = torch.sum(torch.diagonal(cov)).item() - # TODO: Now run `args.iterations` of the power iteration algorithm. + # Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. - v = ... + v = torch.ones(cov.shape[0], dtype=torch.float32) + for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute - # 1. v = cov v - # The matrix-vector multiplication can be computed as regular matrix multiplication - # or using `torch.mv`. - # 2. s = l2_norm(v) - # The l2_norm can be computed using for example `torch.linalg.vector_norm`. - # 3. v = v / s - pass + # In the power iteration algorithm, we compute + # 1. v = cov v + # The matrix-vector multiplication can be computed as regular matrix multiplication + # or using `torch.mv`. + # 2. s = l2_norm(v) + # The l2_norm can be computed using for example `torch.linalg.vector_norm`. + # 3. v = v / s + v = cov @ v + s = torch.linalg.vector_norm(v) + v = v / s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. diff --git a/labs/01/run.ps1 b/labs/01/run.ps1 new file mode 100644 index 0000000..a68f5e8 --- /dev/null +++ b/labs/01/run.ps1 @@ -0,0 +1 @@ +..\..\.venv\Scripts\python .\pca_first.keras.py diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 new file mode 100644 index 0000000..75ddf37 --- /dev/null +++ b/labs/01/test.ps1 @@ -0,0 +1,4 @@ +python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 7befc72..b708b63 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -8,6 +8,11 @@ import keras import numpy as np import torch + +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. @@ -17,8 +23,8 @@ parser.add_argument("--seed", default=42, type=int, help="Random seed.") parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") # If you add more arguments, ReCodEx will keep them with your default values.
-parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=10, type=int, help="Batch size.") +parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.") parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.") @@ -49,7 +55,7 @@ def on_epoch_end(self, epoch, logs=None): def evaluate_model( model: keras.Model, seed: int = 42, episodes: int = 100, render: bool = False, report_per_episode: bool = False -) -> float: + ) -> float: """Evaluate the given model on CartPole-v1 environment. Returns the average score achieved on the given number of episodes. @@ -86,16 +92,10 @@ def evaluate_model( def main(args: argparse.Namespace) -> keras.Model | None: # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) - if args.threads: - torch.set_num_threads(args.threads) - torch.set_num_interop_threads(args.threads) + torch.set_num_threads(args.threads) + torch.set_num_interop_threads(args.threads) if not args.evaluate: - if args.batch_size is ...: - raise ValueError("You must specify the batch size, either in the defaults or on the command line.") - if args.epochs is ...: - raise ValueError("You must specify the number of epochs, either in the defaults or on the command line.") - # Create logdir name args.logdir = os.path.join("logs", "{}-{}-{}".format( os.path.basename(globals().get("__file__", "notebook")), @@ -107,16 +107,37 @@ def main(args: argparse.Namespace) -> keras.Model | None: data = np.loadtxt("gym_cartpole_data.txt") observations, labels = data[:, :-1], data[:, -1].astype(np.int32) + + # TODO: Create the model in the `model` variable. Note that # the model can perform any of: # - binary classification with 1 output and sigmoid activation; # - two-class classification with 2 outputs and softmax activation. - model = ... + + # Convert the labels to one-hot encoding + labels = keras.ops.one_hot(labels, num_classes=2) + + model = keras.Sequential(name="gym_model", layers=[ + # Input layer + keras.layers.Input(shape=(observations.shape[1],)), + # Hidden layers + keras.layers.Dense(8, activation="tanh"), + # Output layer + keras.layers.Dense(2, activation="softmax"), # 2 outputs because we have 2 actions in the cart pole problem + ]) + + + model.summary() # TODO: Prepare the model for training using the `model.compile` method. - model.compile(...) + model.compile( + loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), + optimizer=keras.optimizers.Adam(learning_rate=0.009), + metrics=["accuracy"], + ) tb_callback = TorchTensorBoardCallback(args.logdir) + labels = keras.ops.one_hot(labels,num_classes=2) model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback]) # Save the model, without the optimizer state. diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index 6655133..116ae98 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -11,6 +11,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. 
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -107,8 +112,34 @@ def main(args: argparse.Namespace) -> dict[str, float]: # in `model.optimizer._learning_rate` if needed), so after training, the learning rate # should be `args.learning_rate_final`. + optimizer = None + lr, momen, decay, final_lr, epochs = args.learning_rate, args.momentum, args.decay, args.learning_rate_final, args.epochs + if decay: + if not final_lr: + print("Please define a final learning rate!") + else: + steps = mnist.train.size/args.batch_size*epochs + init_lr = args.learning_rate + if decay == "linear": + lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) + elif decay == "exponential": + decay_rate = final_lr/init_lr + lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate) + elif decay == "cosine": + alpha = final_lr/init_lr + lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha) + + if args.optimizer == 'SGD': + if momen: + optimizer = keras.optimizers.SGD(learning_rate=lr, momentum=momen, nesterov=True) + else: + optimizer = keras.optimizers.SGD(learning_rate=lr) + elif args.optimizer =="Adam": + optimizer = keras.optimizers.Adam(learning_rate=lr) + + model.compile( - optimizer=..., + optimizer=optimizer, loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[keras.metrics.SparseCategoricalAccuracy("accuracy")], ) @@ -121,6 +152,10 @@ def main(args: argparse.Namespace) -> dict[str, float]: validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), callbacks=[tb_callback], ) + model.summary() + + if decay: + print("Next learning rate to be used:", model.optimizer.learning_rate.item()) # Return development metrics for ReCodEx to validate. 
return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} diff --git a/labs/02/sgd_backpropagation.ps1 b/labs/02/sgd_backpropagation.ps1 new file mode 100644 index 0000000..f613710 --- /dev/null +++ b/labs/02/sgd_backpropagation.ps1 @@ -0,0 +1,50 @@ +# Examples: +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Dev accuracy after epoch 3 is 95.16 +# Dev accuracy after epoch 4 is 95.50 +# Dev accuracy after epoch 5 is 95.96 +# Dev accuracy after epoch 6 is 96.04 +# Dev accuracy after epoch 7 is 95.82 +# Dev accuracy after epoch 8 is 95.92 +# Dev accuracy after epoch 9 is 95.96 +# Dev accuracy after epoch 10 is 96.16 +# Test accuracy after epoch 10 is 95.26 + +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Dev accuracy after epoch 3 is 95.56 +# Dev accuracy after epoch 4 is 95.98 +# Dev accuracy after epoch 5 is 96.24 +# Dev accuracy after epoch 6 is 96.74 +# Dev accuracy after epoch 7 is 96.52 +# Dev accuracy after epoch 8 is 96.54 +# Dev accuracy after epoch 9 is 97.04 +# Dev accuracy after epoch 10 is 97.02 +# Test accuracy after epoch 10 is 96.16 + +# Tests: +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Expected +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Test accuracy after epoch 2 is 93.15 + +# Actual +# Dev accuracy after epoch 1 is 92.98 +# Dev accuracy after epoch 2 is 93.98 +# Test accuracy after epoch 2 is 92.73 + + +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Expected: +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Test accuracy after epoch 2 is 93.54 + +# Actual: +# Dev accuracy after epoch 1 is 94.16 +# Dev accuracy after epoch 2 is 94.98 +# Test accuracy after epoch 2 is 93.56 diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py index cff312a..e3cfacf 100644 --- a/labs/02/sgd_backpropagation.py +++ b/labs/02/sgd_backpropagation.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -12,15 +15,26 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layer", default=100, type=int, help="Size of the hidden layer.") +parser.add_argument( + "--hidden_layer", default=100, type=int, help="Size of the hidden layer." +) parser.add_argument("--learning_rate", default=0.1, type=float, help="Learning rate.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." 
+) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -30,29 +44,57 @@ def __init__(self, args: argparse.Namespace) -> None: self._args = args self._W1 = keras.Variable( - keras.random.normal([MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], stddev=0.1, seed=args.seed), + keras.random.normal( + [MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], + stddev=0.1, + seed=args.seed, + ), trainable=True, ) self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True) - # TODO: Create variables: + # Create variables: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable( + keras.random.normal( + [args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed + ), + trainable=True, + ) + + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> torch.Tensor: - # TODO: Define the computation of the network. Notably: + # Define the computation of the network. Notably: # - start by casting the input byte image to `float32` with `keras.ops.cast` + + cast_inputs = keras.ops.cast(inputs, dtype="float32") + # - then divide the tensor by 255 to normalize it to the `[0, 1]` range + + normalized_inputs = cast_inputs / 255 + # - then reshape it to the shape `[inputs.shape[0], -1]`. # The -1 is a wildcard which is computed so that the number # of elements before and after the reshape is preserved. + + reshaped_inputs = keras.ops.reshape(normalized_inputs, [inputs.shape[0], -1]) + # - then multiply it by `self._W1` and then add `self._b1` # - apply `keras.ops.tanh` + + hidden_layer_output = keras.ops.tanh( + keras.ops.matmul(reshaped_inputs, self._W1) + self._b1 + ) + # - multiply the result by `self._W2` and then add `self._b2` + + output_layer = keras.ops.matmul(hidden_layer_output, self._W2) + self._b2 + + # - finally apply `keras.ops.softmax` and return the result - return ... + return keras.ops.softmax(output_layer) def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -62,49 +104,54 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # Size of the batch is `self._args.batch_size`, except for the last, which # might be smaller. - # TODO: Compute the predicted probabilities of the batch images using `self.predict` - probabilities = ... + # Compute the predicted probabilities of the batch images using `self.predict` + probabilities = self.predict(batch["images"]) - # TODO: Manually compute the loss: + # Manually compute the loss: # - For every batch example, the loss is the categorical crossentropy of the # predicted probabilities and the gold label. To compute the crossentropy, you can # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - loss = ...
- + loss = keras.ops.mean( + keras.ops.categorical_crossentropy( + keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities + ) + ) # We create a list of all variables. Note that a `keras.Model/Layer` automatically # tracks owned variables, so we could also use `self.trainable_variables` # (or even `self.variables`, which is useful for loading/saving). variables = [self._W1, self._b1, self._W2, self._b2] - # TODO: Compute the gradient of the loss with respect to variables using + # Compute the gradient of the loss with respect to variables using # backpropagation algorithm by # - first resetting the gradients of all variables to zero with `self.zero_grad()`, # - then calling `loss.backward()`. - ... + self.zero_grad() + loss.backward() gradients = [variable.value.grad for variable in variables] with torch.no_grad(): for variable, gradient in zip(variables, gradients): - # TODO: Perform the SGD update with learning rate `self._args.learning_rate` + # Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variable.assign_sub(self._args.learning_rate * gradient) def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction correct = 0 for batch in dataset.batches(self._args.batch_size): - # TODO: Compute the probabilities of the batch images using `self.predict` + # Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(batch["images"])) - # TODO: Evaluate how many batch examples were predicted + # Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... - + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -116,11 +163,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST() @@ -132,16 +187,23 @@ def main(args: argparse.Namespace) -> tuple[float, float]: model = Model(args) for epoch in range(args.epochs): - # TODO: Run the `train_epoch` with `mnist.train` dataset - - # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ...
- print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) + # Run the `train_epoch` with `mnist.train` dataset + model.train_epoch(mnist.train) + + # Evaluate the dev data using `evaluate` on `mnist.dev` dataset + accuracy = model.evaluate(mnist.dev) + print( + "Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), + flush=True, + ) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) - # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ... - print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) + # Evaluate the test data using `evaluate` on `mnist.test` dataset + test_accuracy = model.evaluate(mnist.test) + print( + "Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), + flush=True, + ) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) # Return dev and test accuracies for ReCodEx to validate. diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py index 422d3e9..f023328 100644 --- a/labs/02/sgd_manual.py +++ b/labs/02/sgd_manual.py @@ -12,6 +12,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -39,7 +44,9 @@ def __init__(self, args: argparse.Namespace) -> None: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), + trainable=True) + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: # TODO(sgd_backpropagation): Define the computation of the network. Notably: @@ -56,7 +63,14 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor # TODO: In order to support manual gradient computation, you should # return not only the output layer, but also the hidden layer after applying # tanh, and the input layer after reshaping. - return ..., ..., ... + input = keras.ops.cast(inputs, dtype="float32") + input = torch.div(input, 255) + input = input.reshape([input.shape[0], -1]) + hidden_input = keras.ops.matmul(input,self._W1) + self._b1 + hidden_output = keras.ops.tanh(hidden_input) + sm_input = keras.ops.matmul(hidden_output,self._W2) + self._b2 + output = keras.ops.softmax(sm_input) + return input, hidden_output, output def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -72,7 +86,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # # Compute the input layer, hidden layer and output layer # of the batch images using `self.predict`. - + input_layer, hidden_layer, probabilities = self.predict(torch.tensor(batch['images'])) # TODO: Compute the gradient of the loss with respect to all # variables. 
Note that the loss is computed as in `sgd_backpropagation`: # - For every batch example, the loss is the categorical crossentropy of the @@ -80,7 +94,6 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - # # During the gradient computation, you will need to compute # a batched version of a so-called outer product # `C[a, i, j] = A[a, i] * B[a, j]`, # which you can achieve by # `A[:, :, np.newaxis] * B[:, np.newaxis, :]` # or with # `keras.ops.einsum("ai,aj->aij", A, B)`. + gold_labels = keras.ops.one_hot(batch["labels"], num_classes=MNIST.LABELS) + loss = torch.mean(keras.ops.categorical_crossentropy(gold_labels, probabilities)) + + # For softmax followed by categorical crossentropy, the gradient with respect + # to the pre-softmax values is the difference of predicted and gold distributions. + gd_loss = probabilities - gold_labels + gd_b2 = gd_loss + gd_w2 = keras.ops.einsum("ai,aj->aij", hidden_layer, gd_loss) + gd_h = keras.ops.matmul(gd_loss, keras.ops.transpose(self._W2)) + # tanh'(x) = 1 - tanh(x)^2, and `hidden_layer` already holds tanh(x). + gd_h_i = gd_h * (1 - keras.ops.square(hidden_layer)) + gd_b1 = gd_h_i + gd_w1 = keras.ops.einsum("ai,aj->aij", input_layer, gd_h_i) # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variables = [self._W1, self._b1, self._W2, self._b2] + gradients = [gd_w1, gd_b1, gd_w2, gd_b2] + with torch.no_grad(): + for variable, gradient in zip(variables, gradients): + variable.assign_sub(self._args.learning_rate * keras.ops.mean(gradient, axis=0)) + def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction correct = 0 for batch in dataset.batches(self._args.batch_size): # TODO: Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(torch.tensor(batch["images"]))[2]) # TODO(sgd_backpropagation): Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -135,14 +166,14 @@ for epoch in range(args.epochs): # TODO: Run the `train_epoch` with `mnist.train` dataset - + model.train_epoch(mnist.train) # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ... + accuracy = model.evaluate(mnist.dev) print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ...
+ test_accuracy = model.evaluate(mnist.test) print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) diff --git a/labs/02/test.ps1 b/labs/02/test.ps1 new file mode 100644 index 0000000..fa38f74 --- /dev/null +++ b/labs/02/test.ps1 @@ -0,0 +1 @@ +../../.venv/Scripts/python .\gym_cartpole.py && ../../.venv/Scripts/python .\gym_cartpole.py --evaluate diff --git a/labs/03/mnist_ensemble.ps1 b/labs/03/mnist_ensemble.ps1 new file mode 100644 index 0000000..526a6bd --- /dev/null +++ b/labs/03/mnist_ensemble.ps1 @@ -0,0 +1,2 @@ +python3 mnist_ensemble.py --epochs=1 --models=5 +python3 mnist_ensemble.py --epochs=1 --models=5 --hidden_layers=200 diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py index ebffcf9..93bb2eb 100644 --- a/labs/03/mnist_ensemble.py +++ b/labs/03/mnist_ensemble.py @@ -54,11 +54,21 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: print("Done") individual_accuracies, ensemble_accuracies = [], [] for model in range(args.models): - # TODO: Compute the accuracy on the dev set for the individual `models[model]`. - individual_accuracy = ... + # Compute the accuracy on the dev set for the individual `models[model]`. + individual_accuracy = models[model].evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] - # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. + # Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. # # Generally you can choose one of the following approaches: # 1) Use Keras Functional API and construct a `keras.Model` averaging the models # 2) Manually perform the averaging (using PyTorch or NumPy). In this case you do not # need to construct Keras ensemble model at all, and instead call `model.predict` # on the individual models and average the results. To measure accuracy, # either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`. - ensemble_accuracy = ...
+ inputs = keras.Input(shape=(MNIST.H, MNIST.W, MNIST.C)) + # `keras.layers.Average` requires at least two inputs, so for the first + # "ensemble" fall back to the single model's output. + outputs = [m(inputs) for m in models[:model + 1]] + ensemble_output = outputs[0] if len(outputs) == 1 else keras.layers.Average()(outputs) + ensemble_model = keras.Model(inputs=inputs, outputs=ensemble_output) + + ensemble_model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + ensemble_accuracy = ensemble_model.evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] # Store the accuracies individual_accuracies.append(individual_accuracy) diff --git a/labs/03/mnist_regularization.ps1 b/labs/03/mnist_regularization.ps1 new file mode 100644 index 0000000..2a61e88 --- /dev/null +++ b/labs/03/mnist_regularization.ps1 @@ -0,0 +1,24 @@ +# Run script from root repo directory + +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.5 --hidden_layers 300 300 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.3 + +# Expected +# accuracy: 0.5981 - loss: 1.2688 - val_accuracy: 0.9174 - val_loss: 0.3051 +# accuracy: 0.3429 - loss: 1.9163 - val_accuracy: 0.8826 - val_loss: 0.4937 +# accuracy: 0.7014 - loss: 1.0412 - val_accuracy: 0.9236 - val_loss: 0.2776 +# accuracy: 0.7006 - loss: 1.0429 - val_accuracy: 0.9232 - val_loss: 0.2801 +# accuracy: 0.7102 - loss: 1.3015 - val_accuracy: 0.9276 - val_loss: 0.7656 +# accuracy: 0.7113 - loss: 1.6854 - val_accuracy: 0.9332 - val_loss: 1.3709 + +# Actual +# accuracy: 0.6178 - loss: 1.2374 - val_accuracy: 0.9164 - val_loss: 0.3045 +# accuracy: 0.3412 - loss: 1.8919 - val_accuracy: 0.8818 - val_loss: 0.4794 +# accuracy: 0.6948 - loss: 1.0394 - val_accuracy: 0.9186 - val_loss: 0.2859 +# accuracy: 0.6947 - loss: 1.0410 - val_accuracy: 0.9184 - val_loss: 0.2885 +# accuracy: 0.6996 - loss: 1.3013 - val_accuracy: 0.9228 - val_loss: 0.7735 +# accuracy: 0.7102 - loss: 1.6879 - val_accuracy: 0.9284 - val_loss: 1.3739 diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py index cd78fcf..0b2e5a2 100644 --- a/labs/03/mnist_regularization.py +++ b/labs/03/mnist_regularization.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch @@ -15,12 +18,20 @@ parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--dropout", default=0, type=float, help="Dropout regularization.") parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes.") +parser.add_argument( + "--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes." +) parser.add_argument("--label_smoothing", default=0, type=float, help="Label smoothing.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay strength.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) +parser.add_argument( + "--weight_decay", default=0, type=float, help="Weight decay strength." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -32,7 +43,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -43,10 +57,24 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) def main(args: argparse.Namespace) -> dict[str, float]: @@ -57,16 +85,24 @@ def main(args: argparse.Namespace) -> dict[str, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST(size={"train": 5_000}) - # TODO: Incorporate dropout to the model below. Namely, add + # Incorporate dropout to the model below. Namely, add # a `keras.layers.Dropout` layer with `args.dropout` rate after # the `Flatten` layer and after each `Dense` hidden layer (but not after # the output `Dense` layer). @@ -74,11 +110,15 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = keras.Sequential() model.add(keras.layers.Rescaling(1 / 255)) model.add(keras.layers.Flatten()) + model.add(keras.layers.Dropout(args.dropout)) + for hidden_layer in args.hidden_layers: model.add(keras.layers.Dense(hidden_layer, activation="relu")) + model.add(keras.layers.Dropout(rate=args.dropout)) + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) - # TODO: Implement label smoothing with the given `args.label_smoothing` strength. 
+ # Implement label smoothing with the given `args.label_smoothing` strength. # You need to change the `SparseCategorical{Crossentropy,Accuracy}` to # `Categorical{Crossentropy,Accuracy}`, because `label_smoothing` is supported # only by the `CategoricalCrossentropy`. That means you also need to modify @@ -86,29 +126,52 @@ # of the gold class to a full categorical distribution (you can use either NumPy, # or there is a helper method also in the `keras.utils` module). - # TODO: Create a `keras.optimizers.AdamW`, using the default learning + # Create a `keras.optimizers.AdamW`, using the default learning # rate and a weight decay of strength `args.weight_decay`. Then call the # `exclude_from_weight_decay` method to specify that all variables with "bias" # in their name should not be decayed. - optimizer = ... - - model.compile( - optimizer=optimizer, - loss=keras.losses.SparseCategoricalCrossentropy(), - metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], - ) + optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay) + optimizer.exclude_from_weight_decay(var_names=["bias"]) + + # Label smoothing requires dense categorical targets, so remember whether it is enabled. + smooth_labels = args.label_smoothing != 0 + + if smooth_labels: + model.compile( + optimizer=optimizer, + loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing), + metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")], + ) + else: + model.compile( + optimizer=optimizer, + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) tb_callback = TorchTensorBoardCallback(args.logdir) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], - batch_size=args.batch_size, epochs=args.epochs, - validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), + mnist.train.data["images"], + keras.utils.to_categorical( + mnist.train.data["labels"], num_classes=mnist.LABELS + ) if smooth_labels else mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + validation_data=( + mnist.dev.data["images"], + keras.utils.to_categorical( + mnist.dev.data["labels"], num_classes=mnist.LABELS + ) if smooth_labels else mnist.dev.data["labels"], + ), callbacks=[tb_callback], ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/03/uppercase.py b/labs/03/uppercase.py index c975e3f..c83d5c5 100644 --- a/labs/03/uppercase.py +++ b/labs/03/uppercase.py @@ -10,16 +10,16 @@ from uppercase_data import UppercaseData -# TODO: Set reasonable values for the hyperparameters, especially for +# Set reasonable values for the hyperparameters, especially for # `alphabet_size`, `batch_size`, `epochs`, and `window`. # Also, you can set the number of threads to 0 to use all your CPU cores.
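+# With the defaults chosen below, each training example is a window of +# 2 * 4 + 1 = 9 character ids (4 characters of context on each side of the +# classified character) over an alphabet of the 70 most frequent characters.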
parser = argparse.ArgumentParser() -parser.add_argument("--alphabet_size", default=..., type=int, help="If given, use this many most frequent chars.") -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--alphabet_size", default=70, type=int, help="If given, use this many most frequent chars.") +parser.add_argument("--batch_size", default=1024, type=int, help="Batch size.") +parser.add_argument("--epochs", default=2, type=int, help="Number of epochs.") parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--window", default=..., type=int, help="Window size to use.") +parser.add_argument("--threads", default=0, type=int, help="Maximum number of threads to use.") +parser.add_argument("--window", default=4, type=int, help="Window size to use.") class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None: # Load data uppercase_data = UppercaseData(args.window, args.alphabet_size) - # TODO: Implement a suitable model, optionally including regularization, select + # Implement a suitable model, optionally including regularization, select # good hyperparameters and train the model. # # The inputs are _windows_ of fixed size (`args.window` characters on the left, @@ -79,16 +79,34 @@ # You can then flatten the one-hot encoded windows and follow with a dense layer. # - Alternatively, you can use `keras.layers.Embedding` (which is an efficient # implementation of one-hot encoding followed by a Dense layer) and flatten afterwards. - model = ... + model = keras.Sequential([ + keras.layers.InputLayer(shape=[2 * args.window + 1], dtype="int32"), + # `Embedding` already implements one-hot encoding followed by a Dense layer, + # so a separate `CategoryEncoding` would be redundant (and would collapse the + # window into a single multi-hot vector, discarding the character positions). + keras.layers.Embedding(len(uppercase_data.train.alphabet), 8), + keras.layers.Flatten(), + keras.layers.Dense(64, activation='relu'), + keras.layers.Dropout(rate=0.5), + keras.layers.Dense(1, activation='sigmoid'), # sigmoid output for the binary uppercase/lowercase decision + ]) + + # Train on the windows and binary labels provided by UppercaseData. + model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.BinaryCrossentropy(), + metrics=[keras.metrics.BinaryAccuracy(name="accuracy")], + ) + model.fit( + uppercase_data.train.data["windows"], uppercase_data.train.data["labels"], + batch_size=args.batch_size, epochs=args.epochs, + validation_data=(uppercase_data.dev.data["windows"], uppercase_data.dev.data["labels"]), + ) + + # Generate correctly capitalized test set. + predictions = model.predict(uppercase_data.test.data["windows"], batch_size=args.batch_size) - # TODO: Generate correctly capitalized test set. # Use `uppercase_data.test.text` as input, capitalize suitable characters, # and write the result to predictions_file (which is # `uppercase_test.txt` in the `args.logdir` directory). os.makedirs(args.logdir, exist_ok=True) with open(os.path.join(args.logdir, "uppercase_test.txt"), "w", encoding="utf-8") as predictions_file: - ...
- +  new_text = "" + # `predictions` holds one uppercase probability per character of the test text. + for prediction, char in zip(predictions, uppercase_data.test.text): + if prediction > 0.5: + new_text += char.upper() + else: + new_text += char + predictions_file.write(new_text) if __name__ == "__main__": args = parser.parse_args([] if "__file__" not in globals() else None) diff --git a/labs/04/cifar_competition.ps1 b/labs/04/cifar_competition.ps1 new file mode 100644 index 0000000..0d919fe --- /dev/null +++ b/labs/04/cifar_competition.ps1 @@ -0,0 +1 @@ +clear && python .\cifar_competition.py diff --git a/labs/04/cifar_competition.py b/labs/04/cifar_competition.py index 0541de8..be29019 100644 --- a/labs/04/cifar_competition.py +++ b/labs/04/cifar_competition.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -11,13 +14,23 @@ from cifar10 import CIFAR10 -# TODO: Define reasonable defaults and optionally more parameters. +# Define reasonable defaults and optionally more parameters. # Also, you can set the number of threads to 0 to use all your CPU cores. parser = argparse.ArgumentParser() -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=128, type=int, help="Batch size.") +parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") +# parser.add_argument("--epochs", default=200, type=int, help="Number of epochs.") +parser.add_argument("--learning_rate", default=0.001, type=float, help="Initial learning rate.") +parser.add_argument( + "--weight_decay", default=1e-4, type=float, help="L2 regularization weight decay." +) +parser.add_argument( + "--label_smoothing", default=0.1, type=float, help="Label smoothing." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use."
+) class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -28,7 +41,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -39,13 +55,51 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) - + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) + +def create_res(input_layer, filters, kernel_size, strides): + # Note: the final Add requires the residual branch to preserve the input shape, + # so this block assumes `strides == 1` and `filters` matching the input channels. + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + activation=None, + use_bias=False, # the bias is redundant before batch normalization + )(input_layer) + + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Activation("relu")(h) + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=1, + padding="same", + activation=None, + use_bias=False, + )(h) + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Add()([input_layer, h]) + h = keras.layers.Activation("relu")(h) + return h def main(args: argparse.Namespace) -> None: + # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) if args.threads: @@ -53,23 +107,75 @@ def main(args: argparse.Namespace) -> None: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data cifar = CIFAR10() - # TODO: Create the model and train it - model = ...
+ # Create the model and train it + inputs = keras.Input(shape=cifar.train.data["images"][0].shape) + h = keras.layers.Rescaling(1 / 255)(inputs) + h = keras.layers.Conv2D(64, 3, 1, "same", activation="relu")(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = keras.layers.Dropout(0.2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.Flatten()(h) + h = keras.layers.Dropout(0.2)(h) + h = keras.layers.Dense(200, activation="relu")(h) + outputs = keras.layers.Dense(len(CIFAR10.LABELS), activation="softmax")(h) + + model = keras.Model(inputs=inputs, outputs=outputs) + model.summary() + + # Decay the learning rate over the total number of optimizer steps, + # i.e., the number of batches per epoch times the number of epochs. + lr_schedule = keras.optimizers.schedules.CosineDecay( + initial_learning_rate=args.learning_rate, + decay_steps=len(cifar.train.data["images"]) // args.batch_size * args.epochs + ) + + model.compile( + optimizer=keras.optimizers.Adam( + learning_rate=lr_schedule, + weight_decay=args.weight_decay), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + model.fit( + cifar.train.data["images"], + cifar.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + ) + + # Create the log directory before saving the model into it. + os.makedirs(args.logdir, exist_ok=True) + model.save(os.path.join(args.logdir, "cifar.h5"), include_optimizer=False) # Generate test set annotations, but in `args.logdir` to allow parallel execution. os.makedirs(args.logdir, exist_ok=True) - with open(os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8") as predictions_file: - # TODO: Perform the prediction on the test data. - for probs in model.predict(...): + with open( + os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8" + ) as predictions_file: + # Perform the prediction on the test data.
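+ # `model.predict` returns an array of shape (number of test images, 10) with + # the softmax class probabilities; `np.argmax` picks the most probable label.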
+ for probs in model.predict( + cifar.test.data["images"], batch_size=args.batch_size + ): print(np.argmax(probs), file=predictions_file) diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt new file mode 100644 index 0000000..63271eb --- /dev/null +++ b/labs/04/mnist_cnn results.txt @@ -0,0 +1,29 @@ +👉 TEST 1 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 + +👉 TEST 2 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606 + +👉 TEST 3 +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894 + +👉 TEST 4 +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079 + +👉 TEST 5 +python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 17ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537 + +👉 TEST 6 +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.7476 - loss: 0.7841 - val_accuracy: 0.9370 - val_loss: 0.2037 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734 diff --git a/labs/04/mnist_cnn.ps1 b/labs/04/mnist_cnn.ps1 new file mode 100644 index 0000000..bf78797 --- /dev/null +++ b/labs/04/mnist_cnn.ps1 @@ -0,0 +1,30 @@ +"" +"👉 TEST 1" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432" +"" +"👉 TEST 2" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606" +"" +"👉 TEST 3" +"python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894" +"" +"👉 TEST 4" +"python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079" +"" +"👉 TEST 5" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32" +python3 mnist_cnn.py --epochs=1 
--cnn=CB-6-3-5-valid,F,H-32 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537" +"" +"👉 TEST 6" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734" diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index a3a91cd..b3c5727 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise +import re + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch from mnist import MNIST @@ -11,42 +16,115 @@ parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") -parser.add_argument("--cnn", default=None, type=str, help="CNN architecture.") +parser.add_argument( + "--cnn", + default="CB-16-5-2-same,M-3-2,F,H-100,D-0.5", + type=str, + help="CNN architecture.", +) parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. +def create_layer(layer_type, layer_args, hidden): + # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU + # activation and specified number of filters, kernel size, stride and padding. + if layer_type == "C": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=int(kernel_size), + strides=int(stride), + padding=padding, + activation="relu", + )(hidden) + return hidden + + # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. + # In detail, start with a convolutional layer **without bias** and activation, + # then add a batch normalization layer, and finally the ReLU activation. + if layer_type == "CB": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=int(kernel_size), + strides=int(stride), + padding=padding, + use_bias=False, + )(hidden) + hidden = keras.layers.BatchNormalization()(hidden) + hidden = keras.layers.ReLU()(hidden) + return hidden + + # - `M-pool_size-stride`: Add max pooling with specified size and stride, using + # the default "valid" padding. + if layer_type == "M": + pool_size, stride = layer_args + hidden = keras.layers.MaxPooling2D( + pool_size=int(pool_size), + strides=int(stride), + )(hidden) + return hidden + + # - `R-[layers]`: Add a residual connection. The `layers` contain a specification + # of at least one convolutional layer (but not a recursive residual connection `R`).
+ # The input to the `R` layer should be processed sequentially by `layers`, and the + # produced output (after the ReLU nonlinearity of the last layer) should be added + # to the input (of this `R` layer). + if layer_type == "R": + input_layer = hidden + # Re-join the dash-split specification, strip the enclosing "[" and "]", + # and split the inner layer specifications on commas. + layers = "-".join(layer_args)[1:-1].split(",") + + for inner_layer in layers: + inner_type, *inner_args = inner_layer.split("-") + hidden = create_layer(inner_type, inner_args, hidden) + + hidden = keras.layers.Add()([input_layer, hidden]) + return hidden + + # - `F`: Flatten inputs. Must appear exactly once in the architecture. + if layer_type == "F": + hidden = keras.layers.Flatten()(hidden) + return hidden + + # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. + if layer_type == "H": + hidden_layer_size, = layer_args + hidden = keras.layers.Dense(units=int(hidden_layer_size), activation="relu")(hidden) + return hidden + + # - `D-dropout_rate`: Apply dropout with the given dropout rate. + if layer_type == "D": + dropout_rate, = layer_args + hidden = keras.layers.Dropout(rate=float(dropout_rate))(hidden) + return hidden + + class Model(keras.Model): def __init__(self, args: argparse.Namespace) -> None: - # TODO: Create the model. The template uses the functional API, but + # Create the model. The template uses the functional API, but # feel free to use subclassing if you want. inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]) hidden = keras.layers.Rescaling(1 / 255)(inputs) - # TODO: Add CNN layers specified by `args.cnn`, which contains - # a comma-separated list of the following layers: - # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU - # activation and specified number of filters, kernel size, stride and padding. - # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. - # In detail, start with a convolutional layer **without bias** and activation, - # then add a batch normalization layer, and finally the ReLU activation. - # - `M-pool_size-stride`: Add max pooling with specified size and stride, using - # the default "valid" padding. - # - `R-[layers]`: Add a residual connection. The `layers` contain a specification - # of at least one convolutional layer (but not a recursive residual connection `R`). - # The input to the `R` layer should be processed sequentially by `layers`, and the - # produced output (after the ReLU nonlinearity of the last layer) should be added - # to the input (of this `R` layer). - # - `F`: Flatten inputs. Must appear exactly once in the architecture. - # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. - # - `D-dropout_rate`: Apply dropout with the given dropout rate. + # Split on commas that are not inside the brackets of an `R-[...]` block. + cnn_args = re.split(r",(?![^\[]*\])", args.cnn) + + for layer in cnn_args: + layer_type, *layer_args = layer.split("-") + hidden = create_layer(layer_type, layer_args, hidden) + # You can assume the resulting network is valid; it is fine to crash if it is not. # # Produce the results in the variable `hidden`. - hidden = ...
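+ # For example, "CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50" is split + # into ["CB-8-3-5-valid", "R-[CB-8-3-1-same,CB-8-3-1-same]", "F", "H-50"]; the + # negative lookahead prevents splitting on the commas inside the `R-[...]` block.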
# Add the final output layer outputs = keras.layers.Dense(MNIST.LABELS, activation="softmax")(hidden) @@ -73,13 +151,19 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], - batch_size=args.batch_size, epochs=args.epochs, + mnist.train.data["images"], + mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1 new file mode 100644 index 0000000..3416b36 --- /dev/null +++ b/labs/04/mnist_multiple.ps1 @@ -0,0 +1,11 @@ +"" +"👉 TEST 1" +"python3 mnist_multiple.py --epochs=1 --batch_size=50" +python3 mnist_multiple.py --epochs=1 --batch_size=50 +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984" +"" +"👉 TEST 2" +"python3 mnist_multiple.py --epochs=1 --batch_size=100" +python3 mnist_multiple.py --epochs=1 --batch_size=100 +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157" +"" diff --git a/labs/04/mnist_multiple.py b/labs/04/mnist_multiple.py index 06b9d9e..def13ab 100644 --- a/labs/04/mnist_multiple.py +++ b/labs/04/mnist_multiple.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import numpy as np import keras @@ -13,9 +16,13 @@ # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=5, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values.
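The model in the following hunk passes both input images through one shared subnetwork; in Keras, calling the same layer instance on several inputs reuses a single set of parameters. A minimal sketch of this weight sharing (the names and sizes are illustrative, not part of the submission):

import keras

a = keras.Input(shape=[8])
b = keras.Input(shape=[8])
shared = keras.layers.Dense(4, activation="relu")  # one kernel, one bias
model = keras.Model([a, b], [shared(a), shared(b)])  # both outputs reuse them
assert len(model.trainable_weights) == 2  # only the shared kernel and bias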
@@ -27,7 +34,7 @@ def __init__(self, args: argparse.Namespace) -> None: keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]), ) - # TODO: The model starts by passing each input image through the same + # The model starts by passing each input image through the same # subnetwork (with shared weights), which should perform # - keras.layers.Rescaling(1 / 255) to convert images to floats in [0, 1] range, # - convolution with 10 filters, 3x3 kernel size, stride 2, "valid" padding, ReLU activation, @@ -36,24 +43,49 @@ def __init__(self, args: argparse.Namespace) -> None: # - fully connected layer with 200 neurons and ReLU activation, # obtaining a 200-dimensional feature vector FV of each image. - # TODO: Using the computed representations, the model should produce four outputs: + rescale = keras.layers.Rescaling(1 / 255) + c1 = keras.layers.Conv2D( + filters=10, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + c2 = keras.layers.Conv2D( + filters=20, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + flat = keras.layers.Flatten() + hidden = keras.layers.Dense(200, activation="relu") + + fv1 = hidden(flat(c2(c1(rescale(images[0]))))) + fv2 = hidden(flat(c2(c1(rescale(images[1]))))) + + # Using the computed representations, the model should produce four outputs: # - first, compute _direct comparison_ whether the first digit is # greater than the second, by # - concatenating the two 200-dimensional image representations FV, # - processing them using another 200-neuron ReLU dense layer # - computing one output using a dense layer with "sigmoid" activation + concatenation = keras.layers.Concatenate()([fv1, fv2]) + hidden2 = keras.layers.Dense(200, activation="relu") + pred_layer = keras.layers.Dense(1, activation="sigmoid") + direct_comparison = pred_layer(hidden2(concatenation)) # - then, classify the computed representation FV of the first image using # a densely connected softmax layer into 10 classes; # - then, classify the computed representation FV of the second image using # the same layer (identical, i.e., with shared weights) into 10 classes; + classification_layer = keras.layers.Dense(10, activation="softmax") + d1 = classification_layer(fv1) + d2 = classification_layer(fv2) # - finally, compute _indirect comparison_ whether the first digit # is greater than second, by comparing the predictions from the above # two outputs; convert the comparison to "float32" using `keras.ops.cast`. outputs = { - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., - "indirect_comparison": ..., + "direct_comparison": direct_comparison, + "digit_1": d1, + "digit_2": d2, + "indirect_comparison": keras.ops.cast( + keras.ops.greater( + keras.ops.argmax(d1, axis=1), keras.ops.argmax(d2, axis=1) + ), + "float32", + ), } # Finally, construct the model. @@ -65,7 +97,7 @@ def __init__(self, args: argparse.Namespace) -> None: # the keys of the `outputs` dictionary. self.output_names = sorted(outputs.keys()) - # TODO: Define the appropriate losses for the model outputs + # Define the appropriate losses for the model outputs # "direct_comparison", "digit_1", "digit_2". 
Regarding metrics, # the accuracy of both the direct and indirect comparisons should be # computed; name both metrics "accuracy" (i.e., pass "accuracy" as the @@ -73,19 +105,25 @@ def __init__(self, args: argparse.Namespace) -> None: self.compile( optimizer=keras.optimizers.Adam(), loss={ - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., + "direct_comparison": keras.losses.BinaryCrossentropy(), + "digit_1": keras.losses.SparseCategoricalCrossentropy(), + "digit_2": keras.losses.SparseCategoricalCrossentropy(), }, metrics={ - "direct_comparison": [...], - "indirect_comparison": [...], + "direct_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], + "indirect_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], }, ) # Create an appropriate dataset using the MNIST data. def create_dataset( - self, mnist_dataset: MNIST.Dataset, args: argparse.Namespace, + self, + mnist_dataset: MNIST.Dataset, + args: argparse.Namespace, ) -> torch.utils.data.Dataset: # Original MNIST dataset. images, labels = mnist_dataset.data["images"], mnist_dataset.data["labels"] @@ -94,16 +132,27 @@ def create_dataset( # You can assume that the size of the original dataset is even. class TorchDataset(torch.utils.data.Dataset): def __len__(self) -> int: - # TODO: The new dataset has half the size of the original one. - return ... + # The new dataset has half the size of the original one. + return len(images) // 2 - def __getitem__(self, index: int) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: - # TODO: Given an `index`, generate a dataset element suitable for our model. + def __getitem__( + self, index: int + ) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: + # Given an `index`, generate a dataset element suitable for our model. # Notably, the element should be a pair `(input, output)`, with # - `input` being a pair of images `(images[2 * index], images[2 * index + 1])`, # - `output` being a dictionary with keys "digit_1", "digit_2", "direct_comparison", # and "indirect_comparison". - return ... + return ( + (images[2 * index], images[2 * index + 1]), + { + "digit_1": labels[2 * index], + "digit_2": labels[2 * index + 1], + "direct_comparison": labels[2 * index] > labels[2 * index + 1], + "indirect_comparison": labels[2 * index] + > labels[2 * index + 1], + }, + ) return TorchDataset() @@ -122,14 +171,22 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) # Construct suitable dataloaders from the MNIST data. - train = torch.utils.data.DataLoader(model.create_dataset(mnist.train, args), args.batch_size, shuffle=True) - dev = torch.utils.data.DataLoader(model.create_dataset(mnist.dev, args), args.batch_size) + train = torch.utils.data.DataLoader( + model.create_dataset(mnist.train, args), args.batch_size, shuffle=True + ) + dev = torch.utils.data.DataLoader( + model.create_dataset(mnist.dev, args), args.batch_size + ) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) # Return development metrics for ReCodEx to validate. 
- return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/04/torch_dataset.ps1 b/labs/04/torch_dataset.ps1 new file mode 100644 index 0000000..46fa378 --- /dev/null +++ b/labs/04/torch_dataset.ps1 @@ -0,0 +1,11 @@ +# "" +# "👉 TEST 1" +# "python3 torch_dataset.py --epochs=1 --batch_size=100" +# python3 torch_dataset.py --epochs=1 --batch_size=100 +# "50/50 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.1297 - loss: 2.2519 - val_accuracy: 0.2710 - val_loss: 1.9796" +"" +"👉 TEST 2" +"python3 torch_dataset.py --epochs=1 --batch_size=50 --augment" +python3 torch_dataset.py --epochs=1 --batch_size=50 --augment +"100/100 ━━━━━━━━━━━━━━━━━━━━ 4s 34ms/step - accuracy: 0.1354 - loss: 2.2565 - val_accuracy: 0.2690 - val_loss: 1.9889" +"" diff --git a/labs/04/torch_dataset.py b/labs/04/torch_dataset.py index 5e0c330..f689e54 100644 --- a/labs/04/torch_dataset.py +++ b/labs/04/torch_dataset.py @@ -53,54 +53,67 @@ def main(args: argparse.Namespace) -> dict[str, float]: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) - # TODO: Create a Torch dataset constructible from the given `CIFAR10.Dataset`. + # Create a Torch dataset constructible from the given `CIFAR10.Dataset`. # You should use only the first `size` examples of the dataset, and optional # augmentation function `augmentation_fn` may be applied to the images. class TorchDataset(torch.utils.data.Dataset): + images: np.ndarray + labels: np.ndarray + augmentation_fn: callable + def __init__(self, cifar: CIFAR10.Dataset, size: int, augmentation_fn=None) -> None: - # TODO: Note that the images and labels are available in `cifar.data["images"]` + # Note that the images and labels are available in `cifar.data["images"]` # and `cifar.data["labels"]`. - ... + self.images = cifar.data["images"][:size] + self.labels = cifar.data["labels"][:size] + self.augmentation_fn = augmentation_fn def __len__(self) -> int: - # TODO: Return the appropriate size. - ... + # Return the appropriate size. + return len(self.images) def __getitem__(self, index: int) -> tuple[np.ndarray | torch.Tensor, int]: - # TODO: Return the `index`-th example from the dataset, with the image optionally + # Return the `index`-th example from the dataset, with the image optionally # passed through the `augmentation_fn` if it is not `None`. - ... + image = self.images[index] + if self.augmentation_fn is not None: + image = self.augmentation_fn(image) + return image, self.labels[index] if args.augment: # Construct a sequence of augmentation transformations from `torchvision.transforms.v2`. transformation = v2.Compose([ - # TODO: Add the following transformations: + # Add the following transformations: # - first create a `v2.RandomResize` that scales the image to # random size in range [28, 36], # - then add `v2.Pad` that pads the image with 4 pixels on each side, # - then add `v2.RandomCrop` that chooses a random crop of size 32x32, # - and finally add `v2.RandomHorizontalFlip` that uniformly # randomly flips the image horizontally. - ...
+ v2.RandomResize(28, 36), + v2.Pad(4), + v2.RandomCrop(32), + v2.RandomHorizontalFlip(), ]) def augmentation_fn(image: np.ndarray) -> torch.Tensor: - # TODO: First, convert the numpy `images` to a PyTorch tensor of uint8s, + # First, convert the numpy `images` to a PyTorch tensor of uint8s, + # preferably by using `torch.from_numpy` or `torch.as_tensor` to avoid copying. # Then, because of the channels-position mismatch, permute the axes # in the image to change the order of the axes from HWC to CHW. # Next, apply the `transformation` to the image (by calling it with # the image as an argument), and finally permute the axes back to # the original order. - return ... + + return transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0) + else: augmentation_fn = None - # TODO: Create `train` and `dev` instances of `TorchDataset` from the corresponding + # Create `train` and `dev` instances of `TorchDataset` from the corresponding # `cifar` datasets. Limit their sizes to 5_000 and 1_000 examples, respectively, # and use the `augmentation_fn` for the training dataset. - train = ... - dev = ... + train = TorchDataset(cifar.train, 5_000, augmentation_fn) + dev = TorchDataset(cifar.dev, 1_000) if args.show_images: from torch.utils import tensorboard @@ -114,10 +127,10 @@ def augmentation_fn(image: np.ndarray) -> torch.Tensor: tb_writer.close() print("Saved first {} training images to logs/{}".format(GRID * GRID, TAG)) - # TODO: Create `train` and `dev` instances of `torch.utils.data.DataLoader` from + # Create `train` and `dev` instances of `torch.utils.data.DataLoader` from # the datasets, using the given `args.batch_size` and shuffling the training dataset. - train = ... - dev = ... + train = torch.utils.data.DataLoader(train, args.batch_size, shuffle=True) + dev = torch.utils.data.DataLoader(dev, args.batch_size) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) diff --git a/labs/team_description.py b/labs/team_description.py index 14ed5e1..1d232bc 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,4 +6,7 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: -# 01234567-89ab-cdef-0123-456789abcdef. +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 diff --git a/pull.ps1 b/pull.ps1 new file mode 100644 index 0000000..9cadfe4 --- /dev/null +++ b/pull.ps1 @@ -0,0 +1 @@ +git pull upstream master diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 0000000..f1f7bbe --- /dev/null +++ b/setup.ps1 @@ -0,0 +1,6 @@ +git remote rename origin upstream +git remote add origin git@github.com:joglr/npfl138.git +git fetch +git checkout master +python -m venv .venv +.venv/Scripts/pip install -r .\labs\requirements.txt
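As a closing aside, the augmentation pipeline in labs/04/torch_dataset.py can be sanity-checked in isolation; in the sketch below a dummy uint8 array stands in for a CIFAR-10 image (illustrative only):

import numpy as np
import torch
from torchvision.transforms import v2

transformation = v2.Compose([
    v2.RandomResize(28, 36),    # random resize, per the assignment
    v2.Pad(4),                  # 4 pixels of padding on every side
    v2.RandomCrop(32),          # random 32x32 crop
    v2.RandomHorizontalFlip(),  # flip with probability 0.5
])

image = np.zeros([32, 32, 3], dtype=np.uint8)  # HWC, like CIFAR10 images
augmented = transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0)
print(augmented.shape)  # torch.Size([32, 32, 3])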