diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..917c1db --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +**/.venv/ +logs/ +mnist.npz +*.zip diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg new file mode 100644 index 0000000..e129fd0 --- /dev/null +++ b/.venv/pyvenv.cfg @@ -0,0 +1,3 @@ +home = C:\Python310 +include-system-site-packages = false +version = 3.10.7 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..dc3f727 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "basic" +} diff --git a/labs/01/expected.txt b/labs/01/expected.txt new file mode 100644 index 0000000..fdaf786 --- /dev/null +++ b/labs/01/expected.txt @@ -0,0 +1,39 @@ +python3 mnist_layers_activations.py --hidden_layers=0 --activation=none +Epoch 1/10 accuracy: 0.7801 - loss: 0.8405 - val_accuracy: 0.9300 - val_loss: 0.2716 +Epoch 5/10 accuracy: 0.9222 - loss: 0.2792 - val_accuracy: 0.9406 - val_loss: 0.2203 +Epoch 10/10 accuracy: 0.9304 - loss: 0.2515 - val_accuracy: 0.9432 - val_loss: 0.2159 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=none +Epoch 1/10 accuracy: 0.8483 - loss: 0.5230 - val_accuracy: 0.9352 - val_loss: 0.2422 +Epoch 5/10 accuracy: 0.9236 - loss: 0.2758 - val_accuracy: 0.9360 - val_loss: 0.2325 +Epoch 10/10 accuracy: 0.9298 - loss: 0.2517 - val_accuracy: 0.9354 - val_loss: 0.2439 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu +Epoch 1/10 accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 +Epoch 5/10 accuracy: 0.9824 - loss: 0.0613 - val_accuracy: 0.9808 - val_loss: 0.0740 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0202 - val_accuracy: 0.9788 - val_loss: 0.0821 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh +Epoch 1/10 accuracy: 0.8529 - loss: 0.5183 - val_accuracy: 0.9564 - val_loss: 0.1632 +Epoch 5/10 accuracy: 0.9800 - loss: 0.0728 - val_accuracy: 0.9740 - val_loss: 0.0853 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0244 - val_accuracy: 0.9782 - val_loss: 0.0772 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +Epoch 1/10 accuracy: 0.7851 - loss: 0.8650 - val_accuracy: 0.9414 - val_loss: 0.2196 +Epoch 5/10 accuracy: 0.9647 - loss: 0.1270 - val_accuracy: 0.9704 - val_loss: 0.1079 +Epoch 10/10 accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9756 - val_loss: 0.0837 + +python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu +Epoch 1/10 accuracy: 0.8497 - loss: 0.5011 - val_accuracy: 0.9664 - val_loss: 0.1225 +Epoch 5/10 accuracy: 0.9862 - loss: 0.0438 - val_accuracy: 0.9734 - val_loss: 0.1026 +Epoch 10/10 accuracy: 0.9932 - loss: 0.0202 - val_accuracy: 0.9818 - val_loss: 0.0865 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu +Epoch 1/10 accuracy: 0.7710 - loss: 0.6793 - val_accuracy: 0.9570 - val_loss: 0.1479 +Epoch 5/10 accuracy: 0.9780 - loss: 0.0783 - val_accuracy: 0.9786 - val_loss: 0.0808 +Epoch 10/10 accuracy: 0.9869 - loss: 0.0481 - val_accuracy: 0.9724 - val_loss: 0.1163 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +Epoch 1/10 accuracy: 0.1072 - loss: 2.3068 - val_accuracy: 0.1784 - val_loss: 2.1247 +Epoch 5/10 accuracy: 0.8825 - loss: 0.4776 - val_accuracy: 0.9164 - val_loss: 0.3686 +Epoch 10/10 accuracy: 0.9294 - loss: 0.2994 - val_accuracy: 0.9386 - val_loss: 0.2671 diff --git a/labs/01/mnist.ps1 b/labs/01/mnist.ps1 new file mode 100644 index 0000000..a274269 --- /dev/null +++ 
b/labs/01/mnist.ps1 @@ -0,0 +1,24 @@ +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=0 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=0 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=tanh +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=3 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +# Write-Output "" diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py index d58b796..bf78be2 100644 --- a/labs/01/mnist_layers_activations.py +++ b/labs/01/mnist_layers_activations.py @@ -10,6 +10,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--activation", default="none", choices=["none", "relu", "tanh", "sigmoid"], help="Activation.") @@ -68,7 +73,7 @@ def main(args: argparse.Namespace) -> dict[str, float]: # Create the model model = keras.Sequential() model.add(keras.Input([MNIST.H, MNIST.W, MNIST.C])) - # TODO: Finish the model. Namely: + # Finish the model. Namely: # - start by adding a `keras.layers.Rescaling(1 / 255)` layer; # - then add a `keras.layers.Flatten()` layer; # - add `args.hidden_layers` number of fully connected hidden layers @@ -76,6 +81,14 @@ def main(args: argparse.Namespace) -> dict[str, float]: # from `args.activation`, allowing "none", "relu", "tanh", "sigmoid"; # - finally, add an output fully connected layer with `MNIST.LABELS` units # and `softmax` activation. 
+ model.add(keras.layers.Rescaling(1 / 255)) + model.add(keras.layers.Flatten()) + + activation = None if args.activation == "none" else args.activation + for _ in range(args.hidden_layers): + model.add(keras.layers.Dense(args.hidden_layer, activation=activation)) + + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) model.compile( optimizer=keras.optimizers.Adam(), diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 8e86bff..819b6b0 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -1,4 +1,10 @@ #!/usr/bin/env python3 + +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + import argparse import numpy as np @@ -12,42 +18,51 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: - # TODO: Load data distribution, each line containing a datapoint -- a string. - with open(args.data_path, "r") as data: + # Load data distribution, each line containing a datapoint -- a string. + data_map = {} + + with open(args.data_path, "r", encoding="utf-8") as data: for line in data: line = line.rstrip("\n") - # TODO: Process the line, aggregating data with built-in Python + + # Process the line, aggregating data with built-in Python # data structures (not NumPy, which is not suitable for incremental # addition and string mapping). + if line in data_map: + data_map[line] += 1 + else: + data_map[line] = 1 - # TODO: Create a NumPy array containing the data distribution. The + # Create a NumPy array containing the data distribution. The + # NumPy array should contain only data, not any mapping. Alternatively, # the NumPy array might be created after loading the model distribution. + data_dist = np.array(list(data_map.values())) / sum(data_map.values()) + + # Load model distribution, each line `string \t probability`. + model_map = {} - # TODO: Load model distribution, each line `string \t probability`. - with open(args.model_path, "r") as model: + with open(args.model_path, "r", encoding="utf-8") as model: for line in model: line = line.rstrip("\n") - # TODO: Process the line, aggregating using Python data structures. + key, value = line.split("\t") + model_map[key] = float(value) - # TODO: Create a NumPy array containing the model distribution. + # Create a NumPy array containing the model distribution; a datapoint + # missing from the model distribution gets probability 0. + model_dist = np.array([model_map.get(key, 0.0) for key in data_map]) - # TODO: Compute the entropy H(data distribution). You should not use - # manual for/while cycles, but instead use the fact that most NumPy methods - # operate on all elements (for example `*` is vector element-wise multiplication). - entropy = ... + # Compute the entropy H(data distribution). + entropy = -np.sum(data_dist * np.log(data_dist)) - # TODO: Compute cross-entropy H(data distribution, model distribution). - # When some data distribution elements are missing in the model distribution, - # return `np.inf`. - crossentropy = ... + # Compute cross-entropy H(data distribution, model distribution); `np.log(0)` + # is `-np.inf`, so a datapoint missing from the model correctly yields `np.inf`. + with np.errstate(divide="ignore"): + crossentropy = -np.sum(data_dist * np.log(model_dist)) - # TODO: Compute KL-divergence D_KL(data distribution, model_distribution), - # again using `np.inf` when needed. - kl_divergence = ... + # Compute KL-divergence D_KL(data distribution, model distribution). + kl_divergence = crossentropy - entropy # Return the computed values for ReCodEx to validate.
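+ # Toy check (hypothetical numbers, not from the assignment data): for data "a a b b" and model lines "a\t0.5" and "b\t0.5", both distributions are [0.5, 0.5], so entropy = crossentropy = ln 2 ≈ 0.6931 and kl_divergence = 0; had "b" been missing from the model, crossentropy and kl_divergence would both be np.inf.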
- return entropy, crossentropy, kl_divergence + return entropy, crossentropy if np.isfinite(crossentropy) else np.inf, kl_divergence if np.isfinite(kl_divergence) else np.inf if __name__ == "__main__": diff --git a/labs/01/output.txt b/labs/01/output.txt new file mode 100644 index 0000000..916c534 --- /dev/null +++ b/labs/01/output.txt @@ -0,0 +1,167 @@ +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.7761 - loss: 0.8442 - val_accuracy: 0.9298 - val_loss: 0.2730 +Epoch 2/10 +1100/1100 12s 11ms/step - accuracy: 0.9057 - loss: 0.3428 - val_accuracy: 0.9336 - val_loss: 0.2418 +Epoch 3/10 +1100/1100 11s 10ms/step - accuracy: 0.9177 - loss: 0.2945 - val_accuracy: 0.9366 - val_loss: 0.2284 +Epoch 4/10 +1100/1100 12s 10ms/step - accuracy: 0.9193 - loss: 0.2839 - val_accuracy: 0.9384 - val_loss: 0.2267 +Epoch 5/10 +1100/1100 11s 10ms/step - accuracy: 0.9228 - loss: 0.2790 - val_accuracy: 0.9392 - val_loss: 0.2208 +Epoch 6/10 +1100/1100 12s 11ms/step - accuracy: 0.9244 - loss: 0.2713 - val_accuracy: 0.9440 - val_loss: 0.2162 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9252 - loss: 0.2662 - val_accuracy: 0.9398 - val_loss: 0.2178 +Epoch 8/10 +1100/1100 14s 12ms/step - accuracy: 0.9269 - loss: 0.2626 - val_accuracy: 0.9398 - val_loss: 0.2169 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9286 - loss: 0.2612 - val_accuracy: 0.9458 - val_loss: 0.2128 +Epoch 10/10 +1100/1100 13s 12ms/step - accuracy: 0.9307 - loss: 0.2515 - val_accuracy: 0.9438 - val_loss: 0.2161 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8422 - loss: 0.5383 - val_accuracy: 0.9346 - val_loss: 0.2400 +Epoch 2/10 +1100/1100 18s 17ms/step - accuracy: 0.9120 - loss: 0.3102 - val_accuracy: 0.9364 - val_loss: 0.2372 +Epoch 3/10 +1100/1100 16s 15ms/step - accuracy: 0.9233 - loss: 0.2774 - val_accuracy: 0.9352 - val_loss: 0.2342 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9225 - loss: 0.2736 - val_accuracy: 0.9366 - val_loss: 0.2336 +Epoch 5/10 +1100/1100 15s 13ms/step - accuracy: 0.9233 - loss: 0.2760 - val_accuracy: 0.9344 - val_loss: 0.2331 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9251 - loss: 0.2683 - val_accuracy: 0.9382 - val_loss: 0.2247 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9261 - loss: 0.2658 - val_accuracy: 0.9356 - val_loss: 0.2367 +Epoch 8/10 +1100/1100 15s 14ms/step - accuracy: 0.9256 - loss: 0.2635 - val_accuracy: 0.9364 - val_loss: 0.2308 +Epoch 9/10 +1100/1100 15s 13ms/step - accuracy: 0.9253 - loss: 0.2625 - val_accuracy: 0.9386 - val_loss: 0.2277 +Epoch 10/10 +1100/1100 15s 13ms/step - accuracy: 0.9301 - loss: 0.2515 - val_accuracy: 0.9358 - val_loss: 0.2441 + +Epoch 1/10 +1100/1100 16s 13ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +Epoch 2/10 +1100/1100 15s 13ms/step - accuracy: 0.9517 - loss: 0.1637 - val_accuracy: 0.9682 - val_loss: 0.1153 +Epoch 3/10 +1100/1100 14s 13ms/step - accuracy: 0.9700 - loss: 0.1021 - val_accuracy: 0.9730 - val_loss: 0.0897 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9774 - loss: 0.0757 - val_accuracy: 0.9754 - val_loss: 0.0835 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9824 - loss: 0.0603 - val_accuracy: 0.9772 - val_loss: 0.0766 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9855 - loss: 0.0486 - val_accuracy: 0.9762 - val_loss: 0.0850 +Epoch 7/10 +1100/1100 14s 13ms/step - accuracy: 0.9889 - loss: 0.0374 - val_accuracy: 0.9776 - val_loss: 0.0774 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9901 - loss: 0.0318 - val_accuracy: 0.9786 - val_loss: 0.0765 +Epoch 9/10 +1100/1100 13s 
12ms/step - accuracy: 0.9928 - loss: 0.0267 - val_accuracy: 0.9804 - val_loss: 0.0766 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9944 - loss: 0.0208 - val_accuracy: 0.9792 - val_loss: 0.0801 + +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.8468 - loss: 0.5308 - val_accuracy: 0.9594 - val_loss: 0.1591 +Epoch 2/10 +1100/1100 13s 12ms/step - accuracy: 0.9433 - loss: 0.1909 - val_accuracy: 0.9646 - val_loss: 0.1300 +Epoch 3/10 +1100/1100 13s 12ms/step - accuracy: 0.9658 - loss: 0.1235 - val_accuracy: 0.9726 - val_loss: 0.0973 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9744 - loss: 0.0909 - val_accuracy: 0.9732 - val_loss: 0.0876 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9798 - loss: 0.0747 - val_accuracy: 0.9788 - val_loss: 0.0770 +Epoch 6/10 +1100/1100 13s 12ms/step - accuracy: 0.9832 - loss: 0.0606 - val_accuracy: 0.9766 - val_loss: 0.0801 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9881 - loss: 0.0460 - val_accuracy: 0.9792 - val_loss: 0.0714 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9894 - loss: 0.0397 - val_accuracy: 0.9768 - val_loss: 0.0741 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9923 - loss: 0.0312 - val_accuracy: 0.9796 - val_loss: 0.0709 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9940 - loss: 0.0257 - val_accuracy: 0.9802 - val_loss: 0.0720 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8072 - loss: 0.8138 - val_accuracy: 0.9452 - val_loss: 0.2121 +Epoch 2/10 +1100/1100 15s 14ms/step - accuracy: 0.9241 - loss: 0.2602 - val_accuracy: 0.9570 - val_loss: 0.1663 +Epoch 3/10 +1100/1100 15s 14ms/step - accuracy: 0.9476 - loss: 0.1863 - val_accuracy: 0.9648 - val_loss: 0.1322 +Epoch 4/10 +1100/1100 14s 13ms/step - accuracy: 0.9583 - loss: 0.1490 - val_accuracy: 0.9670 - val_loss: 0.1168 +Epoch 5/10 +1100/1100 14s 13ms/step - accuracy: 0.9658 - loss: 0.1243 - val_accuracy: 0.9696 - val_loss: 0.1047 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9706 - loss: 0.1065 - val_accuracy: 0.9718 - val_loss: 0.0975 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9758 - loss: 0.0891 - val_accuracy: 0.9740 - val_loss: 0.0918 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9779 - loss: 0.0792 - val_accuracy: 0.9758 - val_loss: 0.0885 +Epoch 9/10 +1100/1100 14s 13ms/step - accuracy: 0.9816 - loss: 0.0681 - val_accuracy: 0.9776 - val_loss: 0.0825 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9766 - val_loss: 0.0831 + +Epoch 1/10 +1100/1100 16s 14ms/step - accuracy: 0.8483 - loss: 0.5002 - val_accuracy: 0.9650 - val_loss: 0.1189 +Epoch 2/10 +1100/1100 16s 14ms/step - accuracy: 0.9609 - loss: 0.1262 - val_accuracy: 0.9718 - val_loss: 0.0971 +Epoch 3/10 +1100/1100 16s 14ms/step - accuracy: 0.9759 - loss: 0.0783 - val_accuracy: 0.9772 - val_loss: 0.0690 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9810 - loss: 0.0597 - val_accuracy: 0.9788 - val_loss: 0.0752 +Epoch 5/10 +1100/1100 15s 14ms/step - accuracy: 0.9855 - loss: 0.0468 - val_accuracy: 0.9748 - val_loss: 0.0817 +Epoch 6/10 +1100/1100 16s 14ms/step - accuracy: 0.9884 - loss: 0.0398 - val_accuracy: 0.9758 - val_loss: 0.0909 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9898 - loss: 0.0318 - val_accuracy: 0.9724 - val_loss: 0.0998 +Epoch 8/10 +1100/1100 16s 14ms/step - accuracy: 0.9892 - loss: 0.0305 - val_accuracy: 0.9778 - val_loss: 0.0952 +Epoch 9/10 +1100/1100 16s 14ms/step - accuracy: 0.9914 - loss: 0.0267 - val_accuracy: 0.9756 - val_loss: 0.0878 +Epoch 10/10 +1100/1100 16s 15ms/step - accuracy: 0.9935 - loss: 
0.0203 - val_accuracy: 0.9770 - val_loss: 0.0974 + +Epoch 1/10 +1100/1100 24s 21ms/step - accuracy: 0.7772 - loss: 0.6657 - val_accuracy: 0.9524 - val_loss: 0.1752 +Epoch 2/10 +1100/1100 24s 22ms/step - accuracy: 0.9525 - loss: 0.1705 - val_accuracy: 0.9682 - val_loss: 0.1261 +Epoch 3/10 +1100/1100 22s 20ms/step - accuracy: 0.9675 - loss: 0.1162 - val_accuracy: 0.9750 - val_loss: 0.0945 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.9735 - loss: 0.0929 - val_accuracy: 0.9720 - val_loss: 0.1018 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.9789 - loss: 0.0794 - val_accuracy: 0.9762 - val_loss: 0.0888 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9806 - loss: 0.0729 - val_accuracy: 0.9760 - val_loss: 0.0961 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9847 - loss: 0.0578 - val_accuracy: 0.9810 - val_loss: 0.0932 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9824 - loss: 0.0643 - val_accuracy: 0.9786 - val_loss: 0.0854 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0487 - val_accuracy: 0.9764 - val_loss: 0.1054 +Epoch 10/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0493 - val_accuracy: 0.9780 - val_loss: 0.1108 + +Epoch 1/10 +1100/1100 23s 20ms/step - accuracy: 0.1052 - loss: 2.3130 - val_accuracy: 0.1808 - val_loss: 1.9383 +Epoch 2/10 +1100/1100 22s 20ms/step - accuracy: 0.2002 - loss: 1.9364 - val_accuracy: 0.2168 - val_loss: 1.8587 +Epoch 3/10 +1100/1100 23s 20ms/step - accuracy: 0.2161 - loss: 1.8392 - val_accuracy: 0.5588 - val_loss: 1.2106 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.5594 - loss: 1.1159 - val_accuracy: 0.8168 - val_loss: 0.7119 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.8359 - loss: 0.6312 - val_accuracy: 0.8994 - val_loss: 0.4360 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.8827 - loss: 0.4854 - val_accuracy: 0.9066 - val_loss: 0.4053 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9007 - loss: 0.4218 - val_accuracy: 0.9166 - val_loss: 0.3660 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9075 - loss: 0.3940 - val_accuracy: 0.9204 - val_loss: 0.3552 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9090 - loss: 0.3922 - val_accuracy: 0.9242 - val_loss: 0.3356 +Epoch 10/10 +1100/1100 24s 22ms/step - accuracy: 0.9191 - loss: 0.3534 - val_accuracy: 0.9270 - val_loss: 0.3286 diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py index 1f99e21..0632b22 100644 --- a/labs/01/pca_first.keras.py +++ b/labs/01/pca_first.keras.py @@ -9,6 +9,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") @@ -32,39 +37,43 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = keras.ops.convert_to_tensor(mnist.train.data["images"][data_indices] / 255, dtype="float32") - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `keras.ops.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... 
+ data = keras.ops.reshape(data, [data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]) - # TODO: Now compute mean of every feature. Use `keras.ops.mean`, and set + # Now compute mean of every feature. Use `keras.ops.mean`, and set # `axis` to zero -- therefore, the mean will be computed across the first # dimension, so across examples. - mean = ... + mean = keras.ops.mean(data, axis=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `keras.ops.transpose` and # matrix multiplication using either Python operator @ or `keras.ops.matmul`. - cov = ... + cov = keras.ops.transpose(data - mean) @ (data - mean) / data.shape[0] - # TODO: Compute the total variance, which is the sum of the diagonal + # Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `keras.ops.diagonal`, # and to sum a tensor use `keras.ops.sum`. - total_variance = ... + total_variance = keras.ops.sum(keras.ops.diagonal(cov)) - # TODO: Now run `args.iterations` of the power iteration algorithm. + # Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`. - v = ... + v = keras.ops.ones(cov.shape[0], dtype="float32") for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute + # In the power iteration algorithm, we compute # 1. v = cov v # The matrix-vector multiplication can be computed as regular matrix multiplication. + v = keras.ops.matmul(cov, v) + # 2. s = l2_norm(v) # The l2_norm can be computed using for example `keras.ops.norm`. + s = keras.ops.norm(v, 2) + # 3. v = v / s - pass + v = v / s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 2e4ef10..deecf06 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -7,6 +7,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") @@ -30,43 +35,46 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = torch.tensor(mnist.train.data["images"][data_indices] / 255, dtype=torch.float32) - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... + data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) - # TODO: Now compute mean of every feature. Use `torch.mean`, and set + # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be # computed across the first dimension, so across examples.
# # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = ... + mean = torch.mean(data, axis=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = ... + cov = torch.matmul(torch.t(data - mean), data - mean) / data.shape[0] - # TODO: Compute the total variance, which is the sum of the diagonal + # Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = ... + total_variance = torch.sum(torch.diagonal(cov)).item() - # TODO: Now run `args.iterations` of the power iteration algorithm. + # Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. - v = ... + v = torch.ones(cov.shape[0], dtype=torch.float32) + for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute - # 1. v = cov v - # The matrix-vector multiplication can be computed as regular matrix multiplication - # or using `torch.mv`. - # 2. s = l2_norm(v) - # The l2_norm can be computed using for example `torch.linalg.vector_norm`. - # 3. v = v / s - pass + # In the power iteration algorithm, we compute + # 1. v = cov v + # The matrix-vector multiplication can be computed as regular matrix multiplication + # or using `torch.mv`. + # 2. s = l2_norm(v) + # The l2_norm can be computed using for example `torch.linalg.vector_norm`. + # 3. v = v / s + v = cov @ v + s = torch.linalg.vector_norm(v) + v = v / s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. diff --git a/labs/01/run.ps1 b/labs/01/run.ps1 new file mode 100644 index 0000000..a68f5e8 --- /dev/null +++ b/labs/01/run.ps1 @@ -0,0 +1 @@ +..\..\.venv\Scripts\python .\pca_first.keras.py diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 new file mode 100644 index 0000000..75ddf37 --- /dev/null +++ b/labs/01/test.ps1 @@ -0,0 +1,4 @@ +python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 7befc72..b708b63 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -8,6 +8,11 @@ import keras import numpy as np import torch + +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. @@ -17,8 +23,8 @@ parser.add_argument("--seed", default=42, type=int, help="Random seed.") parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") # If you add more arguments, ReCodEx will keep them with your default values.
-parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=10, type=int, help="Batch size.") +parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.") parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.") @@ -49,7 +55,7 @@ def on_epoch_end(self, epoch, logs=None): def evaluate_model( model: keras.Model, seed: int = 42, episodes: int = 100, render: bool = False, report_per_episode: bool = False -) -> float: + ) -> float: """Evaluate the given model on CartPole-v1 environment. Returns the average score achieved on the given number of episodes. @@ -86,16 +92,10 @@ def evaluate_model( def main(args: argparse.Namespace) -> keras.Model | None: # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) - if args.threads: - torch.set_num_threads(args.threads) - torch.set_num_interop_threads(args.threads) + torch.set_num_threads(args.threads) + torch.set_num_interop_threads(args.threads) if not args.evaluate: - if args.batch_size is ...: - raise ValueError("You must specify the batch size, either in the defaults or on the command line.") - if args.epochs is ...: - raise ValueError("You must specify the number of epochs, either in the defaults or on the command line.") - # Create logdir name args.logdir = os.path.join("logs", "{}-{}-{}".format( os.path.basename(globals().get("__file__", "notebook")), @@ -107,16 +107,37 @@ def main(args: argparse.Namespace) -> keras.Model | None: data = np.loadtxt("gym_cartpole_data.txt") observations, labels = data[:, :-1], data[:, -1].astype(np.int32) + + # TODO: Create the model in the `model` variable. Note that # the model can perform any of: # - binary classification with 1 output and sigmoid activation; # - two-class classification with 2 outputs and softmax activation. - model = ... + + # Convert the labels to one-hot encoding + labels = keras.ops.one_hot(labels, num_classes=2) + + model = keras.Sequential(name="gym_model", layers=[ + # Input layer + keras.layers.Input(shape=(observations.shape[1],)), + # Hidden layers + keras.layers.Dense(8, activation="tanh"), + # Output layer + keras.layers.Dense(2, activation="softmax"), # 2 outputs because we have 2 actions in the cart pole problem + ]) + + + model.summary() # TODO: Prepare the model for training using the `model.compile` method. - model.compile(...) + model.compile( + loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), + optimizer=keras.optimizers.Adam(learning_rate=0.009), + metrics=["accuracy"], + ) tb_callback = TorchTensorBoardCallback(args.logdir) + labels = keras.ops.one_hot(labels,num_classes=2) model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback]) # Save the model, without the optimizer state. diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index 6655133..116ae98 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -11,6 +11,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. 
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -107,8 +112,34 @@ def main(args: argparse.Namespace) -> dict[str, float]: # in `model.optimizer._learning_rate` if needed), so after training, the learning rate # should be `args.learning_rate_final`. + optimizer = None + lr, momen, decay, final_lr, epochs = args.learning_rate, args.momentum, args.decay, args.learning_rate_final, args.epochs + if decay: + if not final_lr: + print("Please define a final learning rate!") + else: + steps = mnist.train.size/args.batch_size*epochs + init_lr = args.learning_rate + if decay == "linear": + lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) + elif decay == "exponential": + decay_rate = final_lr/init_lr + lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate) + elif decay == "cosine": + alpha = final_lr/init_lr + lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha) + + if args.optimizer == 'SGD': + if momen: + optimizer = keras.optimizers.SGD(learning_rate=lr, momentum=momen, nesterov=True) + else: + optimizer = keras.optimizers.SGD(learning_rate=lr) + elif args.optimizer =="Adam": + optimizer = keras.optimizers.Adam(learning_rate=lr) + + model.compile( - optimizer=..., + optimizer=optimizer, loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[keras.metrics.SparseCategoricalAccuracy("accuracy")], ) @@ -121,6 +152,10 @@ def main(args: argparse.Namespace) -> dict[str, float]: validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), callbacks=[tb_callback], ) + model.summary() + + if decay: + print("Next learning rate to be used:", model.optimizer.learning_rate.item()) # Return development metrics for ReCodEx to validate. 
return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} diff --git a/labs/02/sgd_backpropagation.ps1 b/labs/02/sgd_backpropagation.ps1 new file mode 100644 index 0000000..f613710 --- /dev/null +++ b/labs/02/sgd_backpropagation.ps1 @@ -0,0 +1,50 @@ +# Examples: +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Dev accuracy after epoch 3 is 95.16 +# Dev accuracy after epoch 4 is 95.50 +# Dev accuracy after epoch 5 is 95.96 +# Dev accuracy after epoch 6 is 96.04 +# Dev accuracy after epoch 7 is 95.82 +# Dev accuracy after epoch 8 is 95.92 +# Dev accuracy after epoch 9 is 95.96 +# Dev accuracy after epoch 10 is 96.16 +# Test accuracy after epoch 10 is 95.26 + +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Dev accuracy after epoch 3 is 95.56 +# Dev accuracy after epoch 4 is 95.98 +# Dev accuracy after epoch 5 is 96.24 +# Dev accuracy after epoch 6 is 96.74 +# Dev accuracy after epoch 7 is 96.52 +# Dev accuracy after epoch 8 is 96.54 +# Dev accuracy after epoch 9 is 97.04 +# Dev accuracy after epoch 10 is 97.02 +# Test accuracy after epoch 10 is 96.16 + +# Tests: +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Expected +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Test accuracy after epoch 2 is 93.15 + +# Actual +# Dev accuracy after epoch 1 is 92.98 +# Dev accuracy after epoch 2 is 93.98 +# Test accuracy after epoch 2 is 92.73 + + +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Expected: +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Test accuracy after epoch 2 is 93.54 + +# Actual: +# Dev accuracy after epoch 1 is 94.16 +# Dev accuracy after epoch 2 is 94.98 +# Test accuracy after epoch 2 is 93.56 diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py index cff312a..e3cfacf 100644 --- a/labs/02/sgd_backpropagation.py +++ b/labs/02/sgd_backpropagation.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -12,15 +15,26 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layer", default=100, type=int, help="Size of the hidden layer.") +parser.add_argument( + "--hidden_layer", default=100, type=int, help="Size of the hidden layer." +) parser.add_argument("--learning_rate", default=0.1, type=float, help="Learning rate.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." 
+) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -30,29 +44,57 @@ def __init__(self, args: argparse.Namespace) -> None: self._args = args self._W1 = keras.Variable( - keras.random.normal([MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], stddev=0.1, seed=args.seed), + keras.random.normal( + [MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], + stddev=0.1, + seed=args.seed, + ), trainable=True, ) self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True) - # TODO: Create variables: + # Create variables: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable( + keras.random.normal( + [args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed + ), + trainable=True, + ) + + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> torch.Tensor: - # TODO: Define the computation of the network. Notably: + # Define the computation of the network. Notably: # - start by casting the input byte image to `float32` with `keras.ops.cast` + + cast_inputs = keras.ops.cast(inputs, dtype="float32") + # - then divide the tensor by 255 to normalize it to the `[0, 1]` range + + normalized_inputs = cast_inputs / 255 + # - then reshape it to the shape `[inputs.shape[0], -1]`. # The -1 is a wildcard which is computed so that the number # of elements before and after the reshape is preserved. + + reshaped_inputs = keras.ops.reshape(normalized_inputs, [inputs.shape[0], -1]) + # - then multiply it by `self._W1` and then add `self._b1` # - apply `keras.ops.tanh` + + hidden_layer_output = keras.ops.tanh( + keras.ops.matmul(reshaped_inputs, self._W1) + self._b1 + ) + # - multiply the result by `self._W2` and then add `self._b2` + + output_layer = keras.ops.matmul(hidden_layer_output, self._W2) + self._b2 + + # - finally apply `keras.ops.softmax` and return the result - return ... + return keras.ops.softmax(output_layer) def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -62,49 +104,54 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # Size of the batch is `self._args.batch_size`, except for the last, which # might be smaller. - # TODO: Compute the predicted probabilities of the batch images using `self.predict` - probabilities = ... + # Compute the predicted probabilities of the batch images using `self.predict` + probabilities = self.predict(batch["images"]) - # TODO: Manually compute the loss: + # Manually compute the loss: # - For every batch example, the loss is the categorical crossentropy of the # predicted probabilities and the gold label. To compute the crossentropy, you can # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - loss = ...
- + loss = keras.ops.mean( + keras.ops.categorical_crossentropy( + keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities + ) + ) # We create a list of all variables. Note that a `keras.Model/Layer` automatically # tracks owned variables, so we could also use `self.trainable_variables` # (or even `self.variables`, which is useful for loading/saving). variables = [self._W1, self._b1, self._W2, self._b2] - # TODO: Compute the gradient of the loss with respect to variables using + # Compute the gradient of the loss with respect to variables using # backpropagation algorithm by # - first resetting the gradients of all variables to zero with `self.zero_grad()`, # - then calling `loss.backward()`. - ... + self.zero_grad() + loss.backward() gradients = [variable.value.grad for variable in variables] with torch.no_grad(): for variable, gradient in zip(variables, gradients): - # TODO: Perform the SGD update with learning rate `self._args.learning_rate` + # Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variable.assign_sub(self._args.learning_rate * gradient) def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction correct = 0 for batch in dataset.batches(self._args.batch_size): - # TODO: Compute the probabilities of the batch images using `self.predict` + # Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(batch["images"])) - # TODO: Evaluate how many batch examples were predicted + # Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... - + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -116,11 +163,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST() @@ -132,16 +187,23 @@ def main(args: argparse.Namespace) -> tuple[float, float]: model = Model(args) for epoch in range(args.epochs): - # TODO: Run the `train_epoch` with `mnist.train` dataset - - # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ...
- print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) + # Run the `train_epoch` with `mnist.train` dataset + model.train_epoch(mnist.train) + + # Evaluate the dev data using `evaluate` on `mnist.dev` dataset + accuracy = model.evaluate(mnist.dev) + print( + "Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), + flush=True, + ) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) - # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ... - print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) + # Evaluate the test data using `evaluate` on `mnist.test` dataset + test_accuracy = model.evaluate(mnist.test) + print( + "Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), + flush=True, + ) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) # Return dev and test accuracies for ReCodEx to validate. diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py index 422d3e9..f023328 100644 --- a/labs/02/sgd_manual.py +++ b/labs/02/sgd_manual.py @@ -12,6 +12,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -39,7 +44,9 @@ def __init__(self, args: argparse.Namespace) -> None: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), + trainable=True) + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: # TODO(sgd_backpropagation): Define the computation of the network. Notably: @@ -56,7 +63,14 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor # TODO: In order to support manual gradient computation, you should # return not only the output layer, but also the hidden layer after applying # tanh, and the input layer after reshaping. - return ..., ..., ... + input = keras.ops.cast(inputs, dtype="float32") + input = torch.div(input, 255) + input = input.reshape([input.shape[0], -1]) + hidden_input = keras.ops.matmul(input,self._W1) + self._b1 + hidden_output = keras.ops.tanh(hidden_input) + sm_input = keras.ops.matmul(hidden_output,self._W2) + self._b2 + output = keras.ops.softmax(sm_input) + return input, hidden_output, output def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -72,7 +86,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # # Compute the input layer, hidden layer and output layer # of the batch images using `self.predict`. - + input_layer, hidden_layer, probabilities = self.predict(torch.tensor(batch['images'])) # TODO: Compute the gradient of the loss with respect to all # variables. 
Note that the loss is computed as in `sgd_backpropagation`: # - For every batch example, the loss is the categorical crossentropy of the @@ -80,7 +94,6 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - # # During the gradient computation, you will need to compute # a batched version of a so-called outer product # `C[a, i, j] = A[a, i] * B[a, j]`, # which you can achieve by # `A[:, :, np.newaxis] * B[:, np.newaxis, :]` # or with # `keras.ops.einsum("ai,aj->aij", A, B)`. + gold_labels = keras.ops.one_hot(batch["labels"], num_classes=MNIST.LABELS) + loss = torch.mean(keras.ops.categorical_crossentropy(gold_labels, probabilities)) + + # For softmax followed by categorical crossentropy, the gradient with respect + # to the pre-softmax values is the difference of predicted and gold distributions. + gd_loss = probabilities - gold_labels + gd_b2 = gd_loss + gd_w2 = keras.ops.einsum("ai,aj->aij", hidden_layer, gd_loss) + gd_h = keras.ops.matmul(gd_loss, keras.ops.transpose(self._W2)) + # tanh'(x) = 1 - tanh(x)^2, and `hidden_layer` already holds tanh(x). + gd_h_i = gd_h * (1 - keras.ops.square(hidden_layer)) + gd_b1 = gd_h_i + gd_w1 = keras.ops.einsum("ai,aj->aij", input_layer, gd_h_i) # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variables = [self._W1, self._b1, self._W2, self._b2] + gradients = [gd_w1, gd_b1, gd_w2, gd_b2] + with torch.no_grad(): + for variable, gradient in zip(variables, gradients): + variable.assign_sub(self._args.learning_rate * keras.ops.mean(gradient, axis=0)) + def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction correct = 0 for batch in dataset.batches(self._args.batch_size): # TODO: Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(torch.tensor(batch["images"]))[2]) # TODO(sgd_backpropagation): Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -135,14 +166,14 @@ for epoch in range(args.epochs): # TODO: Run the `train_epoch` with `mnist.train` dataset - + model.train_epoch(mnist.train) # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ... + accuracy = model.evaluate(mnist.dev) print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ...
+ test_accuracy = model.evaluate(mnist.test) print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) diff --git a/labs/02/test.ps1 b/labs/02/test.ps1 new file mode 100644 index 0000000..fa38f74 --- /dev/null +++ b/labs/02/test.ps1 @@ -0,0 +1 @@ +../../.venv/Scripts/python .\gym_cartpole.py && ../../.venv/Scripts/python .\gym_cartpole.py --evaluate diff --git a/labs/03/mnist_ensemble.ps1 b/labs/03/mnist_ensemble.ps1 new file mode 100644 index 0000000..526a6bd --- /dev/null +++ b/labs/03/mnist_ensemble.ps1 @@ -0,0 +1,2 @@ +python3 mnist_ensemble.py --epochs=1 --models=5 +python3 mnist_ensemble.py --epochs=1 --models=5 --hidden_layers=200 diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py index ebffcf9..93bb2eb 100644 --- a/labs/03/mnist_ensemble.py +++ b/labs/03/mnist_ensemble.py @@ -54,11 +54,21 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: print("Done") individual_accuracies, ensemble_accuracies = [], [] for model in range(args.models): - # TODO: Compute the accuracy on the dev set for the individual `models[model]`. - individual_accuracy = ... + # Compute the accuracy on the dev set for the individual `models[model]`. + individual_accuracy = models[model].evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] - # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. + # Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. # # Generally you can choose one of the following approaches: # 1) Use Keras Functional API and construct a `keras.Model` averaging the models # 2) Manually perform the averaging (using PyTorch or NumPy). In this case you do not # need to construct Keras ensemble model at all, and instead call `model.predict` # on the individual models and average the results. To measure accuracy, # either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`. - ensemble_accuracy = ...
+ inputs = keras.Input(shape=(MNIST.H, MNIST.W, MNIST.C)) + # `keras.layers.Average` requires at least two inputs, so for the first + # "ensemble" fall back to the single model's output. + outputs = [m(inputs) for m in models[:model + 1]] + ensemble_output = outputs[0] if len(outputs) == 1 else keras.layers.Average()(outputs) + ensemble_model = keras.Model(inputs=inputs, outputs=ensemble_output) + + ensemble_model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + ensemble_accuracy = ensemble_model.evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] # Store the accuracies individual_accuracies.append(individual_accuracy) diff --git a/labs/03/mnist_regularization.ps1 b/labs/03/mnist_regularization.ps1 new file mode 100644 index 0000000..2a61e88 --- /dev/null +++ b/labs/03/mnist_regularization.ps1 @@ -0,0 +1,24 @@ +# Run script from root repo directory + +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.5 --hidden_layers 300 300 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.3 + +# Expected +# accuracy: 0.5981 - loss: 1.2688 - val_accuracy: 0.9174 - val_loss: 0.3051 +# accuracy: 0.3429 - loss: 1.9163 - val_accuracy: 0.8826 - val_loss: 0.4937 +# accuracy: 0.7014 - loss: 1.0412 - val_accuracy: 0.9236 - val_loss: 0.2776 +# accuracy: 0.7006 - loss: 1.0429 - val_accuracy: 0.9232 - val_loss: 0.2801 +# accuracy: 0.7102 - loss: 1.3015 - val_accuracy: 0.9276 - val_loss: 0.7656 +# accuracy: 0.7113 - loss: 1.6854 - val_accuracy: 0.9332 - val_loss: 1.3709 + +# Actual +# accuracy: 0.6178 - loss: 1.2374 - val_accuracy: 0.9164 - val_loss: 0.3045 +# accuracy: 0.3412 - loss: 1.8919 - val_accuracy: 0.8818 - val_loss: 0.4794 +# accuracy: 0.6948 - loss: 1.0394 - val_accuracy: 0.9186 - val_loss: 0.2859 +# accuracy: 0.6947 - loss: 1.0410 - val_accuracy: 0.9184 - val_loss: 0.2885 +# accuracy: 0.6996 - loss: 1.3013 - val_accuracy: 0.9228 - val_loss: 0.7735 +# accuracy: 0.7102 - loss: 1.6879 - val_accuracy: 0.9284 - val_loss: 1.3739 diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py index cd78fcf..0b2e5a2 100644 --- a/labs/03/mnist_regularization.py +++ b/labs/03/mnist_regularization.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch @@ -15,12 +18,20 @@ parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--dropout", default=0, type=float, help="Dropout regularization.") parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes.") +parser.add_argument( + "--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes." +) parser.add_argument("--label_smoothing", default=0, type=float, help="Label smoothing.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay strength.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) +parser.add_argument( + "--weight_decay", default=0, type=float, help="Weight decay strength." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -32,7 +43,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -43,10 +57,24 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) def main(args: argparse.Namespace) -> dict[str, float]: @@ -57,16 +85,24 @@ def main(args: argparse.Namespace) -> dict[str, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST(size={"train": 5_000}) - # TODO: Incorporate dropout to the model below. Namely, add + # Incorporate dropout to the model below. Namely, add # a `keras.layers.Dropout` layer with `args.dropout` rate after # the `Flatten` layer and after each `Dense` hidden layer (but not after # the output `Dense` layer). @@ -74,11 +110,15 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = keras.Sequential() model.add(keras.layers.Rescaling(1 / 255)) model.add(keras.layers.Flatten()) + model.add(keras.layers.Dropout(args.dropout)) + for hidden_layer in args.hidden_layers: model.add(keras.layers.Dense(hidden_layer, activation="relu")) + model.add(keras.layers.Dropout(rate=args.dropout)) + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) - # TODO: Implement label smoothing with the given `args.label_smoothing` strength. 
+ # Implement label smoothing with the given `args.label_smoothing` strength. # You need to change the `SparseCategorical{Crossentropy,Accuracy}` to # `Categorical{Crossentropy,Accuracy}`, because `label_smoothing` is supported # only by the `CategoricalCrossentropy`. That means you also need to modify @@ -86,29 +126,52 @@ # of the gold class to a full categorical distribution (you can use either NumPy, # or there is a helper method also in the `keras.utils` module). - # TODO: Create a `keras.optimizers.AdamW`, using the default learning + # Create a `keras.optimizers.AdamW`, using the default learning # rate and a weight decay of strength `args.weight_decay`. Then call the # `exclude_from_weight_decay` method to specify that all variables with "bias" # in their name should not be decayed. - optimizer = ... - - model.compile( - optimizer=optimizer, - loss=keras.losses.SparseCategoricalCrossentropy(), - metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], - ) + optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay) + optimizer.exclude_from_weight_decay(var_names=["bias"]) + + # Label smoothing requires dense categorical targets, so remember whether it is enabled. + smooth_labels = args.label_smoothing != 0 + + if smooth_labels: + model.compile( + optimizer=optimizer, + loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing), + metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")], + ) + else: + model.compile( + optimizer=optimizer, + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) tb_callback = TorchTensorBoardCallback(args.logdir) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], - batch_size=args.batch_size, epochs=args.epochs, - validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), + mnist.train.data["images"], + keras.utils.to_categorical( + mnist.train.data["labels"], num_classes=mnist.LABELS + ) if smooth_labels else mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + validation_data=( + mnist.dev.data["images"], + keras.utils.to_categorical( + mnist.dev.data["labels"], num_classes=mnist.LABELS + ) if smooth_labels else mnist.dev.data["labels"], + ), callbacks=[tb_callback], ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/03/uppercase.py b/labs/03/uppercase.py index c975e3f..c83d5c5 100644 --- a/labs/03/uppercase.py +++ b/labs/03/uppercase.py @@ -10,16 +10,16 @@ from uppercase_data import UppercaseData -# TODO: Set reasonable values for the hyperparameters, especially for +# Set reasonable values for the hyperparameters, especially for # `alphabet_size`, `batch_size`, `epochs`, and `window`. # Also, you can set the number of threads to 0 to use all your CPU cores.
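+# With the defaults chosen below, each training example is a window of +# 2 * 4 + 1 = 9 character ids (4 characters of context on each side of the +# classified character) over an alphabet of the 70 most frequent characters.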
parser = argparse.ArgumentParser() -parser.add_argument("--alphabet_size", default=..., type=int, help="If given, use this many most frequent chars.") -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--alphabet_size", default=70, type=int, help="If given, use this many most frequent chars.") +parser.add_argument("--batch_size", default=1024, type=int, help="Batch size.") +parser.add_argument("--epochs", default=2, type=int, help="Number of epochs.") parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--window", default=..., type=int, help="Window size to use.") +parser.add_argument("--threads", default=0, type=int, help="Maximum number of threads to use.") +parser.add_argument("--window", default=4, type=int, help="Window size to use.") class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None: # Load data uppercase_data = UppercaseData(args.window, args.alphabet_size) - # TODO: Implement a suitable model, optionally including regularization, select + # Implement a suitable model, optionally including regularization, select # good hyperparameters and train the model. # # The inputs are _windows_ of fixed size (`args.window` characters on the left, @@ -79,16 +79,34 @@ # You can then flatten the one-hot encoded windows and follow with a dense layer. # - Alternatively, you can use `keras.layers.Embedding` (which is an efficient # implementation of one-hot encoding followed by a Dense layer) and flatten afterwards. - model = ... + model = keras.Sequential([ + keras.layers.InputLayer(shape=[2 * args.window + 1], dtype="int32"), + # `Embedding` already implements one-hot encoding followed by a Dense layer, + # so a separate `CategoryEncoding` would be redundant (and would collapse the + # window into a single multi-hot vector, discarding the character positions). + keras.layers.Embedding(len(uppercase_data.train.alphabet), 8), + keras.layers.Flatten(), + keras.layers.Dense(64, activation='relu'), + keras.layers.Dropout(rate=0.5), + keras.layers.Dense(1, activation='sigmoid'), # sigmoid output for the binary uppercase/lowercase decision + ]) + + # Train on the windows and binary labels provided by UppercaseData. + model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.BinaryCrossentropy(), + metrics=[keras.metrics.BinaryAccuracy(name="accuracy")], + ) + model.fit( + uppercase_data.train.data["windows"], uppercase_data.train.data["labels"], + batch_size=args.batch_size, epochs=args.epochs, + validation_data=(uppercase_data.dev.data["windows"], uppercase_data.dev.data["labels"]), + ) + + # Generate correctly capitalized test set. + predictions = model.predict(uppercase_data.test.data["windows"], batch_size=args.batch_size) - # TODO: Generate correctly capitalized test set. # Use `uppercase_data.test.text` as input, capitalize suitable characters, # and write the result to predictions_file (which is # `uppercase_test.txt` in the `args.logdir` directory). os.makedirs(args.logdir, exist_ok=True) with open(os.path.join(args.logdir, "uppercase_test.txt"), "w", encoding="utf-8") as predictions_file: - ...
- +  new_text = "" + # `predictions` holds one uppercase probability per character of the test text. + for prediction, char in zip(predictions, uppercase_data.test.text): + if prediction > 0.5: + new_text += char.upper() + else: + new_text += char + predictions_file.write(new_text) if __name__ == "__main__": args = parser.parse_args([] if "__file__" not in globals() else None) diff --git a/labs/04/cifar_competition.ps1 b/labs/04/cifar_competition.ps1 new file mode 100644 index 0000000..0d919fe --- /dev/null +++ b/labs/04/cifar_competition.ps1 @@ -0,0 +1 @@ +clear && python .\cifar_competition.py diff --git a/labs/04/cifar_competition.py b/labs/04/cifar_competition.py index 0541de8..be29019 100644 --- a/labs/04/cifar_competition.py +++ b/labs/04/cifar_competition.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -11,13 +14,23 @@ from cifar10 import CIFAR10 -# TODO: Define reasonable defaults and optionally more parameters. +# Define reasonable defaults and optionally more parameters. # Also, you can set the number of threads to 0 to use all your CPU cores. parser = argparse.ArgumentParser() -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=128, type=int, help="Batch size.") +parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") +# parser.add_argument("--epochs", default=200, type=int, help="Number of epochs.") +parser.add_argument("--learning_rate", default=0.001, type=float, help="Initial learning rate.") +parser.add_argument( + "--weight_decay", default=1e-4, type=float, help="L2 regularization weight decay." +) +parser.add_argument( + "--label_smoothing", default=0.1, type=float, help="Label smoothing." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use."
+) class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -28,7 +41,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -39,13 +55,51 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) - + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) + +def create_res(input_layer, filters, kernel_size, strides): + # Note: the final Add requires the residual branch to preserve the input shape, + # so this block assumes `strides == 1` and `filters` matching the input channels. + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + activation=None, + use_bias=False, # the bias is redundant before batch normalization + )(input_layer) + + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Activation("relu")(h) + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=1, + padding="same", + activation=None, + use_bias=False, + )(h) + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Add()([input_layer, h]) + h = keras.layers.Activation("relu")(h) + return h def main(args: argparse.Namespace) -> None: + # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) if args.threads: @@ -53,23 +107,75 @@ def main(args: argparse.Namespace) -> None: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data cifar = CIFAR10() - # TODO: Create the model and train it - model = ...
+ # Create the model and train it + inputs = keras.Input(shape=cifar.train.data["images"][0].shape) + h = keras.layers.Rescaling(1 / 255)(inputs) + h = keras.layers.Conv2D(64, 3, 1, "same", activation="relu")(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = keras.layers.Dropout(0.2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.Flatten()(h) + h = keras.layers.Dropout(0.2)(h) + h = keras.layers.Dense(200, activation="relu")(h) + outputs = keras.layers.Dense(len(CIFAR10.LABELS), activation="softmax")(h) + + model = keras.Model(inputs=inputs, outputs=outputs) + model.summary() + + # Decay the learning rate over the total number of optimizer steps, + # i.e., the number of batches per epoch times the number of epochs. + lr_schedule = keras.optimizers.schedules.CosineDecay( + initial_learning_rate=args.learning_rate, + decay_steps=len(cifar.train.data["images"]) // args.batch_size * args.epochs + ) + + model.compile( + optimizer=keras.optimizers.Adam( + learning_rate=lr_schedule, + weight_decay=args.weight_decay), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + model.fit( + cifar.train.data["images"], + cifar.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + ) + + # Create the log directory before saving the model into it. + os.makedirs(args.logdir, exist_ok=True) + model.save(os.path.join(args.logdir, "cifar.h5"), include_optimizer=False) # Generate test set annotations, but in `args.logdir` to allow parallel execution. os.makedirs(args.logdir, exist_ok=True) - with open(os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8") as predictions_file: - # TODO: Perform the prediction on the test data. - for probs in model.predict(...): + with open( + os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8" + ) as predictions_file: + # Perform the prediction on the test data.
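+ # `model.predict` returns an array of shape (number of test images, 10) with + # the softmax class probabilities; `np.argmax` picks the most probable label.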
+ for probs in model.predict( + cifar.test.data["images"], batch_size=args.batch_size + ): print(np.argmax(probs), file=predictions_file) diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt new file mode 100644 index 0000000..63271eb --- /dev/null +++ b/labs/04/mnist_cnn results.txt @@ -0,0 +1,29 @@ +👉 TEST 1 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 + +👉 TEST 2 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606 + +👉 TEST 3 +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894 + +👉 TEST 4 +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079 + +👉 TEST 5 +python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 17ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537 + +👉 TEST 6 +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.7476 - loss: 0.7841 - val_accuracy: 0.9370 - val_loss: 0.2037 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734 diff --git a/labs/04/mnist_cnn.ps1 b/labs/04/mnist_cnn.ps1 new file mode 100644 index 0000000..bf78797 --- /dev/null +++ b/labs/04/mnist_cnn.ps1 @@ -0,0 +1,30 @@ +"" +"👉 TEST 1" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432" +"" +"👉 TEST 2" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606" +"" +"👉 TEST 3" +"python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894" +"" +"👉 TEST 4" +"python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079" +"" +"👉 TEST 5" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32" +python3 mnist_cnn.py --epochs=1 
--cnn=CB-6-3-5-valid,F,H-32 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537" +"" +"👉 TEST 6" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734" diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index a3a91cd..b3c5727 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise +import re + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch from mnist import MNIST @@ -11,42 +16,115 @@ parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") -parser.add_argument("--cnn", default=None, type=str, help="CNN architecture.") +parser.add_argument( + "--cnn", + default="CB-16-5-2-same,M-3-2,F,H-100,D-0.5", + type=str, + help="CNN architecture.", +) parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. +def create_layer(layer_type, layer_args, hidden): + # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU + # activation and specified number of filters, kernel size, stride and padding. + if layer_type == "C": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=int(kernel_size), + strides=int(stride), + padding=padding, + activation="relu", + )(hidden) + return hidden + + # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. + # In detail, start with a convolutional layer **without bias** and activation, + # then add a batch normalization layer, and finally the ReLU activation. + if layer_type == "CB": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=int(kernel_size), + strides=int(stride), + padding=padding, + use_bias=False, + )(hidden) + hidden = keras.layers.BatchNormalization()(hidden) + hidden = keras.layers.ReLU()(hidden) + return hidden + + # - `M-pool_size-stride`: Add max pooling with specified size and stride, using + # the default "valid" padding. + if layer_type == "M": + pool_size, stride = layer_args + hidden = keras.layers.MaxPooling2D( + pool_size=int(pool_size), + strides=int(stride), + )(hidden) + return hidden + + # - `R-[layers]`: Add a residual connection. The `layers` contain a specification + # of at least one convolutional layer (but not a recursive residual connection `R`).
+ # The input to the `R` layer should be processed sequentially by `layers`, and the + # produced output (after the ReLU nonlinearity of the last layer) should be added + # to the input (of this `R` layer). + if layer_type == "R": + input_layer = hidden + # Re-join the dash-split specification, strip the enclosing "[" and "]", + # and split the inner layer specifications on commas. + layers = "-".join(layer_args)[1:-1].split(",") + + for inner_layer in layers: + inner_type, *inner_args = inner_layer.split("-") + hidden = create_layer(inner_type, inner_args, hidden) + + hidden = keras.layers.Add()([input_layer, hidden]) + return hidden + + # - `F`: Flatten inputs. Must appear exactly once in the architecture. + if layer_type == "F": + hidden = keras.layers.Flatten()(hidden) + return hidden + + # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. + if layer_type == "H": + hidden_layer_size, = layer_args + hidden = keras.layers.Dense(units=int(hidden_layer_size), activation="relu")(hidden) + return hidden + + # - `D-dropout_rate`: Apply dropout with the given dropout rate. + if layer_type == "D": + dropout_rate, = layer_args + hidden = keras.layers.Dropout(rate=float(dropout_rate))(hidden) + return hidden + + class Model(keras.Model): def __init__(self, args: argparse.Namespace) -> None: - # TODO: Create the model. The template uses the functional API, but + # Create the model. The template uses the functional API, but # feel free to use subclassing if you want. inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]) hidden = keras.layers.Rescaling(1 / 255)(inputs) - # TODO: Add CNN layers specified by `args.cnn`, which contains - # a comma-separated list of the following layers: - # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU - # activation and specified number of filters, kernel size, stride and padding. - # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. - # In detail, start with a convolutional layer **without bias** and activation, - # then add a batch normalization layer, and finally the ReLU activation. - # - `M-pool_size-stride`: Add max pooling with specified size and stride, using - # the default "valid" padding. - # - `R-[layers]`: Add a residual connection. The `layers` contain a specification - # of at least one convolutional layer (but not a recursive residual connection `R`). - # The input to the `R` layer should be processed sequentially by `layers`, and the - # produced output (after the ReLU nonlinearity of the last layer) should be added - # to the input (of this `R` layer). - # - `F`: Flatten inputs. Must appear exactly once in the architecture. - # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. - # - `D-dropout_rate`: Apply dropout with the given dropout rate. + # Split on commas that are not inside the brackets of an `R-[...]` block. + cnn_args = re.split(r",(?![^\[]*\])", args.cnn) + + for layer in cnn_args: + layer_type, *layer_args = layer.split("-") + hidden = create_layer(layer_type, layer_args, hidden) + # You can assume the resulting network is valid; it is fine to crash if it is not. # # Produce the results in the variable `hidden`. - hidden = ...
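+ # For example, "CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50" is split + # into ["CB-8-3-5-valid", "R-[CB-8-3-1-same,CB-8-3-1-same]", "F", "H-50"]; the + # negative lookahead prevents splitting on the commas inside the `R-[...]` block.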
# Add the final output layer outputs = keras.layers.Dense(MNIST.LABELS, activation="softmax")(hidden) @@ -73,13 +151,19 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], - batch_size=args.batch_size, epochs=args.epochs, + mnist.train.data["images"], + mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1 new file mode 100644 index 0000000..3416b36 --- /dev/null +++ b/labs/04/mnist_multiple.ps1 @@ -0,0 +1,11 @@ +"" +"👉 TEST 1" +"python3 mnist_multiple.py --epochs=1 --batch_size=50" +python3 mnist_multiple.py --epochs=1 --batch_size=50 +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984" +"" +"👉 TEST 2" +"python3 mnist_multiple.py --epochs=1 --batch_size=100" +python3 mnist_multiple.py --epochs=1 --batch_size=100 +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157" +"" diff --git a/labs/04/mnist_multiple.py b/labs/04/mnist_multiple.py index 06b9d9e..def13ab 100644 --- a/labs/04/mnist_multiple.py +++ b/labs/04/mnist_multiple.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import numpy as np import keras @@ -13,9 +16,13 @@ # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=5, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values.
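The model in the following hunk passes both input images through one shared subnetwork; in Keras, calling the same layer instance on several inputs reuses a single set of parameters. A minimal sketch of this weight sharing (the names and sizes are illustrative, not part of the submission):

import keras

a = keras.Input(shape=[8])
b = keras.Input(shape=[8])
shared = keras.layers.Dense(4, activation="relu")  # one kernel, one bias
model = keras.Model([a, b], [shared(a), shared(b)])  # both outputs reuse them
assert len(model.trainable_weights) == 2  # only the shared kernel and bias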
@@ -27,7 +34,7 @@ def __init__(self, args: argparse.Namespace) -> None: keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]), ) - # TODO: The model starts by passing each input image through the same + # The model starts by passing each input image through the same # subnetwork (with shared weights), which should perform # - keras.layers.Rescaling(1 / 255) to convert images to floats in [0, 1] range, # - convolution with 10 filters, 3x3 kernel size, stride 2, "valid" padding, ReLU activation, @@ -36,24 +43,49 @@ def __init__(self, args: argparse.Namespace) -> None: # - fully connected layer with 200 neurons and ReLU activation, # obtaining a 200-dimensional feature vector FV of each image. - # TODO: Using the computed representations, the model should produce four outputs: + rescale = keras.layers.Rescaling(1 / 255) + c1 = keras.layers.Conv2D( + filters=10, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + c2 = keras.layers.Conv2D( + filters=20, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + flat = keras.layers.Flatten() + hidden = keras.layers.Dense(200, activation="relu") + + fv1 = hidden(flat(c2(c1(rescale(images[0]))))) + fv2 = hidden(flat(c2(c1(rescale(images[1]))))) + + # Using the computed representations, the model should produce four outputs: # - first, compute _direct comparison_ whether the first digit is # greater than the second, by # - concatenating the two 200-dimensional image representations FV, # - processing them using another 200-neuron ReLU dense layer # - computing one output using a dense layer with "sigmoid" activation + concatenation = keras.layers.Concatenate()([fv1, fv2]) + hidden2 = keras.layers.Dense(200, activation="relu") + pred_layer = keras.layers.Dense(1, activation="sigmoid") + direct_comparison = pred_layer(hidden2(concatenation)) # - then, classify the computed representation FV of the first image using # a densely connected softmax layer into 10 classes; # - then, classify the computed representation FV of the second image using # the same layer (identical, i.e., with shared weights) into 10 classes; + classification_layer = keras.layers.Dense(10, activation="softmax") + d1 = classification_layer(fv1) + d2 = classification_layer(fv2) # - finally, compute _indirect comparison_ whether the first digit # is greater than second, by comparing the predictions from the above # two outputs; convert the comparison to "float32" using `keras.ops.cast`. outputs = { - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., - "indirect_comparison": ..., + "direct_comparison": direct_comparison, + "digit_1": d1, + "digit_2": d2, + "indirect_comparison": keras.ops.cast( + keras.ops.greater( + keras.ops.argmax(d1, axis=1), keras.ops.argmax(d2, axis=1) + ), + "float32", + ), } # Finally, construct the model. @@ -65,7 +97,7 @@ def __init__(self, args: argparse.Namespace) -> None: # the keys of the `outputs` dictionary. self.output_names = sorted(outputs.keys()) - # TODO: Define the appropriate losses for the model outputs + # Define the appropriate losses for the model outputs # "direct_comparison", "digit_1", "digit_2". 
Regarding metrics, # the accuracy of both the direct and indirect comparisons should be # computed; name both metrics "accuracy" (i.e., pass "accuracy" as the @@ -73,19 +105,25 @@ def __init__(self, args: argparse.Namespace) -> None: self.compile( optimizer=keras.optimizers.Adam(), loss={ - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., + "direct_comparison": keras.losses.BinaryCrossentropy(), + "digit_1": keras.losses.SparseCategoricalCrossentropy(), + "digit_2": keras.losses.SparseCategoricalCrossentropy(), }, metrics={ - "direct_comparison": [...], - "indirect_comparison": [...], + "direct_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], + "indirect_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], }, ) # Create an appropriate dataset using the MNIST data. def create_dataset( - self, mnist_dataset: MNIST.Dataset, args: argparse.Namespace, + self, + mnist_dataset: MNIST.Dataset, + args: argparse.Namespace, ) -> torch.utils.data.Dataset: # Original MNIST dataset. images, labels = mnist_dataset.data["images"], mnist_dataset.data["labels"] @@ -94,16 +132,27 @@ def create_dataset( # You can assume that the size of the original dataset is even. class TorchDataset(torch.utils.data.Dataset): def __len__(self) -> int: - # TODO: The new dataset has half the size of the original one. - return ... + # The new dataset has half the size of the original one. + return len(images) // 2 - def __getitem__(self, index: int) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: - # TODO: Given an `index`, generate a dataset element suitable for our model. + def __getitem__( + self, index: int + ) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: + # Given an `index`, generate a dataset element suitable for our model. # Notably, the element should be a pair `(input, output)`, with # - `input` being a pair of images `(images[2 * index], images[2 * index + 1])`, # - `output` being a dictionary with keys "digit_1", "digit_2", "direct_comparison", # and "indirect_comparison". - return ... + return ( + (images[2 * index], images[2 * index + 1]), + { + "digit_1": labels[2 * index], + "digit_2": labels[2 * index + 1], + "direct_comparison": labels[2 * index] > labels[2 * index + 1], + "indirect_comparison": labels[2 * index] + > labels[2 * index + 1], + }, + ) return TorchDataset() @@ -122,14 +171,22 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) # Construct suitable dataloaders from the MNIST data. - train = torch.utils.data.DataLoader(model.create_dataset(mnist.train, args), args.batch_size, shuffle=True) - dev = torch.utils.data.DataLoader(model.create_dataset(mnist.dev, args), args.batch_size) + train = torch.utils.data.DataLoader( + model.create_dataset(mnist.train, args), args.batch_size, shuffle=True + ) + dev = torch.utils.data.DataLoader( + model.create_dataset(mnist.dev, args), args.batch_size + ) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) # Return development metrics for ReCodEx to validate. 
- return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": diff --git a/labs/04/torch_dataset.ps1 b/labs/04/torch_dataset.ps1 new file mode 100644 index 0000000..46fa378 --- /dev/null +++ b/labs/04/torch_dataset.ps1 @@ -0,0 +1,11 @@ +# "" +# "👉 TEST 1" +# "python3 torch_dataset.py --epochs=1 --batch_size=100" +# python3 torch_dataset.py --epochs=1 --batch_size=100 +# "50/50 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.1297 - loss: 2.2519 - val_accuracy: 0.2710 - val_loss: 1.9796" +"" +"👉 TEST 2" +"python3 torch_dataset.py --epochs=1 --batch_size=50 --augment" +python3 torch_dataset.py --epochs=1 --batch_size=50 --augment +"100/100 ━━━━━━━━━━━━━━━━━━━━ 4s 34ms/step - accuracy: 0.1354 - loss: 2.2565 - val_accuracy: 0.2690 - val_loss: 1.9889" +"" diff --git a/labs/04/torch_dataset.py b/labs/04/torch_dataset.py index 5e0c330..f689e54 100644 --- a/labs/04/torch_dataset.py +++ b/labs/04/torch_dataset.py @@ -53,54 +53,67 @@ def main(args: argparse.Namespace) -> dict[str, float]: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) - # TODO: Create a Torch dataset constructible from the given `CIFAR10.Dataset`. + # Create a Torch dataset constructible from the given `CIFAR10.Dataset`. # You should use only the first `size` examples of the dataset, and optional # augmentation function `augmentation_fn` may be applied to the images. class TorchDataset(torch.utils.data.Dataset): + images: np.ndarray + labels: np.ndarray + augmentation_fn: callable + def __init__(self, cifar: CIFAR10.Dataset, size: int, augmentation_fn=None) -> None: - # TODO: Note that the images and labels are available in `cifar.data["images"]` + # Note that the images and labels are available in `cifar.data["images"]` # and `cifar.data["labels"]`. - ... + self.images = cifar.data["images"][:size] + self.labels = cifar.data["labels"][:size] + self.augmentation_fn = augmentation_fn def __len__(self) -> int: - # TODO: Return the appropriate size. - ... + # Return the appropriate size. + return len(self.images) def __getitem__(self, index: int) -> tuple[np.ndarray | torch.Tensor, int]: - # TODO: Return the `index`-th example from the dataset, with the image optionally + # Return the `index`-th example from the dataset, with the image optionally # passed through the `augmentation_fn` if it is not `None`. - ... + image = self.images[index] + if self.augmentation_fn is not None: + image = self.augmentation_fn(image) + return image, self.labels[index] if args.augment: # Construct a sequence of augmentation transformations from `torchvision.transforms.v2`. transformation = v2.Compose([ - # TODO: Add the following transformations: + # Add the following transformations: # - first create a `v2.RandomResize` that scales the image to # random size in range [28, 36], # - then add `v2.Pad` that pads the image with 4 pixels on each side, # - then add `v2.RandomCrop` that chooses a random crop of size 32x32, # - and finally add `v2.RandomHorizontalFlip` that uniformly # randomly flips the image horizontally. - ...
+ v2.RandomResize(28, 36), + v2.Pad(4), + v2.RandomCrop(32), + v2.RandomHorizontalFlip(), ]) def augmentation_fn(image: np.ndarray) -> torch.Tensor: - # TODO: First, convert the numpy `images` to a PyTorch tensor of uint8s, + # First, convert the numpy `images` to a PyTorch tensor of uint8s, + # preferably by using `torch.from_numpy` or `torch.as_tensor` to avoid copying. # Then, because of the channels-position mismatch, permute the axes # in the image to change the order of the axes from HWC to CHW. # Next, apply the `transformation` to the image (by calling it with # the image as an argument), and finally permute the axes back to # the original order. - return ... + + return transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0) + else: augmentation_fn = None - # TODO: Create `train` and `dev` instances of `TorchDataset` from the corresponding + # Create `train` and `dev` instances of `TorchDataset` from the corresponding # `cifar` datasets. Limit their sizes to 5_000 and 1_000 examples, respectively, # and use the `augmentation_fn` for the training dataset. - train = ... - dev = ... + train = TorchDataset(cifar.train, 5_000, augmentation_fn) + dev = TorchDataset(cifar.dev, 1_000) if args.show_images: from torch.utils import tensorboard @@ -114,10 +127,10 @@ def augmentation_fn(image: np.ndarray) -> torch.Tensor: tb_writer.close() print("Saved first {} training images to logs/{}".format(GRID * GRID, TAG)) - # TODO: Create `train` and `dev` instances of `torch.utils.data.DataLoader` from + # Create `train` and `dev` instances of `torch.utils.data.DataLoader` from # the datasets, using the given `args.batch_size` and shuffling the training dataset. - train = ... - dev = ... + train = torch.utils.data.DataLoader(train, args.batch_size, shuffle=True) + dev = torch.utils.data.DataLoader(dev, args.batch_size) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) diff --git a/labs/team_description.py b/labs/team_description.py index 14ed5e1..1d232bc 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,4 +6,7 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: -# 01234567-89ab-cdef-0123-456789abcdef. +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 diff --git a/pull.ps1 b/pull.ps1 new file mode 100644 index 0000000..9cadfe4 --- /dev/null +++ b/pull.ps1 @@ -0,0 +1 @@ +git pull upstream master diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 0000000..f1f7bbe --- /dev/null +++ b/setup.ps1 @@ -0,0 +1,6 @@ +git remote rename origin upstream +git remote add origin git@github.com:joglr/npfl138.git +git fetch +git checkout master +python -m venv .venv +.venv/Scripts/pip install -r .\labs\requirements.txt
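As a closing aside, the augmentation pipeline in labs/04/torch_dataset.py can be sanity-checked in isolation; in the sketch below a dummy uint8 array stands in for a CIFAR-10 image (illustrative only):

import numpy as np
import torch
from torchvision.transforms import v2

transformation = v2.Compose([
    v2.RandomResize(28, 36),    # random resize, per the assignment
    v2.Pad(4),                  # 4 pixels of padding on every side
    v2.RandomCrop(32),          # random 32x32 crop
    v2.RandomHorizontalFlip(),  # flip with probability 0.5
])

image = np.zeros([32, 32, 3], dtype=np.uint8)  # HWC, like CIFAR10 images
augmented = transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0)
print(augmented.shape)  # torch.Size([32, 32, 3])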