From 5f39af24e2d9d86c17ac9a9b821a17f959cca607 Mon Sep 17 00:00:00 2001 From: Kierszbaum Samuel Date: Wed, 9 Aug 2023 15:07:39 +0200 Subject: [PATCH] Minor correction to documentation (reformulations to clarify) --- docs/notebooks/Getting_started_1.ipynb | 138 ++++++++++++++++++++----- docs/notebooks/Getting_started_2.ipynb | 113 ++++++++++++-------- 2 files changed, 186 insertions(+), 65 deletions(-) diff --git a/docs/notebooks/Getting_started_1.ipynb b/docs/notebooks/Getting_started_1.ipynb index 45859867..52d5762b 100644 --- a/docs/notebooks/Getting_started_1.ipynb +++ b/docs/notebooks/Getting_started_1.ipynb @@ -17,7 +17,7 @@ "\n", "In this first notebook, our objective is to show how to create 1-Lipschitz neural networks with `deel-lip`. \n", "\n", - "In the course of this notebook, we will cover the following: \n", + "In particular, we will cover the following: \n", "1. [📚 Theoretical background](#theoretical_background) \n", "A brief theoretical background on Lipschitz continuous functions. This section can be safely skipped if one is not interested in the theory.\n", "2. [🧱 Creating a 1-Lipschitz neural network with `deel-lip` and `keras`](#deel_keras) \n", @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "1e3f0694-8547-4d06-b2aa-bfc0d008ff8b", "metadata": { "scrolled": true @@ -126,10 +126,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d6f7099e-a425-452d-9ed5-5328f0258a71", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " input_2 (InputLayer) [(None, 28, 28, 1)] 0 \n", + " \n", + " dense_3 (Dense) (None, 28, 28, 64) 128 \n", + " \n", + " dense_4 (Dense) (None, 28, 28, 32) 2080 \n", + " \n", + " dense_5 (Dense) (None, 28, 28, 10) 330 \n", + " \n", + "=================================================================\n", + "Total params: 2538 (9.91 KB)\n", + "Trainable params: 2538 (9.91 KB)\n", + "Non-trainable params: 0 (0.00 Byte)\n", + "_________________________________________________________________\n" + ] + } + ], "source": [ "inputs = keras.layers.Input(input_shape)\n", "x = layers.Dense(64, activation='relu')(inputs)\n", @@ -151,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "a62d3a3f-c0e8-4a3e-9025-758afebf99bd", "metadata": {}, "outputs": [], @@ -164,10 +188,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c26f52a6-c2ec-49b5-a99f-3353dc3f3044", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_1\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " spectral_dense (SpectralDe (None, 28, 28, 64) 257 \n", + " nse) \n", + " \n", + " spectral_dense_1 (Spectral (None, 28, 28, 32) 4161 \n", + " Dense) \n", + " \n", + " spectral_dense_2 (Spectral (None, 28, 28, 10) 661 \n", + " Dense) \n", + " \n", + "=================================================================\n", + "Total params: 5079 (19.84 KB)\n", + "Trainable params: 2538 (9.91 KB)\n", + "Non-trainable params: 2541 (9.93 KB)\n", + 
"_________________________________________________________________\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\kierszbaums\\anaconda.related\\envs\\1_lipschitz\\deel_lip\\lib\\site-packages\\keras\\src\\initializers\\initializers.py:120: UserWarning: The initializer Orthogonal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "K1_model = lip.model.Sequential([ \n", " keras.layers.Input(shape=input_shape),\n", @@ -197,10 +254,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "1a98e31f-40a9-46f5-a4e6-bd91229046e7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_1\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " input_4 (InputLayer) [(None, 28, 28, 1)] 0 \n", + " \n", + " spectral_dense_3 (Spectral (None, 28, 28, 64) 257 \n", + " Dense) \n", + " \n", + " spectral_dense_4 (Spectral (None, 28, 28, 32) 4161 \n", + " Dense) \n", + " \n", + " spectral_dense_5 (Spectral (None, 28, 28, 10) 661 \n", + " Dense) \n", + " \n", + "=================================================================\n", + "Total params: 5079 (19.84 KB)\n", + "Trainable params: 2538 (9.91 KB)\n", + "Non-trainable params: 2541 (9.93 KB)\n", + "_________________________________________________________________\n" + ] + } + ], "source": [ "inputs = keras.layers.Input(input_shape)\n", "x = lip.layers.SpectralDense(64, activation=activation(),k_coef_lip=1.)(inputs)\n", @@ -221,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "2ae5d775-fb90-4017-919d-bd34c08865cf", "metadata": {}, "outputs": [], @@ -235,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "6ff2460d-b1f3-43d6-964d-5919b24009e9", "metadata": {}, "outputs": [], @@ -248,15 +332,24 @@ "id": "de38b069-1705-408c-8dd3-99ed17cf519f", "metadata": {}, "source": [ - "- specify the Lipschitz constant of the whole model through the `k_coef_lip` attribute of the `Sequential` object, e.g.:" + "- by specifying the Lipschitz constant of the whole model through the `k_coef_lip` attribute of a `Sequential` object, e.g.:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "fb882d3e-163f-4aa8-902f-83922ac8da89", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (1507601557.py, line 3)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[8], line 3\u001b[1;36m\u001b[0m\n\u001b[1;33m ],\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + ] + } + ], "source": [ "K1_model = lip.model.Sequential([ \n", " ....\n", @@ -273,7 +366,7 @@ "source": [ "💡\n", "Keep in mind that all the classes above inherit from their respective `keras` equivalent (e.g. `Dense` for `SpectralDense`).
\n", - "As a result, these objects conveniently use the same interface and the same parameters as their keras equivalent, with the additional parameter `k_coef_lip` that controls the Lipschitz constant of the layers.\n", + "As a result, these objects conveniently use the same interface and the same parameters as their keras equivalent, with the additional parameter `k_coef_lip` that controls the Lipschitz constant.\n", "\n", "## 🔨 Design rules for 1-Lipschitz neural networks with `deel-lip` \n", "**Layer selection: `deel-lip` vs `keras`** \n", @@ -284,7 +377,7 @@ "\n", "This is the case for the following keras layers: `MaxPooling`, `GlobalMaxPooling`, `Flatten` and `Input`.\n", "\n", - "Here is the full list of `keras` layers for which there is a Lipschitz equivalent supported by `deel-lip`, that must be used in order to enforce Lipschitz continuity of the model that incorporates the layer:\n", + "Below is the full list of `keras` layers for which `deel-lip` provides a Lipschitz equivalent. If one wants to ensure a model's Lipschitz continuity, the alternative `deel-lip` layers must be employed instead of the original `keras` counterparts.\n", "\n", "| tensorflow.keras.layers | deel.lip.layers |\n", "| --------------- | --------------- |\n", @@ -294,17 +387,14 @@ "\n", "
\n", "\n", - "💡 Although there are additional layers available, the ones mentioned above are perfectly suitable and recommended for practical use. Interested readers can find information about the other layers [here](#documentation)\n", + "💡 Although there are additional Lipschitz continuous layers available in `deel-lip`, the ones mentioned above are perfectly suitable and recommended for practical use. Interested readers can find information about the other layers [here](#documentation).\n", "\n", "
\n", "\n", "\n", - "
\n", - "🚨 Warning:
\n", - "When creating a 1-Lipschitz neural network, one should avoid using the following layers:
\n", - " - `Dropout`: Our recommendation is not to use it, as we have not well understood its behaviour yet during learning
\n", - " - `BatchNormalization`: It is not 1-Lipschitz
\n", - "
" + "🚨 **Note:** *When creating a 1-Lipschitz neural network, one should avoid using the following layers:*
\n", + "- `Dropout`: Our current recommendation is to avoid using it, as we have not yet fully understood how it affects learning of 1-Lipschitz neural networks\n", + "- `BatchNormalization`: It is not 1-Lipschitz" ] }, { @@ -321,7 +411,7 @@ "- using the `GroupSort2` activation function stored in the `activations` submodule of `deel-lip` for the intermediate layers of a 1-Lipschitz neural network.\n", "- not using any activation function for the last layer of 1-Lipschitz neural networks.\n", "\n", - "💡 Interested readers can find information relevant to other activation functions that exist within `deel-lip` [here](https://deel-ai.github.io/deel-lip/api/layers/).\n", + "💡 Interested readers can find information relevant to other 1-Lipschitz activation functions that exist within `deel-lip` [here](https://deel-ai.github.io/deel-lip/api/layers/).\n", "\n", "\n", "**Loss function selection:**\n", diff --git a/docs/notebooks/Getting_started_2.ipynb b/docs/notebooks/Getting_started_2.ipynb index 49bd79a9..85580706 100644 --- a/docs/notebooks/Getting_started_2.ipynb +++ b/docs/notebooks/Getting_started_2.ipynb @@ -16,10 +16,10 @@ "source": [ "The goal of this series of tutorials is to show the different usages of `deel-lip`.\n", "\n", - "In this first notebook, we have shown how to create 1-Lipschitz neural networks with `deel-lip`. \n", + "In the first notebook, we have shown how to create 1-Lipschitz neural networks with `deel-lip`. \n", "In this second notebook, we will show how to train adversarially robust 1-Lipschitz neural networks with `deel-lip`. \n", "\n", - "In the course of this notebook, we will cover the following: \n", + "In particular, we will cover the following: \n", "1. [📚 Theoretical background](#theoretical_background) \n", "A brief theoretical background on adversarial robustness. This section can be safely skipped if one is not interested in the theory.\n", "2. [💪 Training adversarially robust 1-Lipschitz neural networks on the MNIST dataset](#deel_keras) \n", @@ -80,11 +80,10 @@ "\n", "Practically, training an adversarial robust predictive model consists in obtaining a formal guarantee that it remains accurate, even when exposed against adversarial attacks that are defined within certain specified constraints.\n", "\n", - "A major advantage of 1-Lipschitz neural networks is that they can offer provable guarantees on their robustness for any particular input $x$.\n", + "A major advantage of 1-Lipschitz neural networks is that they can offer provable guarantees on their robustness for any particular input $x$, by providing a *certificate* $\\epsilon_x$. \n", + "Such a guarantee can be understood by using the following terminology:\n", "\n", - "One can formulate such a guarantee, in terms such as follow:\n", - "\n", - "> \"For an input $x$, we can certify that are no adversarial perturbations constrained to be under the certificate $\\epsilon_x$ that will change our model's prediction.\"\n", + "> \"For an input $x$, we can certify that there are no adversarial perturbations constrained to be under the certificate $\\epsilon_x$ that will change our model's prediction.\"\n", "\n", "In simple mathematical terms: \n", "\n", @@ -94,15 +93,25 @@ "then:\n", "$$y_{\\epsilon}=y.$$\n", "\n", - "💡 The *certificate* $\\epsilon_x$ attached to an input $x$ can be deduced from the logits of the 1-Lipschitz neural network used. This will be the topic of another notebook. 
In this notebook, robustness will be approximated by the `MulticlassKR` loss, as shown later.\n", - "\n", - "💡 Depending on the type of norm you choose (e.g. L1 or L2), the guarantee you can offer will differ, as $||\\epsilon||_{L2}<\\epsilon_x$ and $||\\epsilon||_{L1}<\\epsilon_x$ are not equivalent.\n", + "💡 We will not generate certificates in this notebook. However, we feel that understanding certificates offers an effective avenue for acquiring insights into the nature of adversarial robustness in the context of 1-Lipschitz neural networks. This is why we present it here. \n", "\n", - "As such, other examples of guarantees with a more precise formulation would be:\n", + "💡 Depending on the type of norm you choose (e.g. L1 or L2), the guarantee you can offer will differ, as $||\\epsilon||_{L2}<\\epsilon_x$ and $||\\epsilon||_{L1}<\\epsilon_x$ are not equivalent. \n", + "As such, two additional examples of guarantees with a more precise formulation would be:\n", "> \"For an input $x$, we can certify that are no adversarial perturbations constrained to be within a $\\text{L2}$-norm ball of certificate $\\epsilon_{x,\\text{L2}}$ that will change our model's prediction.\"\n", "\n", + "For a given $x$, $\\forall \\epsilon$ such that $||\\epsilon||_{L2}<\\epsilon_{x,\\text{L2}}$, we obtain that:\n", + "$$A(x)=y,$$\n", + "$$A(x+\\epsilon)=y_{\\epsilon},$$\n", + "then:\n", + "$$y_{\\epsilon}=y.$$\n", + "\n", "> \"For an input $x$, we can certify that are no adversarial perturbations constrained to be within a $\\text{L1}$-norm ball of certificate $\\epsilon_{x,\\text{L1}}$ that will change our model's prediction.\"\n", "\n", + "For a given $x$, $\\forall \\epsilon$ such that $||\\epsilon||_{L1}<\\epsilon_{x,\\text{L1}}$, we obtain that:\n", + "$$A(x)=y,$$\n", + "$$A(x+\\epsilon)=y_{\\epsilon},$$\n", + "then:\n", + "$$y_{\\epsilon}=y.$$\n", "\n", "\n", "## 💪 Training adversarially robust 1-Lipschitz neural networks on the MNIST dataset \n", @@ -156,11 +165,11 @@ "id": "df14bc65-3ebb-4f3c-925b-9a21cefa7e23", "metadata": {}, "source": [ - "We show two cases. In the first case, we use `deel-lip`'s `TauCategoricalCrossentropy` from the `losses` submodule. In the second case, we use this other loss function from `deel-lip`: `MulticlassHKR`.\n", + "We show two cases. In the first case, we use `deel-lip`'s `TauCategoricalCrossentropy` from the `losses` submodule. In the second case, we use another loss function from `deel-lip`: `MulticlassHKR`.\n", "\n", "In particular, we will show how these functions can be parametrized to increase the robustness of our predictive models. We will also see that generally, there is a compromise between the robustness and the accuracy of our models (i.e. better robustness generally comes at the price of a decrease in performance).\n", "\n", - "Because we will need to instanciate four times the same model in our examples, we encapsulate the code to create our model in a function for conciseness:" + "Since we will be instantiating the same model four times within our examples, we encapsulate the code for creating the model within a function to enhance conciseness:" ] }, { @@ -241,14 +250,14 @@ "id": "c8aa1c60-0271-4b76-a0b7-7d2bf89f4550", "metadata": {}, "source": [ - "Similar to the classes we have seen in \"Getting started 1\", the `TauCategoricalCrossentropy` class is similar to its equivalent in `keras`, but it comes with a settable temperature parameter `tau`. This parameter will allow to adjust the robustness of our model. 
The lower the temperature is, the more robust our model becomes, but it also becomes less accurate.\n", + "Like the classes we saw in \"Getting started 1\", the `TauCategoricalCrossentropy` class inherits from its `keras` equivalent, but it comes with an additional settable parameter, the temperature `tau`. This parameter allows us to adjust the robustness of our model. The lower the temperature is, the more robust our model becomes, but it also becomes less accurate.\n", "\n", "To show the impact of the parameter `tau` on both the performance and robustness of our model, we will train two models on the MNIST dataset. The first model will have a temperature of 100, the second model will have a temperature of 3." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "fc8a3115-ccb4-4ce3-86a2-b3c0a22866b3", "metadata": {}, "outputs": [], @@ -268,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "795bf0ad-5495-4b55-aa24-baa6205415b5", "metadata": {}, "outputs": [], @@ -290,7 +299,7 @@ "id": "13859a2a-cd4a-4cc5-bc90-f61a07cd22ad", "metadata": {}, "source": [ - "💡 Notice that we use the accuracy metric to measure the performance, and we use the `MulticlassKR` loss to measure adversarial robustness. The latter acts as a proxy of our model's average certificates, **: the higher this measure is, the more robust our model is**. \n", + "💡 Notice that we use the accuracy metric to measure the performance, and we use the `MulticlassKR` loss to measure adversarial robustness. The latter acts as a proxy of our model's average certificates: **the higher this measure is, the more robust our model is**. 
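\n", + "As a sketch of how such a metric is attached at compile time (this mirrors, but is not taken from, the compile cells elided by this diff; the optimizer, its learning rate, and the usual `keras` import are assumptions):\n", + "```python\n", + "model_1.compile(\n", + "    loss=lip.losses.TauCategoricalCrossentropy(tau=100.0),  # temperature of 100\n", + "    optimizer=keras.optimizers.Adam(1e-3),\n", + "    metrics=['accuracy', lip.losses.MulticlassKR()],  # robustness proxy\n", + ")\n", + "```\n", + "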
\n", "\n", "**🚨 Note:** *This is true only for 1-Lipschitz neural networks*" ] @@ -305,10 +314,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "bba315c1-c4df-47aa-ae13-202831555355", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/2\n", + "235/235 [==============================] - 22s 87ms/step - loss: 0.0097 - accuracy: 0.7976 - MulticlassKR: 0.1047 - val_loss: 0.0029 - val_accuracy: 0.9181 - val_MulticlassKR: 0.1252\n", + "Epoch 2/2\n", + "235/235 [==============================] - 20s 87ms/step - loss: 0.0023 - accuracy: 0.9318 - MulticlassKR: 0.1247 - val_loss: 0.0017 - val_accuracy: 0.9504 - val_MulticlassKR: 0.1319\n" + ] + } + ], "source": [ "# fit the high-temperature model\n", "result_1=model_1.fit(\n", @@ -324,10 +344,21 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "914ac450-e529-406e-8233-79facc4c1190", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/2\n", + "235/235 [==============================] - 23s 91ms/step - loss: 0.3521 - accuracy: 0.7762 - MulticlassKR: 0.7967 - val_loss: 0.1592 - val_accuracy: 0.8998 - val_MulticlassKR: 1.4270\n", + "Epoch 2/2\n", + "235/235 [==============================] - 22s 93ms/step - loss: 0.1342 - accuracy: 0.9106 - MulticlassKR: 1.6038 - val_loss: 0.1077 - val_accuracy: 0.9305 - val_MulticlassKR: 1.7749\n" + ] + } + ], "source": [ "# fit the low-temperature model\n", "result_2=model_2.fit(\n", @@ -343,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 8, "id": "af5c9695-023b-4fd4-a1f8-d630f3298f43", "metadata": {}, "outputs": [ @@ -351,8 +382,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model accuracy: 0.9514\n", - "Model MulticlassKR: 0.1277\n", + "Model accuracy: 0.9504\n", + "Model MulticlassKR: 0.1319\n", "Loss' temperature: 100.0\n" ] } @@ -366,7 +397,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 9, "id": "f5ed957c-4308-4f75-9a41-dcbc88dcc341", "metadata": {}, "outputs": [ @@ -374,8 +405,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model accuracy: 0.9276\n", - "Model MulticlassKR: 1.7535\n", + "Model accuracy: 0.9305\n", + "Model MulticlassKR: 1.7749\n", "Loss' temperature: 3.0\n" ] } @@ -417,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "id": "a964983d-4347-4f57-8489-98e6a05cfd0c", "metadata": {}, "outputs": [], @@ -437,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "5b57e8ad-19fc-4791-8e28-d0e613cbddc5", "metadata": {}, "outputs": [], @@ -465,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "id": "b976968d-4494-4935-8338-06ebd99b6c78", "metadata": {}, "outputs": [ @@ -474,9 +505,9 @@ "output_type": "stream", "text": [ "Epoch 1/2\n", - "235/235 [==============================] - 23s 91ms/step - loss: 0.8637 - accuracy: 0.8007 - MulticlassKR: 0.2155 - val_loss: 0.1633 - val_accuracy: 0.9228 - val_MulticlassKR: 0.3141\n", + "235/235 [==============================] - 23s 89ms/step - loss: 0.9145 - accuracy: 0.7922 - MulticlassKR: 0.2037 - val_loss: 0.1653 - val_accuracy: 0.9289 - val_MulticlassKR: 0.2987\n", "Epoch 2/2\n", - "235/235 [==============================] - 21s 90ms/step - loss: 0.0387 - accuracy: 0.9298 - MulticlassKR: 0.3854 - val_loss: -0.1238 - val_accuracy: 0.9444 - 
val_MulticlassKR: 0.4765\n" + "235/235 [==============================] - 20s 87ms/step - loss: 0.0325 - accuracy: 0.9351 - MulticlassKR: 0.3733 - val_loss: -0.1407 - val_accuracy: 0.9475 - val_MulticlassKR: 0.4661\n" ] } ], @@ -495,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "8f19da69-9869-463e-8fac-b7f179e00315", "metadata": {}, "outputs": [ @@ -504,9 +535,9 @@ "output_type": "stream", "text": [ "Epoch 1/2\n", - "235/235 [==============================] - 23s 89ms/step - loss: 10.4944 - accuracy: 0.7172 - MulticlassKR: 0.8307 - val_loss: 3.8939 - val_accuracy: 0.8901 - val_MulticlassKR: 1.3772\n", + "235/235 [==============================] - 22s 87ms/step - loss: 10.9350 - accuracy: 0.6967 - MulticlassKR: 0.8041 - val_loss: 3.9802 - val_accuracy: 0.8884 - val_MulticlassKR: 1.3766\n", "Epoch 2/2\n", - "235/235 [==============================] - 21s 89ms/step - loss: 2.9442 - accuracy: 0.8982 - MulticlassKR: 1.5552 - val_loss: 1.9326 - val_accuracy: 0.9199 - val_MulticlassKR: 1.7427\n" + "235/235 [==============================] - 21s 87ms/step - loss: 2.9807 - accuracy: 0.9025 - MulticlassKR: 1.5747 - val_loss: 1.8917 - val_accuracy: 0.9229 - val_MulticlassKR: 1.7802\n" ] } ], @@ -525,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "id": "5bfe7641-f7af-4ac3-9ff4-fca3300448ff", "metadata": {}, "outputs": [ @@ -533,8 +564,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model accuracy: 0.9444\n", - "Model MulticlassKR: 0.4765\n", + "Model accuracy: 0.9475\n", + "Model MulticlassKR: 0.4661\n", "Loss' minimum margin: 0.1\n", "Loss' alpha: 50.0\n" ] @@ -550,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "7ce20cb8-797b-4e03-8600-d0030adfccc1", "metadata": {}, "outputs": [ @@ -558,8 +589,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model accuracy: 0.9199\n", - "Model MulticlassKR: 1.7427\n", + "Model accuracy: 0.9229\n", + "Model MulticlassKR: 1.7802\n", "Loss' minimum margin: 1.0\n", "Loss' alpha: 30.0\n" ]