Merge branch 'rvankoert:master' into master

knaw-huc · Apr 10, 2024 · 718a115 · 718a115
2 parents 0b8402b + 5d369ed
commit 718a115
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -272,6 +272,8 @@ LOGHI_MAX_QUEUE_SIZE     # Default: "10000": Maximum size of the processing queu
 LOGHI_PATIENCE           # Default: "0.5": Maximum time to wait for new images before predicting current batch
 ```
 
+**Important Note:** The directory that `LOGHI_MODEL_PATH` points to must contain a `config.json` file with at least a `channels` key set to the number of image channels the model expects. This file is generated automatically when a model is trained or fine-tuned. Older versions of Loghi-HTR (< 1.2.10) did not generate it, so please be aware that our `generic-2023-02-15` model ships without this file; that model is configured to use 1 channel.
+
 **GPU Options:**
 
 ```bash

diff --git a/src/data/manager.py b/src/data/manager.py
@@ -461,7 +461,7 @@ def _create_dataset(self,
         dataset = tf.data.Dataset.from_tensor_slices(data)
         if is_training:
             # Add additional repeat and shuffle for training
-            dataset = dataset.repeat().shuffle(len(files))
+            dataset = dataset.repeat().shuffle(len(data))
 
         dataset = (dataset
                    .map(data_loader.load_images,

diff --git a/src/model/losses.py b/src/model/losses.py
@@ -39,8 +39,10 @@ def ctc_batch_cost(y_true: tf.Tensor, y_pred: tf.Tensor,
     """
 
     # Squeeze the label and input length tensors to remove the last dimension
-    label_length = tf.cast(array_ops.squeeze(label_length), dtype="int32")
-    input_length = tf.cast(array_ops.squeeze(input_length), dtype="int32")
+    label_length = tf.cast(array_ops.squeeze(label_length, axis=-1),
+                           dtype="int32")
+    input_length = tf.cast(array_ops.squeeze(input_length, axis=-1),
+                           dtype="int32")
     sparse_labels = tf.cast(K.ctc_label_dense_to_sparse(y_true, label_length),
                             dtype="int32")