diff --git a/README.md b/README.md index 84c395c..7a1836e 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,8 @@ LOGHI_MAX_QUEUE_SIZE # Default: "10000": Maximum size of the processing queu LOGHI_PATIENCE # Default: "0.5": Maximum time to wait for new images before predicting current batch ``` +**Important Note:** The `LOGHI_MODEL_PATH` must include a `config.json` file that contains at least the `channels` key and its corresponding value for the model. This file is expected to be automatically generated during the training or fine-tuning process of a model. Older versions of Loghi-HTR (< 1.2.10) did not do this automatically, so please be aware that our `generic-2023-02-15` model lacks this file by default and is configured to use 1 channel. + **GPU Options:** ```bash diff --git a/src/data/manager.py b/src/data/manager.py index cec403a..9fceca6 100644 --- a/src/data/manager.py +++ b/src/data/manager.py @@ -461,7 +461,7 @@ def _create_dataset(self, dataset = tf.data.Dataset.from_tensor_slices(data) if is_training: # Add additional repeat and shuffle for training - dataset = dataset.repeat().shuffle(len(files)) + dataset = dataset.repeat().shuffle(len(data)) dataset = (dataset .map(data_loader.load_images, diff --git a/src/model/losses.py b/src/model/losses.py index 3c12b92..8db8015 100644 --- a/src/model/losses.py +++ b/src/model/losses.py @@ -39,8 +39,10 @@ def ctc_batch_cost(y_true: tf.Tensor, y_pred: tf.Tensor, """ # Squeeze the label and input length tensors to remove the last dimension - label_length = tf.cast(array_ops.squeeze(label_length), dtype="int32") - input_length = tf.cast(array_ops.squeeze(input_length), dtype="int32") + label_length = tf.cast(array_ops.squeeze(label_length, axis=-1), + dtype="int32") + input_length = tf.cast(array_ops.squeeze(input_length, axis=-1), + dtype="int32") sparse_labels = tf.cast(K.ctc_label_dense_to_sparse(y_true, label_length), dtype="int32")