unicode-org · arulkolla · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 Data/
 __pycache__/
 .idea/
+.venv/
 README.html
 Embeddings-Discussion.html
 Models-Specifications.html
@@ -14,3 +15,4 @@ Models/Burmese_temp_genvec/
 Models/Burmese_model4_version2/
 Models/Other/
 *~
+**/.DS_Store
diff --git a/event_log.txt b/event_log.txt
diff --git a/lstm_word_segmentation/helpers.py b/lstm_word_segmentation/helpers.py
@@ -31,24 +31,11 @@ def sigmoid(inp):
         inp: the input which can be a scalar or a 1d numpy array
     """
     inp = np.asarray(inp)
-    scalar_input = False
-    if inp.ndim == 0:
-        inp = inp[None]
-        scalar_input = True
     # Checking for case when the input is an array/np.array of arrays. In this case only the first element of inp is
     # used. A common example is when A = np.array([np.array([1, 2, 3])]).
-    if type(inp[0]) == np.ndarray:
+    if inp.ndim == 2:
         inp = inp[0]
-    out = []
-    for x in inp:
-        if x < -20:
-            out.append(0)
-        else:
-            out.append(1.0/(1.0 + np.exp(-x)))
-    out = np.array(out)
-    if scalar_input:
-        return np.squeeze(out)
-    return out
+    return 1.0 / (1.0 + np.exp(-np.clip(inp, -709.78, 709.78)))
 
 
 def print_grapheme_clusters(thrsh, language, exclusive):

diff --git a/lstm_word_segmentation/word_segmenter.py b/lstm_word_segmentation/word_segmenter.py
@@ -701,4 +701,4 @@ def pick_lstm_model(model_name, embedding, train_data, eval_data):
                                    input_epochs=15, input_training_data=train_data, input_evaluation_data=eval_data,
                                    input_language=language, input_embedding_type=embedding)
     word_segmenter.set_model(model)
-    return word_segmenter
+    return word_segmenter
diff --git a/test/test_helpers.py b/test/test_helpers.py
@@ -67,7 +67,8 @@ def test_sigmoid(self):
             TestCase(0, np.array(0.5)),
             TestCase(np.array([0, -1000]), np.array([0.5, 0])),
             TestCase(np.array([[0, 1, 100, -1, -10]]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])),
-            TestCase(np.array([np.array([0, 1, 100, -1, -10]), np.array([1, 2, 3])]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])),
+            TestCase(np.array([np.array([0, 1, 100, -1, -10]), np.array([1, 2, 3, 4, 5])]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])),
+            TestCase(np.array([np.array([1, 2, 3])]), [0.73105858, 0.88079708, 0.95257413]),
         ]
         for cas in cases:
             computed = sigmoid(inp=cas.input)

diff --git a/train_thai.py b/train_thai.py
@@ -33,4 +33,4 @@
                                                                 word_segmenter.hunits))
 # word_segmenter.save_model()
 word_segmenter.test_model_line_by_line(verbose=True, fast=True)
-# '''
+# '''