From f0287092a53c4a27f0e6fe3940768bd25a8835fc Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 11:57:47 -0600 Subject: [PATCH 001/416] Add naive implementation of stump_topk --- tests/naive.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/naive.py b/tests/naive.py index 4089e603e..8f3a05da8 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1716,3 +1716,74 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w ) return total_ndists + + +def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1): + """ + Traverse distance matrix along the diagonals and update the top-k + nearest neigbors matrix profile and matrix profile indices + """ + if T_B is None: # self-join: + ignore_trivial = True + distance_matrix = np.array( + [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] + ) + T_B = T_A.copy() + else: + ignore_trivial = False + distance_matrix = np.array( + [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] + ) + + distance_matrix[np.isnan(distance_matrix)] = np.inf + + n_A = T_A.shape[0] + n_B = T_B.shape[0] + l = n_A - m + 1 + if exclusion_zone is None: + exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + if ignore_trivial: + diags = np.arange(exclusion_zone + 1, n_A - m + 1) + else: + diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) + + # the last two columns in P and I are to keep track of right and left mp for 1NN + P = np.full((l, k + 2), np.inf) + I = np.full((l, k + 2), -1, dtype=np.int64) + + for g in diags: + if g >= 0: + iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) + else: + iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - g)) + + for i in iter_range: + D = distance_matrix[i, i + g] + if D < P[i, k - 1]: + idx = np.searchsorted(P[i, :k], D, side='right') + P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] + I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] + + if ignore_trivial: # Self-joins only + if D < P[i + g, k - 1]: + idx = np.searchsorted(P[i + g, :k], D, side='right') + P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] + I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] + + if i < i + g: + # Left matrix profile and left matrix profile index + if D < P[i + g, k]: + P[i + g, k] = D + I[i + g, k] = i + + if D < P[i, k + 1]: + # right matrix profile and right matrix profile index + P[i, k + 1] = D + I[i, k + 1] = i + g + + result = np.empty((l, 2 * k + 2), dtype=object) + result[:, :k] = P[:, :k] + result[:, k:] = I[:, :] + + return result From e893873fc763a944b3d7e414d23e116762ee6693 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 12:10:32 -0600 Subject: [PATCH 002/416] Copy test_stump code to test_stump_topk --- tests/test_stump_topk.py | 242 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 tests/test_stump_topk.py diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py new file mode 100644 index 000000000..d3475122f --- /dev/null +++ b/tests/test_stump_topk.py @@ -0,0 +1,242 @@ +import numpy as np +import numpy.testing as npt +import pandas as pd +from stumpy import stump, config +import pytest +import naive + + +test_data = [ + ( + np.array([9, 8100, -60, 7], dtype=np.float64), + np.array([584, -11, 23, 79, 1001, 0, -19], dtype=np.float64), + ), + ( + np.random.uniform(-1000, 1000, [8]).astype(np.float64), + np.random.uniform(-1000, 1000, [64]).astype(np.float64), + ), +] + +substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 
3])] +substitution_values = [np.nan, np.inf] + + +def test_stump_int_input(): + with pytest.raises(TypeError): + stump(np.arange(10), 5) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone) + comp_mp = stump(T_B, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_A_B_join(T_A, T_B): + m = 3 + ref_mp = naive.stump(T_A, m, T_B=T_B) + comp_mp = stump(T_A, m, T_B, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +def test_stump_constant_subsequence_self_join(): + T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_A, m, exclusion_zone=zone) + comp_mp = stump(T_A, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + +def test_stump_one_constant_subsequence_A_B_join(): + T_A = np.random.rand(20) + T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) + m = 3 + ref_mp = naive.stamp(T_A, m, T_B=T_B) + comp_mp = stump(T_A, m, T_B, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + # Swap inputs + ref_mp = naive.stamp(T_B, m, T_B=T_A) + comp_mp = stump(T_B, m, T_A, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + +def test_stump_two_constant_subsequences_A_B_join(): + T_A = np.concatenate( + (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64)) + ) + T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) + m = 3 + ref_mp = naive.stamp(T_A, m, T_B=T_B) + comp_mp = stump(T_A, m, T_B, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + # Swap inputs + ref_mp = naive.stamp(T_B, m, T_B=T_A) + comp_mp = stump(T_B, m, T_A, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + comp_mp = stump(pd.Series(T_B), m, pd.Series(T_A), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices + + +def test_stump_identical_subsequence_self_join(): + identical = 
np.random.rand(8) + T_A = np.random.rand(20) + T_A[1 : 1 + identical.shape[0]] = identical + T_A[11 : 11 + identical.shape[0]] = identical + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stamp(T_A, m, exclusion_zone=zone) + comp_mp = stump(T_A, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal( + ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION + ) # ignore indices + + comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal( + ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION + ) # ignore indices + + +def test_stump_identical_subsequence_A_B_join(): + identical = np.random.rand(8) + T_A = np.random.rand(20) + T_B = np.random.rand(20) + T_A[1 : 1 + identical.shape[0]] = identical + T_B[11 : 11 + identical.shape[0]] = identical + m = 3 + ref_mp = naive.stamp(T_A, m, T_B=T_B) + comp_mp = stump(T_A, m, T_B, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal( + ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION + ) # ignore indices + + comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal( + ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION + ) # ignore indices + + # Swap inputs + ref_mp = naive.stamp(T_B, m, T_B=T_A) + comp_mp = stump(T_B, m, T_A, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal( + ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION + ) # ignore indices + + +@pytest.mark.parametrize("T_A, T_B", test_data) +@pytest.mark.parametrize("substitute_B", substitution_values) +@pytest.mark.parametrize("substitution_locations", substitution_locations) +def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations): + m = 3 + + T_B_sub = T_B.copy() + + for substitution_location_B in substitution_locations: + T_B_sub[:] = T_B[:] + T_B_sub[substitution_location_B] = substitute_B + + zone = int(np.ceil(m / 4)) + ref_mp = naive.stamp(T_B_sub, m, exclusion_zone=zone) + comp_mp = stump(T_B_sub, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump(pd.Series(T_B_sub), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +@pytest.mark.parametrize("substitute_A", substitution_values) +@pytest.mark.parametrize("substitute_B", substitution_values) +@pytest.mark.parametrize("substitution_locations", substitution_locations) +def test_stump_nan_inf_A_B_join( + T_A, T_B, substitute_A, substitute_B, substitution_locations +): + m = 3 + + T_A_sub = T_A.copy() + T_B_sub = T_B.copy() + + for substitution_location_B in substitution_locations: + for substitution_location_A in substitution_locations: + T_A_sub[:] = T_A[:] + T_B_sub[:] = T_B[:] + T_A_sub[substitution_location_A] = substitute_A + T_B_sub[substitution_location_B] = substitute_B + + ref_mp = naive.stamp(T_A_sub, m, T_B=T_B_sub) + comp_mp = stump(T_A_sub, m, T_B_sub, ignore_trivial=False) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump( + pd.Series(T_A_sub), m, pd.Series(T_B_sub), ignore_trivial=False + ) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +def 
test_stump_nan_zero_mean_self_join(): + T = np.array([-1, 0, 1, np.inf, 1, 0, -1]) + m = 3 + + zone = int(np.ceil(m / 4)) + ref_mp = naive.stamp(T, m, exclusion_zone=zone) + comp_mp = stump(T, m, ignore_trivial=True) + + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 986311f78dae7ca90db29a793d43fa23b0a3afe4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 12:15:46 -0600 Subject: [PATCH 003/416] change replace naive.stump with naive.stump_topk --- tests/test_stump_topk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py index d3475122f..290487460 100644 --- a/tests/test_stump_topk.py +++ b/tests/test_stump_topk.py @@ -28,9 +28,10 @@ def test_stump_int_input(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join(T_A, T_B): + k = 3 m = 3 zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_B, m, exclusion_zone=zone) + ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k) comp_mp = stump(T_B, m, ignore_trivial=True) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) @@ -43,8 +44,9 @@ def test_stump_self_join(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_A_B_join(T_A, T_B): + k = 3 m = 3 - ref_mp = naive.stump(T_A, m, T_B=T_B) + ref_mp = naive.stump_topk(T_A, m, T_B=T_B, k=k) comp_mp = stump(T_A, m, T_B, ignore_trivial=False) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) @@ -57,9 +59,10 @@ def test_stump_A_B_join(T_A, T_B): def test_stump_constant_subsequence_self_join(): T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) + k = 3 m = 3 zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_A, m, exclusion_zone=zone) + ref_mp = naive.stump_topk(T_A, m, exclusion_zone=zone, k=k) comp_mp = stump(T_A, m, ignore_trivial=True) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) From 9d8aafc3b75a051dee64aa72112dc8a3050b13b9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 13:04:36 -0600 Subject: [PATCH 004/416] Add self-join tests for 1NN and KNN --- tests/test_stump_topk.py | 202 ++------------------------------------- 1 file changed, 7 insertions(+), 195 deletions(-) diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py index 290487460..b3276b85b 100644 --- a/tests/test_stump_topk.py +++ b/tests/test_stump_topk.py @@ -27,8 +27,8 @@ def test_stump_int_input(): @pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join(T_A, T_B): - k = 3 +def test_stump_self_join_1NN(T_A, T_B): + k = 1 m = 3 zone = int(np.ceil(m / 4)) ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k) @@ -42,204 +42,16 @@ def test_stump_self_join(T_A, T_B): npt.assert_almost_equal(ref_mp, comp_mp) -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_A_B_join(T_A, T_B): - k = 3 - m = 3 - ref_mp = naive.stump_topk(T_A, m, T_B=T_B, k=k) - comp_mp = stump(T_A, m, T_B, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - -def test_stump_constant_subsequence_self_join(): - T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) +def test_stump_self_join_KNN(T_A, T_B): k = 3 m = 3 zone = int(np.ceil(m / 4)) - ref_mp = naive.stump_topk(T_A, m, exclusion_zone=zone, k=k) - comp_mp = stump(T_A, m, 
ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - -def test_stump_one_constant_subsequence_A_B_join(): - T_A = np.random.rand(20) - T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) - m = 3 - ref_mp = naive.stamp(T_A, m, T_B=T_B) - comp_mp = stump(T_A, m, T_B, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - # Swap inputs - ref_mp = naive.stamp(T_B, m, T_B=T_A) - comp_mp = stump(T_B, m, T_A, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - -def test_stump_two_constant_subsequences_A_B_join(): - T_A = np.concatenate( - (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64)) - ) - T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) - m = 3 - ref_mp = naive.stamp(T_A, m, T_B=T_B) - comp_mp = stump(T_A, m, T_B, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - # Swap inputs - ref_mp = naive.stamp(T_B, m, T_B=T_A) - comp_mp = stump(T_B, m, T_A, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - comp_mp = stump(pd.Series(T_B), m, pd.Series(T_A), ignore_trivial=False) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0]) # ignore indices - - -def test_stump_identical_subsequence_self_join(): - identical = np.random.rand(8) - T_A = np.random.rand(20) - T_A[1 : 1 + identical.shape[0]] = identical - T_A[11 : 11 + identical.shape[0]] = identical - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stamp(T_A, m, exclusion_zone=zone) - comp_mp = stump(T_A, m, ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal( - ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION - ) # ignore indices - - comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal( - ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION - ) # ignore indices - - -def test_stump_identical_subsequence_A_B_join(): - identical = np.random.rand(8) - T_A = np.random.rand(20) - T_B = np.random.rand(20) - T_A[1 : 1 + identical.shape[0]] = identical - T_B[11 : 11 + identical.shape[0]] = identical - m = 3 - ref_mp = naive.stamp(T_A, m, T_B=T_B) - comp_mp = stump(T_A, m, T_B, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal( - ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION - ) # ignore indices - - comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False) - naive.replace_inf(comp_mp) - 
npt.assert_almost_equal( - ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION - ) # ignore indices - - # Swap inputs - ref_mp = naive.stamp(T_B, m, T_B=T_A) - comp_mp = stump(T_B, m, T_A, ignore_trivial=False) + ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stump(T_B, m, ignore_trivial=True) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) - npt.assert_almost_equal( - ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION - ) # ignore indices - - -@pytest.mark.parametrize("T_A, T_B", test_data) -@pytest.mark.parametrize("substitute_B", substitution_values) -@pytest.mark.parametrize("substitution_locations", substitution_locations) -def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations): - m = 3 - - T_B_sub = T_B.copy() - - for substitution_location_B in substitution_locations: - T_B_sub[:] = T_B[:] - T_B_sub[substitution_location_B] = substitute_B - - zone = int(np.ceil(m / 4)) - ref_mp = naive.stamp(T_B_sub, m, exclusion_zone=zone) - comp_mp = stump(T_B_sub, m, ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump(pd.Series(T_B_sub), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - -@pytest.mark.parametrize("T_A, T_B", test_data) -@pytest.mark.parametrize("substitute_A", substitution_values) -@pytest.mark.parametrize("substitute_B", substitution_values) -@pytest.mark.parametrize("substitution_locations", substitution_locations) -def test_stump_nan_inf_A_B_join( - T_A, T_B, substitute_A, substitute_B, substitution_locations -): - m = 3 - - T_A_sub = T_A.copy() - T_B_sub = T_B.copy() - - for substitution_location_B in substitution_locations: - for substitution_location_A in substitution_locations: - T_A_sub[:] = T_A[:] - T_B_sub[:] = T_B[:] - T_A_sub[substitution_location_A] = substitute_A - T_B_sub[substitution_location_B] = substitute_B - - ref_mp = naive.stamp(T_A_sub, m, T_B=T_B_sub) - comp_mp = stump(T_A_sub, m, T_B_sub, ignore_trivial=False) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump( - pd.Series(T_A_sub), m, pd.Series(T_B_sub), ignore_trivial=False - ) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - -def test_stump_nan_zero_mean_self_join(): - T = np.array([-1, 0, 1, np.inf, 1, 0, -1]) - m = 3 - - zone = int(np.ceil(m / 4)) - ref_mp = naive.stamp(T, m, exclusion_zone=zone) - comp_mp = stump(T, m, ignore_trivial=True) + npt.assert_almost_equal(ref_mp, comp_mp) - naive.replace_inf(ref_mp) + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) From 121686b43187f053f23c09f07f2cf88f0ab1c238 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 13:09:15 -0600 Subject: [PATCH 005/416] remove variable k in 1NN test --- tests/test_stump_topk.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py index b3276b85b..3f277a0ad 100644 --- a/tests/test_stump_topk.py +++ b/tests/test_stump_topk.py @@ -28,10 +28,9 @@ def test_stump_int_input(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_1NN(T_A, T_B): - k = 1 m = 3 zone = int(np.ceil(m / 4)) - ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k) + ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=1) comp_mp = stump(T_B, m, ignore_trivial=True) 
naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) From 730bfbbee7e867b2373e5060503492bab533efd8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 13:11:49 -0600 Subject: [PATCH 006/416] Fixed passing input to test function --- tests/test_stump_topk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py index 3f277a0ad..4b722fd8f 100644 --- a/tests/test_stump_topk.py +++ b/tests/test_stump_topk.py @@ -41,6 +41,7 @@ def test_stump_self_join_1NN(T_A, T_B): npt.assert_almost_equal(ref_mp, comp_mp) +@pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): k = 3 m = 3 From f78348f3fadaface820e558c909e19cb0803503c Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 13:19:43 -0600 Subject: [PATCH 007/416] Fixed minor bug --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 8f3a05da8..6dd4bcb99 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1756,11 +1756,11 @@ def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1): if g >= 0: iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) else: - iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - g)) + iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) for i in iter_range: D = distance_matrix[i, i + g] - if D < P[i, k - 1]: + if D < P[i, k - 1]: #less than k-th smallest value of T[i:i+m] idx = np.searchsorted(P[i, :k], D, side='right') P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] From e09b5f05d16c4506ded15df432fcd27b2fc822df Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Apr 2022 13:31:18 -0600 Subject: [PATCH 008/416] Correct format --- tests/naive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 6dd4bcb99..91a88cea7 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1760,14 +1760,14 @@ def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1): for i in iter_range: D = distance_matrix[i, i + g] - if D < P[i, k - 1]: #less than k-th smallest value of T[i:i+m] - idx = np.searchsorted(P[i, :k], D, side='right') + if D < P[i, k - 1]: # less than k-th smallest value of T[i:i+m] + idx = np.searchsorted(P[i, :k], D, side="right") P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only if D < P[i + g, k - 1]: - idx = np.searchsorted(P[i + g, :k], D, side='right') + idx = np.searchsorted(P[i + g, :k], D, side="right") P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] From 95a8c081f745ea8781da5b4eaefceea936559471 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 01:15:01 -0600 Subject: [PATCH 009/416] Erase function stump_topk --- tests/naive.py | 71 -------------------------------------------------- 1 file changed, 71 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 91a88cea7..4089e603e 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1716,74 +1716,3 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w ) return total_ndists - - -def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1): - """ - Traverse distance matrix along the diagonals and update the top-k - nearest neigbors matrix profile and matrix profile indices - """ - if T_B is None: # self-join: - ignore_trivial = True - distance_matrix = np.array( - [distance_profile(Q, T_A, m) for Q in 
core.rolling_window(T_A, m)] - ) - T_B = T_A.copy() - else: - ignore_trivial = False - distance_matrix = np.array( - [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] - ) - - distance_matrix[np.isnan(distance_matrix)] = np.inf - - n_A = T_A.shape[0] - n_B = T_B.shape[0] - l = n_A - m + 1 - if exclusion_zone is None: - exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - - if ignore_trivial: - diags = np.arange(exclusion_zone + 1, n_A - m + 1) - else: - diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) - - # the last two columns in P and I are to keep track of right and left mp for 1NN - P = np.full((l, k + 2), np.inf) - I = np.full((l, k + 2), -1, dtype=np.int64) - - for g in diags: - if g >= 0: - iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) - else: - iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) - - for i in iter_range: - D = distance_matrix[i, i + g] - if D < P[i, k - 1]: # less than k-th smallest value of T[i:i+m] - idx = np.searchsorted(P[i, :k], D, side="right") - P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] - I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] - - if ignore_trivial: # Self-joins only - if D < P[i + g, k - 1]: - idx = np.searchsorted(P[i + g, :k], D, side="right") - P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] - I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] - - if i < i + g: - # Left matrix profile and left matrix profile index - if D < P[i + g, k]: - P[i + g, k] = D - I[i + g, k] = i - - if D < P[i, k + 1]: - # right matrix profile and right matrix profile index - P[i, k + 1] = D - I[i, k + 1] = i + g - - result = np.empty((l, 2 * k + 2), dtype=object) - result[:, :k] = P[:, :k] - result[:, k:] = I[:, :] - - return result From d0701fedd3060dcb0b97a266ceaae4beacae52e8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 16:18:58 -0600 Subject: [PATCH 010/416] Revise naive.stump to return topk NN matrix profile --- tests/naive.py | 64 +++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 4089e603e..0c49c5746 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -156,7 +156,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): return result -def stump(T_A, m, T_B=None, exclusion_zone=None): +def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): """ Traverse distance matrix along the diagonals and update the matrix profile and matrix profile indices @@ -181,45 +181,35 @@ def stump(T_A, m, T_B=None, exclusion_zone=None): if exclusion_zone is None: exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + is_included = np.ones_like(distance_matrix, dtype=bool) if ignore_trivial: - diags = np.arange(exclusion_zone + 1, n_A - m + 1) - else: - diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) + for i in range(l): + apply_exclusion_zone(is_included[i], i, exclusion_zone, False) - P = np.full((l, 3), np.inf) - I = np.full((l, 3), -1, dtype=np.int64) + P = np.full((l, k), np.inf) + I = np.full((l, k + 2), -1, dtype=np.int64) - for k in diags: - if k >= 0: - iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k)) - else: - iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k)) - - for i in iter_range: - D = distance_matrix[i, i + k] - if D < P[i, 0]: - P[i, 0] = D - I[i, 0] = i + k - - if ignore_trivial: # Self-joins only - if D < P[i + k, 0]: - P[i + k, 0] = D - I[i + k, 0] = i - - if i < i + k: - # Left matrix profile and left matrix profile index - if D < P[i + k, 1]: - P[i + k, 1] = D - I[i + 
k, 1] = i - - if D < P[i, 2]: - # right matrix profile and right matrix profile index - P[i, 2] = D - I[i, 2] = i + k - - result = np.empty((l, 4), dtype=object) - result[:, 0] = P[:, 0] - result[:, 1:4] = I[:, :] + for i in range(l): + mask = is_included[i] + IDX = np.argsort(distance_matrix[i][mask]) + nn_indices_sorted = np.flatnonzero(mask)[IDX] + + topk_indices = nn_indices_sorted[:k] + P[i, :k] = distance_matrix[i][topk_indices] + I[i, :k] = topk_indices + + if ignore_trivial: + left_indices = nn_indices_sorted[nn_indices_sorted < i] + if len(left_indices) > 0: + I[i, k] = left_indices[0] + + right_indices = nn_indices_sorted[nn_indices_sorted > i] + if len(right_indices) > 0: + I[i, k + 1] = right_indices[0] + + result = np.empty((l, 2 * k + 2), dtype=object) + result[:, :k] = P[:, :] + result[:, k:] = I[:, :] return result From 54445994ac87bccecf2a4252044d7e5cd0434718 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 16:29:53 -0600 Subject: [PATCH 011/416] Added a few comments --- tests/naive.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 0c49c5746..f9c9226ef 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -185,9 +185,13 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): if ignore_trivial: for i in range(l): apply_exclusion_zone(is_included[i], i, exclusion_zone, False) + # replacing values of distanc matrix to np.inf in excluion zone + # can cause problem later if there is nan/np.inf in data. So, + # it is better to use mask. P = np.full((l, k), np.inf) - I = np.full((l, k + 2), -1, dtype=np.int64) + I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are + # to store left and right matrix profile indices. for i in range(l): mask = is_included[i] From 9ebb08a4f274cd7c4e1f5a5f11c5c92cb5839721 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 17:17:06 -0600 Subject: [PATCH 012/416] Add one new test case for topk matrix profile --- tests/test_stump.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_stump.py b/tests/test_stump.py index d3475122f..67a6ec704 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -240,3 +240,19 @@ def test_stump_nan_zero_mean_self_join(): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join_KNN(T_A, T_B): + k = 2 + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stump(T_B, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From d83e8e6355813c15dbfc111a1e853ce1879c3027 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 17:20:17 -0600 Subject: [PATCH 013/416] Removed unnecessary test file --- tests/test_stump_topk.py | 57 ---------------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 tests/test_stump_topk.py diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py deleted file mode 100644 index 4b722fd8f..000000000 --- a/tests/test_stump_topk.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -import numpy.testing as npt -import pandas as pd -from stumpy import stump, config -import pytest -import naive - - -test_data = [ - ( - np.array([9, 8100, -60, 7], dtype=np.float64), - 
np.array([584, -11, 23, 79, 1001, 0, -19], dtype=np.float64), - ), - ( - np.random.uniform(-1000, 1000, [8]).astype(np.float64), - np.random.uniform(-1000, 1000, [64]).astype(np.float64), - ), -] - -substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])] -substitution_values = [np.nan, np.inf] - - -def test_stump_int_input(): - with pytest.raises(TypeError): - stump(np.arange(10), 5) - - -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_1NN(T_A, T_B): - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=1) - comp_mp = stump(T_B, m, ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_KNN(T_A, T_B): - k = 3 - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k) - comp_mp = stump(T_B, m, ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) From 9c8f019353991898bd8ad248053353af19e7c288 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 20:58:31 -0600 Subject: [PATCH 014/416] Set I to -1 if its corresponding P is not finite --- tests/naive.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index f9c9226ef..d3640b66c 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -158,8 +158,8 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): """ - Traverse distance matrix along the diagonals and update the matrix profile and - matrix profile indices + Traverse distance matrix in a row-wise manner and store topk nearest neighbor + matrix profile and matrix profile indices """ if T_B is None: # self-join: ignore_trivial = True @@ -181,35 +181,36 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): if exclusion_zone is None: exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - is_included = np.ones_like(distance_matrix, dtype=bool) if ignore_trivial: for i in range(l): - apply_exclusion_zone(is_included[i], i, exclusion_zone, False) - # replacing values of distanc matrix to np.inf in excluion zone - # can cause problem later if there is nan/np.inf in data. So, - # it is better to use mask. + apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) P = np.full((l, k), np.inf) - I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are + I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are # to store left and right matrix profile indices. 
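    # For each subsequence i, the loop below sorts its distance profile, keeps the k
    # smallest distances (with their indices) as the top-k matrix profile, and resets any
    # index whose distance is still np.inf to -1; for self-joins, it also records the
    # nearest neighbor strictly to the left and to the right of i (or -1 if that
    # neighbor's distance is not finite).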
for i in range(l): - mask = is_included[i] - IDX = np.argsort(distance_matrix[i][mask]) - nn_indices_sorted = np.flatnonzero(mask)[IDX] - - topk_indices = nn_indices_sorted[:k] + indices = np.argsort(distance_matrix[i]) + topk_indices = indices[:k] P[i, :k] = distance_matrix[i][topk_indices] - I[i, :k] = topk_indices + I[i, :k] = np.where(distance_matrix[i][topk_indices] != np.inf, topk_indices, -1) if ignore_trivial: - left_indices = nn_indices_sorted[nn_indices_sorted < i] + IL = -1 + left_indices = indices[indices < i] if len(left_indices) > 0: - I[i, k] = left_indices[0] + IL = left_indices[0] + if distance_matrix[i][IL] == np.inf: + IL = -1 + I[i, k] = IL - right_indices = nn_indices_sorted[nn_indices_sorted > i] + IR = -1 + right_indices = indices[indices > i] if len(right_indices) > 0: - I[i, k + 1] = right_indices[0] + IR = right_indices[0] + if distance_matrix[i][IR] == np.inf: + IR = -1 + I[i, k + 1] = IR result = np.empty((l, 2 * k + 2), dtype=object) result[:, :k] = P[:, :] From 0ce959549502e8091d1d017da8c95df73ae45401 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 21:04:16 -0600 Subject: [PATCH 015/416] Removed new test function --- tests/test_stump.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 67a6ec704..4d2bf312b 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -242,17 +242,17 @@ def test_stump_nan_zero_mean_self_join(): npt.assert_almost_equal(ref_mp, comp_mp) -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_KNN(T_A, T_B): - k = 2 - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) - comp_mp = stump(T_B, m, ignore_trivial=True) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - - comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) +#@pytest.mark.parametrize("T_A, T_B", test_data) +#def test_stump_self_join_KNN(T_A, T_B): +# k = 2 +# m = 3 +# zone = int(np.ceil(m / 4)) +# ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) +# comp_mp = stump(T_B, m, ignore_trivial=True) +# naive.replace_inf(ref_mp) +# naive.replace_inf(comp_mp) +# npt.assert_almost_equal(ref_mp, comp_mp) + +# comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) +# naive.replace_inf(comp_mp) +# npt.assert_almost_equal(ref_mp, comp_mp) From a9726984574deca4eb79c74b622581036604635c Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 21:06:59 -0600 Subject: [PATCH 016/416] Fixed format --- tests/naive.py | 4 +++- tests/test_stump.py | 16 ---------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index d3640b66c..98f639a08 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -193,7 +193,9 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): indices = np.argsort(distance_matrix[i]) topk_indices = indices[:k] P[i, :k] = distance_matrix[i][topk_indices] - I[i, :k] = np.where(distance_matrix[i][topk_indices] != np.inf, topk_indices, -1) + I[i, :k] = np.where( + distance_matrix[i][topk_indices] != np.inf, topk_indices, -1 + ) if ignore_trivial: IL = -1 diff --git a/tests/test_stump.py b/tests/test_stump.py index 4d2bf312b..d3475122f 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -240,19 +240,3 @@ def test_stump_nan_zero_mean_self_join(): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) 
npt.assert_almost_equal(ref_mp, comp_mp) - - -#@pytest.mark.parametrize("T_A, T_B", test_data) -#def test_stump_self_join_KNN(T_A, T_B): -# k = 2 -# m = 3 -# zone = int(np.ceil(m / 4)) -# ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) -# comp_mp = stump(T_B, m, ignore_trivial=True) -# naive.replace_inf(ref_mp) -# naive.replace_inf(comp_mp) -# npt.assert_almost_equal(ref_mp, comp_mp) - -# comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) -# naive.replace_inf(comp_mp) -# npt.assert_almost_equal(ref_mp, comp_mp) From e2d3061e132316cad0e4bbb74d0ff8f5bf0e52ce Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 21:14:07 -0600 Subject: [PATCH 017/416] minor change --- tests/naive.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 98f639a08..429b2ac99 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -161,7 +161,10 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): Traverse distance matrix in a row-wise manner and store topk nearest neighbor matrix profile and matrix profile indices """ - if T_B is None: # self-join: + if exclusion_zone is None: + exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + if T_B is None: # self-join: ignore_trivial = True distance_matrix = np.array( [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] @@ -175,12 +178,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): distance_matrix[np.isnan(distance_matrix)] = np.inf - n_A = T_A.shape[0] - n_B = T_B.shape[0] - l = n_A - m + 1 - if exclusion_zone is None: - exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - + l = T_A.shape[0] - m + 1 if ignore_trivial: for i in range(l): apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) From 1938f63363dc873a7c00300c66c54742ec9b0010 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 21:16:46 -0600 Subject: [PATCH 018/416] minor change --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 429b2ac99..ff50eecf7 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -164,7 +164,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): if exclusion_zone is None: exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - if T_B is None: # self-join: + if T_B is None: # self-join: ignore_trivial = True distance_matrix = np.array( [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] From 0e25a347ad7a3fa50d63144e32df771d9ad57545 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 22:32:24 -0600 Subject: [PATCH 019/416] Add new test function for topk matrix profile --- tests/test_stump.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_stump.py b/tests/test_stump.py index d3475122f..ea4bae3c9 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -240,3 +240,18 @@ def test_stump_nan_zero_mean_self_join(): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join_KNN(T_A, T_B): + k = 2 + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stump(T_B, m, ignore_trivial=True) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 
e3935851485cc4ecd9c097c915ab37c3946530fd Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Apr 2022 22:34:13 -0600 Subject: [PATCH 020/416] Fixed format --- tests/test_stump.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_stump.py b/tests/test_stump.py index ea4bae3c9..67a6ec704 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -241,6 +241,7 @@ def test_stump_nan_zero_mean_self_join(): naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): k = 2 From 850a5946c88465a4fa93fd91b113015752860ff2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 12:53:45 -0600 Subject: [PATCH 021/416] Use diagonal traversal to get top-k matrix profile - change naive.stump from row-wise to traversal - add a note to docstring to inform reader of row-wise traversal - use numpy.searchsort(side='right') --- tests/naive.py | 83 ++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 554c6f9fd..552c85cee 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -158,12 +158,11 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): """ - Traverse distance matrix in a row-wise manner and store topk nearest neighbor - matrix profile and matrix profile indices - """ - if exclusion_zone is None: - exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + Traverse distance matrix along the diagonals and update the top-k nearest + neighbor matrix profile and matrix profile indices + NOTE: For row-wise traversal, please use function `stamp` + """ if T_B is None: # self-join: ignore_trivial = True distance_matrix = np.array( @@ -178,42 +177,54 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): distance_matrix[np.isnan(distance_matrix)] = np.inf - l = T_A.shape[0] - m + 1 + n_A = T_A.shape[0] + n_B = T_B.shape[0] + l = n_A - m + 1 + if exclusion_zone is None: + exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + if ignore_trivial: - for i in range(l): - apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) + diags = np.arange(exclusion_zone + 1, n_A - m + 1) + else: + diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) - P = np.full((l, k), np.inf) - I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are - # to store left and right matrix profile indices. + P = np.full((l, k + 2), np.inf) + I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store + # ... left and right top-1 matrix profile indices. - for i in range(l): - indices = np.argsort(distance_matrix[i]) - topk_indices = indices[:k] - P[i, :k] = distance_matrix[i][topk_indices] - I[i, :k] = np.where( - distance_matrix[i][topk_indices] != np.inf, topk_indices, -1 - ) + for g in diags: + if g >= 0: + iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) + else: + iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) - if ignore_trivial: - IL = -1 - left_indices = indices[indices < i] - if len(left_indices) > 0: - IL = left_indices[0] - if distance_matrix[i][IL] == np.inf: - IL = -1 - I[i, k] = IL + for i in iter_range: + D = distance_matrix[i, i + g] + if D < P[i, k-1]: + idx = np.searchsorted(P[i, :k], D, side='right') + # to keep the top-k, we need to the get rid of the last element. 
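                # searchsorted with side='right' gives the insertion point that keeps
                # P[i, :k] sorted in ascending order; on ties, the new distance is placed
                # after existing equal distances, so earlier neighbors keep precedence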
+ P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] + I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] - IR = -1 - right_indices = indices[indices > i] - if len(right_indices) > 0: - IR = right_indices[0] - if distance_matrix[i][IR] == np.inf: - IR = -1 - I[i, k + 1] = IR - - result = np.empty((l, 2 * k + 2), dtype=object) - result[:, :k] = P[:, :] + if ignore_trivial: # Self-joins only + if D < P[i + g, k-1]: + idx = np.searchsorted(P[i + g, :k], D, side='right') + P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] + I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] + + if i < i + g: + # Left matrix profile and left matrix profile index + if D < P[i + g, k]: + P[i + g, k] = D + I[i + g, k] = i + + if D < P[i, k + 1]: + # right matrix profile and right matrix profile index + P[i, k + 1] = D + I[i, k + 1] = i + g + + result = np.empty((2 * k + 2, 4), dtype=object) + result[:, :k] = P[:, :k] result[:, k:] = I[:, :] return result From 278e76ca5e74c53276b1e20cc6d4ab3efd8bc078 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 13:00:21 -0600 Subject: [PATCH 022/416] Fixed shape of naive.stump output --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 552c85cee..871d52024 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -223,7 +223,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): P[i, k + 1] = D I[i, k + 1] = i + g - result = np.empty((2 * k + 2, 4), dtype=object) + result = np.empty((l, 2 * k + 2), dtype=object) result[:, :k] = P[:, :k] result[:, k:] = I[:, :] From a864662b41f8553df6fcc1f1b9b3b341beb5cc31 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 13:29:52 -0600 Subject: [PATCH 023/416] Add naive version of numpy.searchsorted --- tests/naive.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/naive.py b/tests/naive.py index 871d52024..010836639 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -156,6 +156,14 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): return result +def searchsorted(a, v): + indices = np.flatnonzero(v < a) + if len(indices): + return indices.min() + else: + return len(a) + + def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): """ Traverse distance matrix along the diagonals and update the top-k nearest From f0c022da2fb61b1c9840d59e3a2034222dae65c4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 13:30:41 -0600 Subject: [PATCH 024/416] Replace numpy.searchsorted with its naive version --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 010836639..24ca851c7 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -209,14 +209,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): for i in iter_range: D = distance_matrix[i, i + g] if D < P[i, k-1]: - idx = np.searchsorted(P[i, :k], D, side='right') + idx = searchsorted(P[i, :k], D, side='right') # to keep the top-k, we need to the get rid of the last element. 
P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only if D < P[i + g, k-1]: - idx = np.searchsorted(P[i + g, :k], D, side='right') + idx = searchsorted(P[i + g, :k], D, side='right') P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] From 81701ba3620abb480b3852909ffe6fd0b46874ec Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 13:33:35 -0600 Subject: [PATCH 025/416] Fixed calling function searchsorted --- tests/naive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 24ca851c7..a282d49c0 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -157,6 +157,9 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): def searchsorted(a, v): + """ + naive version of numpy.searchsorted(..., side='right') + """ indices = np.flatnonzero(v < a) if len(indices): return indices.min() @@ -209,14 +212,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): for i in iter_range: D = distance_matrix[i, i + g] if D < P[i, k-1]: - idx = searchsorted(P[i, :k], D, side='right') + idx = searchsorted(P[i, :k], D) # to keep the top-k, we need to the get rid of the last element. P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only if D < P[i + g, k-1]: - idx = searchsorted(P[i + g, :k], D, side='right') + idx = searchsorted(P[i + g, :k], D) P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] From e244341a9291119a6f3f48ca07f9b7a11203c545 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 2 May 2022 13:36:51 -0600 Subject: [PATCH 026/416] Fixed format --- tests/naive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index a282d49c0..0f70ae7b4 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -200,7 +200,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) P = np.full((l, k + 2), np.inf) - I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store + I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store # ... left and right top-1 matrix profile indices. for g in diags: @@ -211,14 +211,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1): for i in iter_range: D = distance_matrix[i, i + g] - if D < P[i, k-1]: + if D < P[i, k - 1]: idx = searchsorted(P[i, :k], D) # to keep the top-k, we need to the get rid of the last element. 
P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only - if D < P[i + g, k-1]: + if D < P[i + g, k - 1]: idx = searchsorted(P[i + g, :k], D) P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] From 1806c66241547cbdd9ac02c0313d16157b5f700e Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 12:13:05 -0600 Subject: [PATCH 027/416] minor changes --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 5592af064..3028dd15c 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -240,7 +240,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): D = distance_matrix[i, i + g] # D: a single element if D < P[i, k - 1]: idx = searchsorted(P[i, :k], D) - # to keep the top-k, we need to the get rid of the last element. + # to keep the top-k, we must get rid of the last element. P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] From ad29c19cc83d6388a1caab1136fdb4fbf82596fb Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 12:14:10 -0600 Subject: [PATCH 028/416] Correct format --- tests/naive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 3028dd15c..849c8d080 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -203,11 +203,11 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): for i in range(l): apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) - for i, D in enumerate(distance_matrix): # D: distance profile + for i, D in enumerate(distance_matrix): # D: distance profile # self-join / AB-join: matrix proifle and indices indices = np.argsort(D)[:k] P[i, :k] = D[indices] - indices[P[i,:k] == np.inf] = -1 + indices[P[i, :k] == np.inf] = -1 I[i, :k] = indices # self-join: left matrix profile index (top-1) @@ -237,7 +237,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) for i in iter_range: - D = distance_matrix[i, i + g] # D: a single element + D = distance_matrix[i, i + g] # D: a single element if D < P[i, k - 1]: idx = searchsorted(P[i, :k], D) # to keep the top-k, we must get rid of the last element. 
From 448d65d69d10c03063c29062cf6c09124281eb78 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 12:35:49 -0600 Subject: [PATCH 029/416] Correct flake8 style --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 849c8d080..dacba3075 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -158,7 +158,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): # pragma: no cover def searchsorted(a, v): """ - naive version of numpy.searchsorted(..., side='right') + Naive version of numpy.searchsorted(..., side='right') """ indices = np.flatnonzero(v < a) if len(indices): From e3ebcb5885085ab25e58ddc98acd8a7bfb7afac0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 12:46:10 -0600 Subject: [PATCH 030/416] Avoid unnecessary slicing --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index dacba3075..67d1fb27c 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -239,14 +239,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): for i in iter_range: D = distance_matrix[i, i + g] # D: a single element if D < P[i, k - 1]: - idx = searchsorted(P[i, :k], D) + idx = searchsorted(P[i], D) # to keep the top-k, we must get rid of the last element. P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only if D < P[i + g, k - 1]: - idx = searchsorted(P[i + g, :k], D) + idx = searchsorted(P[i + g], D) P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] From 3cee5d85749eaa0987697e10e937fe5db65c9604 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 19:28:08 -0600 Subject: [PATCH 031/416] pass parameter k to function stump --- tests/test_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 783163453..1ce70acc5 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -248,7 +248,7 @@ def test_stump_self_join_KNN(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) - comp_mp = stump(T_B, m, ignore_trivial=True) + comp_mp = stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) From a1bc6a4182207f68050da74511d78f46b469b778 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 19:38:44 -0600 Subject: [PATCH 032/416] Add parameter k to function stump --- stumpy/stump.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 97334eb5a..115752113 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -433,7 +433,7 @@ def _stump( @core.non_normalized(aamp) -def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): +def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): """ Compute the z-normalized matrix profile @@ -467,6 +467,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. 
+ k : int, default 1 + The number of smallest elements in distance profile that should be stored + for constructing top-k matrix profile + Returns ------- out : numpy.ndarray @@ -587,7 +591,6 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): l = n_A - m + 1 excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - out = np.empty((l, 4), dtype=object) if ignore_trivial: diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) @@ -612,8 +615,9 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): ignore_trivial, ) - out[:, 0] = P[:, 0] - out[:, 1:] = I + out = np.empty((l, 2 * k + 2), dtype=object) + out[:, :k] = P[:, :k] + out[:, k:] = I threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover From 384690cc6492019d66d8b9104a9297c5a0fbcc11 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 20:21:19 -0600 Subject: [PATCH 033/416] Add parameter k to function _stump --- stumpy/stump.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 115752113..bedd5bf6b 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -235,6 +235,7 @@ def _stump( T_B_subseq_isconstant, diags, ignore_trivial, + k, ): """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel @@ -294,6 +295,10 @@ def _stump( Set to `True` if this is a self-join. Otherwise, for AB-join, set this to `False`. Default is `True`. + k : int + The number of smallest elements in distance profile that should be stored + for constructing top-k matrix profile. + Returns ------- profile : numpy.ndarray @@ -353,8 +358,8 @@ def _stump( n_B = T_B.shape[0] l = n_A - m + 1 n_threads = numba.config.NUMBA_NUM_THREADS - ρ = np.full((n_threads, l, 3), -np.inf, dtype=np.float64) - I = np.full((n_threads, l, 3), -1, dtype=np.int64) + ρ = np.full((n_threads, l, k + 2), -np.inf, dtype=np.float64) + I = np.full((n_threads, l, k + 2), -1, dtype=np.int64) ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B) diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False) @@ -406,27 +411,18 @@ def _stump( # Reduction of results from all threads for thread_idx in range(1, n_threads): for i in prange(l): - if ρ[0, i, 0] < ρ[thread_idx, i, 0]: - ρ[0, i, 0] = ρ[thread_idx, i, 0] - I[0, i, 0] = I[thread_idx, i, 0] - # left pearson correlation and left matrix profile indices - if ρ[0, i, 1] < ρ[thread_idx, i, 1]: - ρ[0, i, 1] = ρ[thread_idx, i, 1] - I[0, i, 1] = I[thread_idx, i, 1] - # right pearson correlation and right matrix profile indices - if ρ[0, i, 2] < ρ[thread_idx, i, 2]: - ρ[0, i, 2] = ρ[thread_idx, i, 2] - I[0, i, 2] = I[thread_idx, i, 2] + for j in range(k + 2): # alternative: use mask + if ρ[0, i, j] < ρ[thread_idx, i, j]: + ρ[0, i, j] = ρ[thread_idx, i, j] + I[0, i, j] = I[thread_idx, i, j] # Convert pearson correlations to distances p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) for i in prange(p_norm.shape[0]): - if p_norm[i, 0] < config.STUMPY_P_NORM_THRESHOLD: - p_norm[i, 0] = 0.0 - if p_norm[i, 1] < config.STUMPY_P_NORM_THRESHOLD: - p_norm[i, 1] = 0.0 - if p_norm[i, 2] < config.STUMPY_P_NORM_THRESHOLD: - p_norm[i, 2] = 0.0 + for j in range(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` + if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: + p_norm[i, j] = 0.0 + P = np.sqrt(p_norm) return P[:, :], I[0, :, :] @@ -469,7 +465,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k : int, 
default 1 The number of smallest elements in distance profile that should be stored - for constructing top-k matrix profile + for constructing top-k matrix profile. Returns ------- @@ -613,6 +609,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): T_B_subseq_isconstant, diags, ignore_trivial, + k, ) out = np.empty((l, 2 * k + 2), dtype=object) From d246736717bac279d87970a8627e3c222d8fefa9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 20:45:08 -0600 Subject: [PATCH 034/416] Fixed update of top-k rho and indices in _stump --- stumpy/stump.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index bedd5bf6b..cc70e76c4 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -411,15 +411,30 @@ def _stump( # Reduction of results from all threads for thread_idx in range(1, n_threads): for i in prange(l): - for j in range(k + 2): # alternative: use mask - if ρ[0, i, j] < ρ[thread_idx, i, j]: - ρ[0, i, j] = ρ[thread_idx, i, j] - I[0, i, j] = I[thread_idx, i, j] + # top-k + for j in range(k): + if ρ[0, i, k-1] < ρ[thread_idx, i, j]: + idx = k - np.searchsorted( + ρ[0, i, :k][::-1], ρ[thread_idx, i, j] + ) + ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1] + ρ[0, i, idx] = ρ[thread_idx, i, j] + + I[0, i, idx + 1 : k] = I[0, i, idx : k - 1] + I[0, i, idx] = I[thread_idx, i, j] + + if ρ[0, i, k] < ρ[thread_idx, i, k]: + ρ[0, i, k] = ρ[thread_idx, i, k] + I[0, i, k] = I[thread_idx, i, k] + + if ρ[0, i, k + 1] < ρ[thread_idx, i, k + 1]: + ρ[0, i, k + 1] = ρ[thread_idx, i, k + 1] + I[0, i, k + 1] = I[thread_idx, i, k + 1] # Convert pearson correlations to distances p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) for i in prange(p_norm.shape[0]): - for j in range(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` + for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: p_norm[i, j] = 0.0 From fdff040c1324fb7c804862a02ee0cf207edad8b4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 20:59:11 -0600 Subject: [PATCH 035/416] Add parameter k to function _compute_diagonal --- stumpy/stump.py | 68 +++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index cc70e76c4..f0f09e083 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -125,6 +125,10 @@ def _compute_diagonal( Set to `True` if this is a self-join. Otherwise, for AB-join, set this to `False`. Default is `True`. + k : int + The number of smallest elements in distance profile that should be stored + for constructing top-k matrix profile. 
+ Returns ------- None @@ -154,18 +158,18 @@ def _compute_diagonal( constant = (m - 1) * m_inverse * m_inverse # (m - 1)/(m * m) for diag_idx in range(diags_start_idx, diags_stop_idx): - k = diags[diag_idx] + g = diags[diag_idx] - if k >= 0: - iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k)) + if g >= 0: + iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) else: - iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k)) + iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) for i in iter_range: - if i == 0 or (k < 0 and i == -k): + if i == 0 or (g < 0 and i == -g): cov = ( np.dot( - (T_B[i + k : i + k + m] - M_T[i + k]), (T_A[i : i + m] - μ_Q[i]) + (T_B[i + g : i + g + m] - M_T[i + g]), (T_A[i : i + m] - μ_Q[i]) ) * m_inverse ) @@ -177,38 +181,51 @@ def _compute_diagonal( # - (T_B[i + k - 1] - M_T_m_1[i + k]) * (T_A[i - 1] - μ_Q_m_1[i]) # ) cov = cov + constant * ( - cov_a[i + k] * cov_b[i] - cov_c[i + k] * cov_d[i] + cov_a[i + g] * cov_b[i] - cov_c[i + g] * cov_d[i] ) - if T_B_subseq_isfinite[i + k] and T_A_subseq_isfinite[i]: + if T_B_subseq_isfinite[i + g] and T_A_subseq_isfinite[i]: # Neither subsequence contains NaNs - if T_B_subseq_isconstant[i + k] or T_A_subseq_isconstant[i]: + if T_B_subseq_isconstant[i + g] or T_A_subseq_isconstant[i]: pearson = 0.5 else: - pearson = cov * Σ_T_inverse[i + k] * σ_Q_inverse[i] + pearson = cov * Σ_T_inverse[i + g] * σ_Q_inverse[i] - if T_B_subseq_isconstant[i + k] and T_A_subseq_isconstant[i]: + if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]: pearson = 1.0 - if pearson > ρ[thread_idx, i, 0]: - ρ[thread_idx, i, 0] = pearson - I[thread_idx, i, 0] = i + k + if pearson > ρ[thread_idx, i, k - 1]: + idx = k - np.searchsorted( + ρ[thread_idx, i, :k][::-1], pearson + ) + ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1] + ρ[thread_idx, i, idx] = pearson + I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1] + I[thread_idx, i, idx] = i + g if ignore_trivial: # self-joins only - if pearson > ρ[thread_idx, i + k, 0]: - ρ[thread_idx, i + k, 0] = pearson - I[thread_idx, i + k, 0] = i - - if i < i + k: + if pearson > ρ[thread_idx, i + g, k - 1]: + idx = k - np.searchsorted( + ρ[thread_idx, i + g, :k][::-1], pearson + ) + ρ[thread_idx, i + g, idx + 1 : k] = ρ[thread_idx, i + g, idx : k - 1] + ρ[thread_idx, i + g, idx] = pearson + I[thread_idx, i + g, idx + 1 : k] = I[thread_idx, i + g, idx : k - 1] + I[thread_idx, i + g, idx] = i + # for top-1 case: + #ρ[thread_idx, i + g, 0] = pearson + #I[thread_idx, i + g, 0] = i + + if i < i + g: # left pearson correlation and left matrix profile index - if pearson > ρ[thread_idx, i + k, 1]: - ρ[thread_idx, i + k, 1] = pearson - I[thread_idx, i + k, 1] = i + if pearson > ρ[thread_idx, i + g, k]: + ρ[thread_idx, i + g, k] = pearson + I[thread_idx, i + g, k] = i # right pearson correlation and right matrix profile index - if pearson > ρ[thread_idx, i, 2]: - ρ[thread_idx, i, 2] = pearson - I[thread_idx, i, 2] = i + k + if pearson > ρ[thread_idx, i, k + 1]: + ρ[thread_idx, i, k + 1] = pearson + I[thread_idx, i, k + 1] = i + g return @@ -406,6 +423,7 @@ def _stump( ρ, I, ignore_trivial, + k, ) # Reduction of results from all threads From 9d721982f4a10d3e01dbe3fdf0403fb33372aec7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 21:08:13 -0600 Subject: [PATCH 036/416] consider parameter k in non normalized function, decorator --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index a2a30c043..391ce6b57 
100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,7 +121,7 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p"] + exclude = ["normalize", "p", "k"] @functools.wraps(non_norm) def outer_wrapper(norm): From 995559ffe6f49aa20ab71f3b33846b3717ce4e1d Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 21:11:37 -0600 Subject: [PATCH 037/416] Fixed missing input parameter k in function _compute_diagonal --- stumpy/stump.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/stump.py b/stumpy/stump.py index f0f09e083..45c4e533c 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -42,6 +42,7 @@ def _compute_diagonal( ρ, I, ignore_trivial, + k ): """ Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I From a047dd002a93b387f664189ca401405b19fdec4f Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 21:22:13 -0600 Subject: [PATCH 038/416] minor change --- stumpy/stump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 45c4e533c..5f701b9a5 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -648,10 +648,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): out = np.empty((l, 2 * k + 2), dtype=object) out[:, :k] = P[:, :k] - out[:, k:] = I + out[:, k:] = I[:, :] threshold = 10e-6 - if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover + if core.are_distances_too_small(out[:, :k].ravel(), threshold=threshold): # pragma: no cover logger.warning(f"A large number of values are smaller than {threshold}.") logger.warning("For a self-join, try setting `ignore_trivial = True`.") From c6370b6da6e438bdd16e4eefffb8a3e4f71a8c93 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 21:50:06 -0600 Subject: [PATCH 039/416] Add verbose --- stumpy/stump.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 5f701b9a5..ce5988662 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -646,7 +646,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k, ) - out = np.empty((l, 2 * k + 2), dtype=object) + out = np.empty((l, (2 * k) + 2), dtype=object) + print(out.shape) out[:, :k] = P[:, :k] out[:, k:] = I[:, :] From 816441596cbc2d1d85454bdbcba939132d1677b2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 22:03:28 -0600 Subject: [PATCH 040/416] minor changes --- stumpy/stump.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index ce5988662..2ae3046be 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -42,7 +42,7 @@ def _compute_diagonal( ρ, I, ignore_trivial, - k + k, ): """ Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I @@ -646,13 +646,12 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k, ) - out = np.empty((l, (2 * k) + 2), dtype=object) - print(out.shape) + out = np.empty((l, 2 * k + 2), dtype=object) out[:, :k] = P[:, :k] - out[:, k:] = I[:, :] + out[:, k:] = I threshold = 10e-6 - if core.are_distances_too_small(out[:, :k].ravel(), threshold=threshold): # pragma: no cover + if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover logger.warning(f"A large number of values are smaller than {threshold}.") logger.warning("For a self-join, try setting 
`ignore_trivial = True`.") From 7007953f700dd41cae95d1ea834d0e5850b245b7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 22:17:23 -0600 Subject: [PATCH 041/416] Fixed unit test for top-k matrix profile --- tests/test_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 1ce70acc5..25b9c5283 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -253,6 +253,6 @@ def test_stump_self_join_KNN(T_A, T_B): naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) - comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True) + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) From 5b5f21ada054f9d26780199c34f248f034874fe2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 22:21:13 -0600 Subject: [PATCH 042/416] Remove parameter k in function non_normalized decorator --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 391ce6b57..a2a30c043 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,7 +121,7 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p", "k"] + exclude = ["normalize", "p"] @functools.wraps(non_norm) def outer_wrapper(norm): From f7ee854f733eba01412ed17f6a3cdf8f747d842a Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 9 May 2022 22:56:39 -0600 Subject: [PATCH 043/416] Corret format by black --- stumpy/stump.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 2ae3046be..eb18b7e8a 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -196,9 +196,7 @@ def _compute_diagonal( pearson = 1.0 if pearson > ρ[thread_idx, i, k - 1]: - idx = k - np.searchsorted( - ρ[thread_idx, i, :k][::-1], pearson - ) + idx = k - np.searchsorted(ρ[thread_idx, i, :k][::-1], pearson) ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1] ρ[thread_idx, i, idx] = pearson I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1] @@ -207,15 +205,19 @@ def _compute_diagonal( if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, k - 1]: idx = k - np.searchsorted( - ρ[thread_idx, i + g, :k][::-1], pearson + ρ[thread_idx, i + g, :k][::-1], pearson ) - ρ[thread_idx, i + g, idx + 1 : k] = ρ[thread_idx, i + g, idx : k - 1] + ρ[thread_idx, i + g, idx + 1 : k] = ρ[ + thread_idx, i + g, idx : k - 1 + ] ρ[thread_idx, i + g, idx] = pearson - I[thread_idx, i + g, idx + 1 : k] = I[thread_idx, i + g, idx : k - 1] + I[thread_idx, i + g, idx + 1 : k] = I[ + thread_idx, i + g, idx : k - 1 + ] I[thread_idx, i + g, idx] = i # for top-1 case: - #ρ[thread_idx, i + g, 0] = pearson - #I[thread_idx, i + g, 0] = i + # ρ[thread_idx, i + g, 0] = pearson + # I[thread_idx, i + g, 0] = i if i < i + g: # left pearson correlation and left matrix profile index @@ -432,10 +434,8 @@ def _stump( for i in prange(l): # top-k for j in range(k): - if ρ[0, i, k-1] < ρ[thread_idx, i, j]: - idx = k - np.searchsorted( - ρ[0, i, :k][::-1], ρ[thread_idx, i, j] - ) + if ρ[0, i, k - 1] < ρ[thread_idx, i, j]: + idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j]) ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1] ρ[0, i, idx] = ρ[thread_idx, i, j] @@ -453,7 +453,7 @@ def _stump( # Convert pearson correlations to distances p_norm = np.abs(2 * m * (1 - ρ[0, 
:, :])) for i in prange(p_norm.shape[0]): - for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` + for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: p_norm[i, j] = 0.0 From 485dba3da38398f27b237142f29adebd870ac003 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 17:45:26 -0600 Subject: [PATCH 044/416] Use seperate variaboles for left and right profiles --- stumpy/stump.py | 87 +++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index eb18b7e8a..9921a5e7c 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -41,6 +41,10 @@ def _compute_diagonal( thread_idx, ρ, I, + ρL, + IL, + ρR, + IR, ignore_trivial, k, ): @@ -221,14 +225,14 @@ def _compute_diagonal( if i < i + g: # left pearson correlation and left matrix profile index - if pearson > ρ[thread_idx, i + g, k]: - ρ[thread_idx, i + g, k] = pearson - I[thread_idx, i + g, k] = i + if pearson > ρL[thread_idx, i + g]: + ρL[thread_idx, i + g] = pearson + IL[thread_idx, i + g] = i # right pearson correlation and right matrix profile index - if pearson > ρ[thread_idx, i, k + 1]: - ρ[thread_idx, i, k + 1] = pearson - I[thread_idx, i, k + 1] = i + g + if pearson > ρR[thread_idx, i]: + ρR[thread_idx, i] = pearson + IR[thread_idx, i] = i + g return @@ -378,8 +382,15 @@ def _stump( n_B = T_B.shape[0] l = n_A - m + 1 n_threads = numba.config.NUMBA_NUM_THREADS - ρ = np.full((n_threads, l, k + 2), -np.inf, dtype=np.float64) - I = np.full((n_threads, l, k + 2), -1, dtype=np.int64) + + ρ = np.full((n_threads, l, k), -np.inf, dtype=np.float64) + I = np.full((n_threads, l, k), -1, dtype=np.int64) + + ρL = np.full((n_threads, l), -np.inf, dtype=np.float64) + IL = np.full((n_threads, l), -1, dtype=np.float64) + + ρR = np.full((n_threads, l), -np.inf, dtype=np.float64) + IR = np.full((n_threads, l), -1, dtype=np.float64) ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B) diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False) @@ -425,6 +436,10 @@ def _stump( thread_idx, ρ, I, + ρL, + IL, + ρR, + IR, ignore_trivial, k, ) @@ -434,7 +449,7 @@ def _stump( for i in prange(l): # top-k for j in range(k): - if ρ[0, i, k - 1] < ρ[thread_idx, i, j]: + if ρ[0, i, k-1] < ρ[thread_idx, i, j]: idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j]) ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1] ρ[0, i, idx] = ρ[thread_idx, i, j] @@ -442,24 +457,24 @@ def _stump( I[0, i, idx + 1 : k] = I[0, i, idx : k - 1] I[0, i, idx] = I[thread_idx, i, j] - if ρ[0, i, k] < ρ[thread_idx, i, k]: - ρ[0, i, k] = ρ[thread_idx, i, k] - I[0, i, k] = I[thread_idx, i, k] + if ρL[0, i] < ρL[thread_idx, i]: + ρL[0, i] = ρL[thread_idx, i] + IL[0, i] = IL[thread_idx, i] - if ρ[0, i, k + 1] < ρ[thread_idx, i, k + 1]: - ρ[0, i, k + 1] = ρ[thread_idx, i, k + 1] - I[0, i, k + 1] = I[thread_idx, i, k + 1] + if ρR[0, i] < ρR[thread_idx, i]: + ρR[0, i] = ρR[thread_idx, i] + IR[0, i] = IR[thread_idx, i] # Convert pearson correlations to distances p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) for i in prange(p_norm.shape[0]): - for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2` + for j in prange(p_norm.shape[1]): if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: p_norm[i, j] = 0.0 P = np.sqrt(p_norm) - return P[:, :], I[0, :, :] + return P, I[0, :, :], IL[0, :], IR[0, :] @core.non_normalized(aamp) @@ -627,28 +642,28 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): 
else: diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) - P, I = _stump( - T_A, - T_B, - m, - M_T, - μ_Q, - Σ_T_inverse, - σ_Q_inverse, - M_T_m_1, - μ_Q_m_1, - T_A_subseq_isfinite, - T_B_subseq_isfinite, - T_A_subseq_isconstant, - T_B_subseq_isconstant, - diags, - ignore_trivial, - k, + P, I, IL, IR = _stump( + T_A, + T_B, + m, + M_T, + μ_Q, + Σ_T_inverse, + σ_Q_inverse, + M_T_m_1, + μ_Q_m_1, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + T_A_subseq_isconstant, + T_B_subseq_isconstant, + diags, + ignore_trivial, + k, ) out = np.empty((l, 2 * k + 2), dtype=object) - out[:, :k] = P[:, :k] - out[:, k:] = I + out[:, :k] = P + out[:, k:] = np.c_[I, IL, IR] threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover From bc133ca638df71c4542b2351e07297b04b8b6269 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 18:10:06 -0600 Subject: [PATCH 045/416] store top-k rho in ascending order --- stumpy/stump.py | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 9921a5e7c..56b2118ca 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -199,26 +199,26 @@ def _compute_diagonal( if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]: pearson = 1.0 - if pearson > ρ[thread_idx, i, k - 1]: - idx = k - np.searchsorted(ρ[thread_idx, i, :k][::-1], pearson) - ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1] - ρ[thread_idx, i, idx] = pearson - I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1] - I[thread_idx, i, idx] = i + g + if pearson > ρ[thread_idx, i, 0]: + idx = np.searchsorted(ρ[thread_idx, i], pearson) + ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1 : idx] + ρ[thread_idx, i, idx - 1] = pearson + + I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1 : idx] + I[thread_idx, i, idx - 1] = i + g if ignore_trivial: # self-joins only - if pearson > ρ[thread_idx, i + g, k - 1]: - idx = k - np.searchsorted( - ρ[thread_idx, i + g, :k][::-1], pearson - ) - ρ[thread_idx, i + g, idx + 1 : k] = ρ[ - thread_idx, i + g, idx : k - 1 + if pearson > ρ[thread_idx, i + g, 0]: + idx = np.searchsorted(ρ[thread_idx, i + g], pearson) + ρ[thread_idx, i + g, : idx - 1] = ρ[ + thread_idx, i + g, 1 : idx ] - ρ[thread_idx, i + g, idx] = pearson - I[thread_idx, i + g, idx + 1 : k] = I[ - thread_idx, i + g, idx : k - 1 + ρ[thread_idx, i + g, idx - 1] = pearson + + I[thread_idx, i + g, : idx - 1] = I[ + thread_idx, i + g, 1 : idx ] - I[thread_idx, i + g, idx] = i + I[thread_idx, i + g, idx - 1] = i # for top-1 case: # ρ[thread_idx, i + g, 0] = pearson # I[thread_idx, i + g, 0] = i @@ -449,13 +449,14 @@ def _stump( for i in prange(l): # top-k for j in range(k): - if ρ[0, i, k-1] < ρ[thread_idx, i, j]: - idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j]) - ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1] - ρ[0, i, idx] = ρ[thread_idx, i, j] + j = k - 1 - j + if ρ[0, i, 0] < ρ[thread_idx, i, j]: + idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) + ρ[0, i, : idx - 1] = ρ[0, i, 1 : idx] + ρ[0, i, idx - 1] = ρ[thread_idx, i, j] - I[0, i, idx + 1 : k] = I[0, i, idx : k - 1] - I[0, i, idx] = I[thread_idx, i, j] + I[0, i, : idx - 1] = I[0, i, 1 : idx] + I[0, i, idx - 1] = I[thread_idx, i, j] if ρL[0, i] < ρL[thread_idx, i]: ρL[0, i] = ρL[thread_idx, i] @@ -474,7 +475,7 @@ def _stump( P = np.sqrt(p_norm) - return P, I[0, :, :], IL[0, :], IR[0, :] + return P[:, ::-1], I[0, :, ::-1], IL[0, :], IR[0, :] @core.non_normalized(aamp) From 
47a61b2f202e3f2864460086ccf92100168b8f1e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 18:23:08 -0600 Subject: [PATCH 046/416] Revise docstrings --- stumpy/stump.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 56b2118ca..bdf8c85b7 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -326,12 +326,16 @@ def _stump( Returns ------- profile : numpy.ndarray - Matrix profile + Top-k Matrix profile indices : numpy.ndarray - The first column consists of the matrix profile indices, the second - column consists of the left matrix profile indices, and the third - column consists of the right matrix profile indices. + The top-k matrix profile indices + + left indices : numpy.ndarray + The top-1 left matrix profile indices + + right indices : numpy.ndarray + The top-1 right matrix profile indices Notes ----- @@ -520,10 +524,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): Returns ------- out : numpy.ndarray - The first column consists of the matrix profile, the second column - consists of the matrix profile indices, the third column consists of - the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. + The first k columns consists of the top-k matrix profile, the next k columns + consists of their corresponding matrix profile indices, the one before + last column consists of the top-1 left matrix profile indices, and the + last column consists of the top-1 right matrix profile indices. See Also -------- From d4dc04a5caea088cd6a9a619830af7c517f5348d Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 18:37:27 -0600 Subject: [PATCH 047/416] Correct docstrings --- stumpy/stump.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index bdf8c85b7..d49296ac5 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -49,9 +49,9 @@ def _compute_diagonal( k, ): """ - Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I - sequentially along individual diagonals using a single thread and avoiding race - conditions + Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation, ρ, and I, + and, the left ρ and the left I, the right ρ and the right I sequentially along + individual diagonals using a single thread and avoiding race conditions. Parameters ---------- @@ -121,10 +121,22 @@ def _compute_diagonal( The thread index ρ : numpy.ndarray - The Pearson correlations + The top-k Pearson correlations, sorted in ascending order per row I : numpy.ndarray - The matrix profile indices + The top-k matrix profile indices + + ρL : numpy.ndarray + The top-1 left Pearson correlations + + IL : numpy.ndarray + The top-1 left matrix profile indices + + ρR : numpy.ndarray + The top-1 left Pearson correlations + + IR : numpy.ndarray + The top-1 right matrix profile indices ignore_trivial : bool Set to `True` if this is a self-join. Otherwise, for AB-join, set this to @@ -263,8 +275,8 @@ def _stump( ): """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel - computation of the matrix profile, matrix profile indices, left matrix profile - indices, and right matrix profile indices. + computation of the top-k matrix profile, top-k matrix profile indices, top-1 + left matrix profile indices, and top-1 right matrix profile indices. 
Parameters ---------- @@ -326,16 +338,16 @@ def _stump( Returns ------- profile : numpy.ndarray - Top-k Matrix profile + Top-k matrix profile indices : numpy.ndarray - The top-k matrix profile indices + Top-k matrix profile indices left indices : numpy.ndarray - The top-1 left matrix profile indices + Top-1 left matrix profile indices right indices : numpy.ndarray - The top-1 right matrix profile indices + Top-1 right matrix profile indices Notes ----- @@ -417,7 +429,8 @@ def _stump( cov_d[:] = cov_d - μ_Q_m_1 for thread_idx in prange(n_threads): - # Compute and update cov, I within a single thread to avoiding race conditions + # Compute and update pearson correlations and matrix profile indices + # within a single thread to avoid race conditions _compute_diagonal( T_A, T_B, From a123540664c93cacc5cf1b006422b42fba9c9069 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 18:38:47 -0600 Subject: [PATCH 048/416] Correct formats --- stumpy/stump.py | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index d49296ac5..f31d0c0f7 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -213,23 +213,19 @@ def _compute_diagonal( if pearson > ρ[thread_idx, i, 0]: idx = np.searchsorted(ρ[thread_idx, i], pearson) - ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1 : idx] + ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1:idx] ρ[thread_idx, i, idx - 1] = pearson - I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1 : idx] + I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1:idx] I[thread_idx, i, idx - 1] = i + g if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, 0]: idx = np.searchsorted(ρ[thread_idx, i + g], pearson) - ρ[thread_idx, i + g, : idx - 1] = ρ[ - thread_idx, i + g, 1 : idx - ] + ρ[thread_idx, i + g, : idx - 1] = ρ[thread_idx, i + g, 1:idx] ρ[thread_idx, i + g, idx - 1] = pearson - I[thread_idx, i + g, : idx - 1] = I[ - thread_idx, i + g, 1 : idx - ] + I[thread_idx, i + g, : idx - 1] = I[thread_idx, i + g, 1:idx] I[thread_idx, i + g, idx - 1] = i # for top-1 case: # ρ[thread_idx, i + g, 0] = pearson @@ -469,10 +465,10 @@ def _stump( j = k - 1 - j if ρ[0, i, 0] < ρ[thread_idx, i, j]: idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) - ρ[0, i, : idx - 1] = ρ[0, i, 1 : idx] + ρ[0, i, : idx - 1] = ρ[0, i, 1:idx] ρ[0, i, idx - 1] = ρ[thread_idx, i, j] - I[0, i, : idx - 1] = I[0, i, 1 : idx] + I[0, i, : idx - 1] = I[0, i, 1:idx] I[0, i, idx - 1] = I[thread_idx, i, j] if ρL[0, i] < ρL[thread_idx, i]: @@ -661,22 +657,22 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) P, I, IL, IR = _stump( - T_A, - T_B, - m, - M_T, - μ_Q, - Σ_T_inverse, - σ_Q_inverse, - M_T_m_1, - μ_Q_m_1, - T_A_subseq_isfinite, - T_B_subseq_isfinite, - T_A_subseq_isconstant, - T_B_subseq_isconstant, - diags, - ignore_trivial, - k, + T_A, + T_B, + m, + M_T, + μ_Q, + Σ_T_inverse, + σ_Q_inverse, + M_T_m_1, + μ_Q_m_1, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + T_A_subseq_isconstant, + T_B_subseq_isconstant, + diags, + ignore_trivial, + k, ) out = np.empty((l, 2 * k + 2), dtype=object) From 1dff66f983346ae23430f76cf5c1f16b2c46ea98 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 12 May 2022 18:40:00 -0600 Subject: [PATCH 049/416] Full coverage of test_stump unit test From cf48b6961eab3c01180a84a476dcd5e8fcd626ee Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 10:37:44 -0600 Subject: [PATCH 
050/416] Change function considering new input/output structure --- stumpy/scrump.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 002847507..75790c70a 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -609,7 +609,7 @@ def update(self): if self._chunk_idx < self._n_chunks: start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx] - P, I = _stump( + P, I, IL, IR = _stump( self._T_A, self._T_B, self._m, @@ -625,8 +625,11 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, + k=1, ) + I = np.c_[I, IL, IR] + # Update matrix profile and indices for i in range(self._P.shape[0]): if self._P[i, 0] > P[i, 0]: From 7d16ce6a883b38808a7e6f93c41c82755500465a Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 10:45:24 -0600 Subject: [PATCH 051/416] Add two more outputs returned by _stump --- stumpy/stump.py | 51 +++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index f31d0c0f7..348085a4e 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -481,14 +481,26 @@ def _stump( # Convert pearson correlations to distances p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) + p_norm_L = np.abs(2 * m * (1 - ρL[0, :])) + p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) + for i in prange(p_norm.shape[0]): for j in prange(p_norm.shape[1]): if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: p_norm[i, j] = 0.0 + if p_norm_L[i] < config.STUMPY_P_NORM_THRESHOLD: + p_norm_L[i] = 0.0 + + if p_norm_R[i] < config.STUMPY_P_NORM_THRESHOLD: + p_norm_R[i] = 0.0 + P = np.sqrt(p_norm) + PL = np.sqrt(p_norm_L) + PR = np.sqrt(p_norm_R) + - return P[:, ::-1], I[0, :, ::-1], IL[0, :], IR[0, :] + return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :] @core.non_normalized(aamp) @@ -656,26 +668,27 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): else: diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) - P, I, IL, IR = _stump( - T_A, - T_B, - m, - M_T, - μ_Q, - Σ_T_inverse, - σ_Q_inverse, - M_T_m_1, - μ_Q_m_1, - T_A_subseq_isfinite, - T_B_subseq_isfinite, - T_A_subseq_isconstant, - T_B_subseq_isconstant, - diags, - ignore_trivial, - k, + P, I, PL, IL, PR, IR = _stump( + T_A, + T_B, + m, + M_T, + μ_Q, + Σ_T_inverse, + σ_Q_inverse, + M_T_m_1, + μ_Q_m_1, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + T_A_subseq_isconstant, + T_B_subseq_isconstant, + diags, + ignore_trivial, + k, ) - out = np.empty((l, 2 * k + 2), dtype=object) + out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to + # store left and right matrix profile indices out[:, :k] = P out[:, k:] = np.c_[I, IL, IR] From 61d38b6b747ff96820140335163b5d02c76f0eaf Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 10:50:48 -0600 Subject: [PATCH 052/416] Update/Correct docstrings --- stumpy/stump.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 348085a4e..b9743613b 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -133,7 +133,7 @@ def _compute_diagonal( The top-1 left matrix profile indices ρR : numpy.ndarray - The top-1 left Pearson correlations + The top-1 right Pearson correlations IR : numpy.ndarray The top-1 right matrix profile indices @@ -272,7 +272,8 @@ def _stump( """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel computation of the top-k matrix profile, top-k 
matrix profile indices, top-1 - left matrix profile indices, and top-1 right matrix profile indices. + left matrix profile and matrix profile indices, and top-1 right matrix profile + and matrix profile indices. Parameters ---------- @@ -339,9 +340,15 @@ def _stump( indices : numpy.ndarray Top-k matrix profile indices + left profile : numpy.ndarray + Top-1 left matrix profile + left indices : numpy.ndarray Top-1 left matrix profile indices + right profile : numpy.ndarray + Top-1 right matrix profile + right indices : numpy.ndarray Top-1 right matrix profile indices @@ -499,7 +506,6 @@ def _stump( PL = np.sqrt(p_norm_L) PR = np.sqrt(p_norm_R) - return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :] @@ -546,9 +552,9 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): ------- out : numpy.ndarray The first k columns consists of the top-k matrix profile, the next k columns - consists of their corresponding matrix profile indices, the one before - last column consists of the top-1 left matrix profile indices, and the - last column consists of the top-1 right matrix profile indices. + consists of their corresponding matrix profile indices, the column at + numpy indexing 2k contains top-1 left matrix profile indices and the last + column, at numpy indexing 2k+1, contains top-1 right matrix profile indices. See Also -------- From 1a469a5230720bdc4d86287db174c0196fd9cf8d Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 10:53:16 -0600 Subject: [PATCH 053/416] Correct callee function _stump --- stumpy/scrump.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 75790c70a..df53d8244 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -609,7 +609,7 @@ def update(self): if self._chunk_idx < self._n_chunks: start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx] - P, I, IL, IR = _stump( + P, I, PL, IL, PR, IR = _stump( self._T_A, self._T_B, self._m, @@ -628,8 +628,9 @@ def update(self): k=1, ) + P = np.c_[P, PL, PR] I = np.c_[I, IL, IR] - + # Update matrix profile and indices for i in range(self._P.shape[0]): if self._P[i, 0] > P[i, 0]: From 2149abf0f4d2b0f109246b1a90d1106fa4d76f89 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 10:53:58 -0600 Subject: [PATCH 054/416] Fix format --- stumpy/stump.py | 34 +++++++++++++++++----------------- stumpy/test_stump.py | 0 2 files changed, 17 insertions(+), 17 deletions(-) create mode 100644 stumpy/test_stump.py diff --git a/stumpy/stump.py b/stumpy/stump.py index b9743613b..cb10e65c4 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -675,25 +675,25 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) P, I, PL, IL, PR, IR = _stump( - T_A, - T_B, - m, - M_T, - μ_Q, - Σ_T_inverse, - σ_Q_inverse, - M_T_m_1, - μ_Q_m_1, - T_A_subseq_isfinite, - T_B_subseq_isfinite, - T_A_subseq_isconstant, - T_B_subseq_isconstant, - diags, - ignore_trivial, - k, + T_A, + T_B, + m, + M_T, + μ_Q, + Σ_T_inverse, + σ_Q_inverse, + M_T_m_1, + μ_Q_m_1, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + T_A_subseq_isconstant, + T_B_subseq_isconstant, + diags, + ignore_trivial, + k, ) - out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to + out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to # store left and right matrix profile indices out[:, :k] = P out[:, k:] = np.c_[I, IL, IR] diff --git a/stumpy/test_stump.py 
b/stumpy/test_stump.py new file mode 100644 index 000000000..e69de29bb From 364f280d7a4db08ede32151b201e856d344bdef6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 11:19:02 -0600 Subject: [PATCH 055/416] Fixed number of inputs passed to _stump --- stumpy/stumped.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 09557e318..7f1f67e51 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -248,6 +248,7 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, T_B_subseq_isconstant_future, diags_futures[i], ignore_trivial, + 1, ) ) From e983e1fbda3ca017d453a2acb97d997314ad9a70 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 11:29:00 -0600 Subject: [PATCH 056/416] Fixed number of outputs returned by the function --- stumpy/stumped.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 7f1f67e51..db30eea59 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -253,7 +253,11 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, ) results = dask_client.gather(futures) - profile, indices = results[0] + profile, indices, profile_L, indices_L, profile_R, indices_R = results[0] + + profile = np.c_[profile, profile_L, profile_R] + indices = np.c_[indices, indices_L, indices_R] + for i in range(1, len(hosts)): P, I = results[i] for col in range(P.shape[1]): # pragma: no cover From ef2bc6578bfb4f7e04c74dcda3563d32fd76497a Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 11:36:56 -0600 Subject: [PATCH 057/416] Fixed number of returned outputs --- stumpy/stumped.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index db30eea59..1fbd7be49 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -259,7 +259,9 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, indices = np.c_[indices, indices_L, indices_R] for i in range(1, len(hosts)): - P, I = results[i] + P, I, PL, IL, PR, IR = results[i] + P = np.c_[P, PL, PR] + I = np.c_[I, IL, IR] for col in range(P.shape[1]): # pragma: no cover cond = P[:, col] < profile[:, col] profile[:, col] = np.where(cond, P[:, col], profile[:, col]) From f7d4a8fcd298600c7a51fe8178020a675b349349 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 11:39:31 -0600 Subject: [PATCH 058/416] Correct format --- stumpy/stumped.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 1fbd7be49..6ca40707c 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -253,7 +253,7 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, ) results = dask_client.gather(futures) - profile, indices, profile_L, indices_L, profile_R, indices_R = results[0] + profile, indices, profile_L, indices_L, profile_R, indices_R = results[0] profile = np.c_[profile, profile_L, profile_R] indices = np.c_[indices, indices_L, indices_R] From 3dccc9a244797c3324cfef54a0b3e1d07c36d6e5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 12:21:00 -0600 Subject: [PATCH 059/416] Exclude parameter 'k' in non-normalized decorator After updating non-normalized functions to return top-k matrix profile, the parameter "k" will be removed from such exclusion. 
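
For context, here is a minimal sketch of the dispatch pattern involved (hypothetical names and deliberately simplified behavior, not the actual `core.non_normalized` decorator): when `normalize=False`, the wrapper hands the call to the non-normalized fallback and first drops any keyword listed in `exclude` that the fallback does not yet accept. Listing "k" in that list keeps the wrapped call from forwarding `k` to `aamp` before `aamp` supports it.

    import functools

    def non_normalized_sketch(non_norm, exclude=("k",)):
        # Hypothetical, stripped-down stand-in for the real decorator.
        def decorator(norm):
            @functools.wraps(norm)
            def wrapper(*args, normalize=True, **kwargs):
                if normalize:
                    return norm(*args, **kwargs)
                for name in exclude:
                    # Drop keywords the non-normalized fallback cannot take yet
                    kwargs.pop(name, None)
                return non_norm(*args, **kwargs)
            return wrapper
        return decorator
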
--- stumpy/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index a2a30c043..9c4296ab9 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,7 +121,8 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p"] + exclude = ["normalize", "p", "k"] # remove "k" after updating + # non-normalized function to accept "k" for top-k matrix profile @functools.wraps(non_norm) def outer_wrapper(norm): From a430364aa2cfc77263f7328386dc5c9ea0048945 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 12:25:17 -0600 Subject: [PATCH 060/416] Correct format --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9c4296ab9..f9a77a07f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,8 +121,8 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p", "k"] # remove "k" after updating - # non-normalized function to accept "k" for top-k matrix profile + exclude = ["normalize", "p", "k"] # remove "k" after updating + # non-normalized function to accept "k" for top-k matrix profile @functools.wraps(non_norm) def outer_wrapper(norm): From 4f0194384b38e38a6b76e949d6aac0bd06fa441f Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 15:17:22 -0600 Subject: [PATCH 061/416] Fixed dtype of matrix profile indices --- stumpy/stump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index cb10e65c4..683194e9b 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -406,10 +406,10 @@ def _stump( I = np.full((n_threads, l, k), -1, dtype=np.int64) ρL = np.full((n_threads, l), -np.inf, dtype=np.float64) - IL = np.full((n_threads, l), -1, dtype=np.float64) + IL = np.full((n_threads, l), -1, dtype=np.int64) ρR = np.full((n_threads, l), -np.inf, dtype=np.float64) - IR = np.full((n_threads, l), -1, dtype=np.float64) + IR = np.full((n_threads, l), -1, dtype=np.int64) ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B) diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False) From aebe5a31920fed46be8cac8f46c50cbc58315e0c Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 16:33:19 -0600 Subject: [PATCH 062/416] Add pagam no cover --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 67d1fb27c..b2d8894f7 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -164,7 +164,7 @@ def searchsorted(a, v): if len(indices): return indices.min() else: - return len(a) + return len(a) # pragma: no cover def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): From de295af807c8b114cdce77ee254e62ed34bcf485 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 13 May 2022 16:37:23 -0600 Subject: [PATCH 063/416] Minor change --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index b2d8894f7..4a5ed789a 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -163,8 +163,8 @@ def searchsorted(a, v): indices = np.flatnonzero(v < a) if len(indices): return indices.min() - else: - return len(a) # pragma: no cover + else: # pragma: no cover + return len(a) def stump(T_A, m, T_B=None, exclusion_zone=None, 
row_wise=False, k=1): From 1d35aea6326fab28d4d099d1b6e40db7d4fd037c Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 15 May 2022 21:48:13 -0600 Subject: [PATCH 064/416] Use range to move in reverse --- stumpy/stump.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 683194e9b..6f47fe698 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -468,8 +468,7 @@ def _stump( for thread_idx in range(1, n_threads): for i in prange(l): # top-k - for j in range(k): - j = k - 1 - j + for j in range(k - 1, -1, -1): if ρ[0, i, 0] < ρ[thread_idx, i, j]: idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) ρ[0, i, : idx - 1] = ρ[0, i, 1:idx] From e817e5f0dd1316105b93a96d9be28b659a58367d Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 21:02:08 -0600 Subject: [PATCH 065/416] Remove a wrongly created file --- stumpy/test_stump.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 stumpy/test_stump.py diff --git a/stumpy/test_stump.py b/stumpy/test_stump.py deleted file mode 100644 index e69de29bb..000000000 From c1e39256972a03f0ee1b014e1b8e20efa2d811ba Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 21:04:59 -0600 Subject: [PATCH 066/416] Remove parameter k in non normalized decorator --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f9a77a07f..753b0affa 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,9 +121,9 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p", "k"] # remove "k" after updating - # non-normalized function to accept "k" for top-k matrix profile + exclude = ["normalize", "p"] + @functools.wraps(non_norm) def outer_wrapper(norm): @functools.wraps(norm) From aa08176e4cc1ecd90dc47e3ef851103088136a11 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 21:09:58 -0600 Subject: [PATCH 067/416] Add parameter k to arguments of non normalized function Temporarily, the parameter k is added to the arguments of non-normalized function `aamp` so that the tests can be passed for now. This will be handled after completing the normalized version `stump`. 
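
A tiny illustration of the failure mode this stop-gap avoids once wrapper code starts forwarding `k` along with the other keywords (toy functions with hypothetical names, not the real `aamp` signature):

    def old_aamp(T, m, p=2.0):  # no `k` parameter yet
        return None

    def new_aamp(T, m, p=2.0, k=1):  # placeholder `k`, currently ignored
        return None

    kwargs = {"p": 2.0, "k": 3}
    # old_aamp([0.0, 1.0, 2.0], 2, **kwargs)  # TypeError: unexpected keyword argument 'k'
    new_aamp([0.0, 1.0, 2.0], 2, **kwargs)  # accepted; `k` is simply unused for now
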
--- stumpy/aamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 201e4413b..b00c8cbf1 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -240,7 +240,8 @@ def _aamp( return np.power(P[0, :, :], 1.0 / p), I[0, :, :] -def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0): +def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary + # and this function needs to be changed to return top-k """ Compute the non-normalized (i.e., without z-normalization) matrix profile From 37a9f2c91979fbd2db35d27d4c946eb1ca31c08f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 22:12:34 -0600 Subject: [PATCH 068/416] Replace numpy c_ with column_stack --- stumpy/scrump.py | 4 ++-- stumpy/stump.py | 2 +- stumpy/stumped.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index df53d8244..25c4e4e3f 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -628,8 +628,8 @@ def update(self): k=1, ) - P = np.c_[P, PL, PR] - I = np.c_[I, IL, IR] + P = np.column_stack((P, PL, PR)) + I = np.column_stack((I, IL, IR)) # Update matrix profile and indices for i in range(self._P.shape[0]): diff --git a/stumpy/stump.py b/stumpy/stump.py index 6f47fe698..449c35200 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -695,7 +695,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to # store left and right matrix profile indices out[:, :k] = P - out[:, k:] = np.c_[I, IL, IR] + out[:, k:] = np.column_stack((I, IL, IR)) threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 6ca40707c..0c1c34e07 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -255,13 +255,13 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, results = dask_client.gather(futures) profile, indices, profile_L, indices_L, profile_R, indices_R = results[0] - profile = np.c_[profile, profile_L, profile_R] - indices = np.c_[indices, indices_L, indices_R] + profile = np.column_stack((profile, profile_L, profile_R)) + indices = np.column_stack((indices, indices_L, indices_R)) for i in range(1, len(hosts)): P, I, PL, IL, PR, IR = results[i] - P = np.c_[P, PL, PR] - I = np.c_[I, IL, IR] + P = np.column_stack((P, PL, PR)) + I = np.column_stack((I, IL, IR)) for col in range(P.shape[1]): # pragma: no cover cond = P[:, col] < profile[:, col] profile[:, col] = np.where(cond, P[:, col], profile[:, col]) From 8c0e76ecd2eeea875fb7c80eb4cfe5703740a333 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 23:29:21 -0600 Subject: [PATCH 069/416] Minor changes - Improve docstrings - Reverse rho and I before return - Improve comments --- stumpy/stump.py | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 449c35200..6fe2b7e41 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -49,9 +49,9 @@ def _compute_diagonal( k, ): """ - Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation, ρ, and I, - and, the left ρ and the left I, the right ρ and the right I sequentially along - individual diagonals using a single thread and avoiding race conditions. 
+ Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation (ρ), + ρL, ρR, I, IL, and IR sequentially along individual diagonals using a single + thread and avoiding race conditions. Parameters ---------- @@ -121,10 +121,10 @@ def _compute_diagonal( The thread index ρ : numpy.ndarray - The top-k Pearson correlations, sorted in ascending order per row + The (top-k) Pearson correlations, sorted in ascending order per row I : numpy.ndarray - The top-k matrix profile indices + The (top-k) matrix profile indices ρL : numpy.ndarray The top-1 left Pearson correlations @@ -144,7 +144,7 @@ def _compute_diagonal( k : int The number of smallest elements in distance profile that should be stored - for constructing top-k matrix profile. + for constructing the top-k matrix profile. Returns ------- @@ -227,9 +227,6 @@ def _compute_diagonal( I[thread_idx, i + g, : idx - 1] = I[thread_idx, i + g, 1:idx] I[thread_idx, i + g, idx - 1] = i - # for top-1 case: - # ρ[thread_idx, i + g, 0] = pearson - # I[thread_idx, i + g, 0] = i if i < i + g: # left pearson correlation and left matrix profile index @@ -271,9 +268,9 @@ def _stump( ): """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel - computation of the top-k matrix profile, top-k matrix profile indices, top-1 - left matrix profile and matrix profile indices, and top-1 right matrix profile - and matrix profile indices. + computation of the (top-k) matrix profile, the (top-k) matrix profile indices, + the top-1 left matrix profile and matrix profile indices, and the top-1 right + matrix profile and matrix profile indices. Parameters ---------- @@ -468,7 +465,7 @@ def _stump( for thread_idx in range(1, n_threads): for i in prange(l): # top-k - for j in range(k - 1, -1, -1): + for j in range(k - 1, -1, -1): # reverse iteration to preserve order in ties if ρ[0, i, 0] < ρ[thread_idx, i, j]: idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) ρ[0, i, : idx - 1] = ρ[0, i, 1:idx] @@ -485,8 +482,12 @@ def _stump( ρR[0, i] = ρR[thread_idx, i] IR[0, i] = IR[thread_idx, i] - # Convert pearson correlations to distances - p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) + # The arrays ρ (and so I) should be reversed since ρ is in ascending order. + ρ = ρ[0, :, ::-1] + I = I[0, :, ::-1] + + # Convert pearson correlations to distances. + p_norm = np.abs(2 * m * (1 - ρ)) p_norm_L = np.abs(2 * m * (1 - ρL[0, :])) p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) @@ -505,7 +506,7 @@ def _stump( PL = np.sqrt(p_norm_L) PR = np.sqrt(p_norm_R) - return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :] + return P, I, PL, IL[0, :], PR, IR[0, :] @core.non_normalized(aamp) @@ -514,8 +515,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): Compute the z-normalized matrix profile This is a convenience wrapper around the Numba JIT-compiled parallelized - `_stump` function which computes the matrix profile according to STOMPopt with - Pearson correlations. + `_stump` function which computes the (top-k) matrix profile according to + STOMPopt with Pearson correlations. Parameters ---------- @@ -545,15 +546,15 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k : int, default 1 The number of smallest elements in distance profile that should be stored - for constructing top-k matrix profile. + for constructing the top-k matrix profile. 
Returns ------- out : numpy.ndarray - The first k columns consists of the top-k matrix profile, the next k columns - consists of their corresponding matrix profile indices, the column at - numpy indexing 2k contains top-1 left matrix profile indices and the last - column, at numpy indexing 2k+1, contains top-1 right matrix profile indices. + The first k columns contain the top-k matrix profile, the next k columns + contain their corresponding matrix profile indices, the column at + numpy indexing 2k contains the top-1 left matrix profile indices and the last + column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices. See Also -------- From df4c5d1ad8db3109eb8316c99314785cb02f5325 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 17 May 2022 23:47:54 -0600 Subject: [PATCH 070/416] Correct Format --- stumpy/aamp.py | 2 +- stumpy/core.py | 1 - stumpy/stump.py | 6 ++++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index b00c8cbf1..807c3164b 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -240,7 +240,7 @@ def _aamp( return np.power(P[0, :, :], 1.0 / p), I[0, :, :] -def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary +def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary # and this function needs to be changed to return top-k """ Compute the non-normalized (i.e., without z-normalization) matrix profile diff --git a/stumpy/core.py b/stumpy/core.py index 753b0affa..a2a30c043 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -123,7 +123,6 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): if exclude is None: exclude = ["normalize", "p"] - @functools.wraps(non_norm) def outer_wrapper(norm): @functools.wraps(norm) diff --git a/stumpy/stump.py b/stumpy/stump.py index 6fe2b7e41..3e241a11e 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -465,7 +465,9 @@ def _stump( for thread_idx in range(1, n_threads): for i in prange(l): # top-k - for j in range(k - 1, -1, -1): # reverse iteration to preserve order in ties + for j in range( + k - 1, -1, -1 + ): # reverse iteration to preserve order in ties if ρ[0, i, 0] < ρ[thread_idx, i, j]: idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) ρ[0, i, : idx - 1] = ρ[0, i, 1:idx] @@ -487,7 +489,7 @@ def _stump( I = I[0, :, ::-1] # Convert pearson correlations to distances. - p_norm = np.abs(2 * m * (1 - ρ)) + p_norm = np.abs(2 * m * (1 - ρ)) p_norm_L = np.abs(2 * m * (1 - ρL[0, :])) p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) From c5c881bebc2ebffb9d55a1491ebff6f239b73553 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 00:22:26 -0600 Subject: [PATCH 071/416] minor improvement of docstring --- stumpy/stump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 3e241a11e..ae6a21a15 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -553,8 +553,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): Returns ------- out : numpy.ndarray - The first k columns contain the top-k matrix profile, the next k columns - contain their corresponding matrix profile indices, the column at + The first k columns consist of the top-k matrix profile, the next k columns + consist of their corresponding matrix profile indices, the column at numpy indexing 2k contains the top-1 left matrix profile indices and the last column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices. 
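
An editorial aside between patches: the two building blocks the commits above keep returning to, written out as one self-contained sketch. The helper names, the sample `k`/`m` values, and the small clamping threshold are illustrative assumptions; the real code applies the same steps to per-thread ρ and I arrays inside Numba-compiled loops and uses `config.STUMPY_P_NORM_THRESHOLD`.

    import numpy as np

    def insert_topk_ascending(rho, idx, pearson, j):
        # Keep the k largest Pearson correlations for one subsequence, stored
        # in ascending order: shift the smaller entries one slot to the left
        # and place the new value at its sorted position.
        if pearson > rho[0]:
            pos = np.searchsorted(rho, pearson)
            rho[: pos - 1] = rho[1:pos]
            rho[pos - 1] = pearson
            idx[: pos - 1] = idx[1:pos]
            idx[pos - 1] = j

    def pearson_to_distances(rho, m):
        # z-normalized Euclidean distance from Pearson correlation:
        # d = sqrt(2 * m * (1 - rho)). Ascending rho maps to descending d,
        # so the result is reversed to report the nearest neighbors first.
        p_norm = np.abs(2 * m * (1 - rho))
        p_norm[p_norm < 1e-14] = 0.0  # clamp round-off (stand-in threshold)
        return np.sqrt(p_norm)[::-1]

    k, m = 3, 50
    rho = np.full(k, -np.inf)
    idx = np.full(k, -1, dtype=np.int64)
    for j, r in enumerate([0.2, 0.9, 0.5, 0.95, 0.1]):
        insert_topk_ascending(rho, idx, r, j)
    # rho is now [0.5, 0.9, 0.95]; pearson_to_distances(rho, m) returns the
    # corresponding top-3 distances, smallest first.
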
From d9dcdc037168ef4f7cd4a9ef4cda491a94f24495 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 00:24:49 -0600 Subject: [PATCH 072/416] Add parameter k to the arguments of function the function will be revised to return top-k matrix profile --- stumpy/aamped.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/aamped.py b/stumpy/aamped.py index d6bf6d97b..c158c9423 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -12,7 +12,8 @@ logger = logging.getLogger(__name__) -def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0): +def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): + # function needs to be revised to return top-k matix profile """ Compute the non-normalized (i.e., without z-normalization) matrix profile From c6b81f0410769cd700cf68dbbb8f473dd50bfabf Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 00:52:31 -0600 Subject: [PATCH 073/416] Add parameter k to arguments Temporarily add parameter k to avoid non-normalized decorator test failure --- stumpy/stumped.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 0c1c34e07..a48f6a957 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -14,7 +14,8 @@ @core.non_normalized(aamped) -def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): +def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): + # the function needs to be revisd to return top-k matrix profile """ Compute the z-normalized matrix profile with a distributed dask cluster From 4ffc7fca9733cccb6dddab528a1a5d2ca996089c Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 00:53:55 -0600 Subject: [PATCH 074/416] Correct format --- stumpy/stumped.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index a48f6a957..e922536f3 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -14,7 +14,9 @@ @core.non_normalized(aamped) -def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): +def stumped( + dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 +): # the function needs to be revisd to return top-k matrix profile """ Compute the z-normalized matrix profile with a distributed dask cluster From 102c627f64eb5736f528cc31bba8bb01f8645628 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 02:03:34 -0600 Subject: [PATCH 075/416] Remove parameter k from arguements --- stumpy/aamped.py | 3 +-- stumpy/stumped.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/stumpy/aamped.py b/stumpy/aamped.py index c158c9423..d6bf6d97b 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -12,8 +12,7 @@ logger = logging.getLogger(__name__) -def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): - # function needs to be revised to return top-k matix profile +def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0): """ Compute the non-normalized (i.e., without z-normalization) matrix profile diff --git a/stumpy/stumped.py b/stumpy/stumped.py index e922536f3..6cdfc5aed 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -15,9 +15,7 @@ @core.non_normalized(aamped) def stumped( - dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 -): - # the function needs to be revisd to return top-k matrix profile + dask_client, T_A, m, T_B=None, 
ignore_trivial=True, normalize=True, p=2.0): """ Compute the z-normalized matrix profile with a distributed dask cluster From a37f793306d54123af0660c428ec845a880b3930 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 02:24:53 -0600 Subject: [PATCH 076/416] Add one new unit test --- tests/test_stumped.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index ca53829fc..02e914436 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -608,3 +608,20 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap( naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore:numpy.dtype size changed") +@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") +@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed") +@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning") +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): + with Client(dask_cluster) as dask_client: + k = 3 + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 0755af4ddcdf5ad5a331a1b535af53f879dfc160 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 11:31:12 -0600 Subject: [PATCH 077/416] Add parameter k=1 to arguments This is to avoid unit test failure in non-normalized decorator. After finalizing the normalized function, the non normalized functions will be revised to return top-k matrix profile. --- stumpy/aamped.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/aamped.py b/stumpy/aamped.py index d6bf6d97b..d833ee8b3 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -12,7 +12,8 @@ logger = logging.getLogger(__name__) -def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0): +def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): + # function needs to be revised to return top-k matrix profile """ Compute the non-normalized (i.e., without z-normalization) matrix profile From ca9fdcffcf94d5f0541b74e845fc5e11ee9481ae Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 14:16:06 -0600 Subject: [PATCH 078/416] Revise stumped to return top-k matrix profile --- stumpy/stumped.py | 50 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 6cdfc5aed..2b826ba71 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -15,7 +15,7 @@ @core.non_normalized(aamped) def stumped( - dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0): + dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): """ Compute the z-normalized matrix profile with a distributed dask cluster @@ -55,6 +55,10 @@ def stumped( The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int + The number of smallest elements in distance profile that should be stored + for constructing the top-k matrix profile. 
+ Returns ------- out : numpy.ndarray @@ -184,7 +188,6 @@ def stumped( l = n_A - m + 1 excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - out = np.empty((l, 4), dtype=object) hosts = list(dask_client.ncores().keys()) nworkers = len(hosts) @@ -249,27 +252,44 @@ def stumped( T_B_subseq_isconstant_future, diags_futures[i], ignore_trivial, - 1, + k, ) ) results = dask_client.gather(futures) profile, indices, profile_L, indices_L, profile_R, indices_R = results[0] - profile = np.column_stack((profile, profile_L, profile_R)) - indices = np.column_stack((indices, indices_L, indices_R)) - for i in range(1, len(hosts)): P, I, PL, IL, PR, IR = results[i] - P = np.column_stack((P, PL, PR)) - I = np.column_stack((I, IL, IR)) - for col in range(P.shape[1]): # pragma: no cover - cond = P[:, col] < profile[:, col] - profile[:, col] = np.where(cond, P[:, col], profile[:, col]) - indices[:, col] = np.where(cond, I[:, col], indices[:, col]) - - out[:, 0] = profile[:, 0] - out[:, 1:4] = indices + # Update top-k matrix profile, alternative approach: + # np.argsort(np.concatenate(profile, P), kind='mergesort') + prof = profile.copy() + ind = indices.copy() + for j in range(l): + u, w = 0, 0 + for idx in range(k): + if prof[j, u] <= P[j, w]: + profile[j, idx] = prof[j, u] + indices[j, idx] = ind[j, u] + u += 1 + else: + profile[j, idx] = P[j, w] + indices[j, idx] = I[j, w] + w += 1 + + # Update top-1 left matrix profile and matrix profile index + cond = PL < profile_L + profile_L = np.where(cond, PL, profile_L) + indices_L = np.where(cond, IL, indices_L) + + # Update top-1 right matrix profile and matrix profile index + cond = PR < profile_R + profile_R = np.where(cond, PR, profile_R) + indices_R = np.where(cond, IR, indices_R) + + out = np.empty((l, 2 * k + 2), dtype=object) + out[:, :k] = profile + out[:, k:] = np.column_stack((indices, indices_L, indices_R)) # Delete data from Dask cluster dask_client.cancel(T_A_future) From 9408631f397ef6578dc2d21205e44bb5a45c38f6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 14:16:56 -0600 Subject: [PATCH 079/416] Correct format --- stumpy/stumped.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 2b826ba71..037c4ba52 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -15,7 +15,8 @@ @core.non_normalized(aamped) def stumped( - dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): + dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 +): """ Compute the z-normalized matrix profile with a distributed dask cluster From 435d9b88ed52bfd8800ff5055375661287b3871d Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 16:19:44 -0600 Subject: [PATCH 080/416] several minor changes --- stumpy/aamp.py | 4 ++-- stumpy/scrump.py | 2 +- stumpy/stump.py | 16 ++++++++-------- stumpy/stumped.py | 12 ++++++------ tests/test_stump.py | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 807c3164b..87568f365 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -240,8 +240,8 @@ def _aamp( return np.power(P[0, :, :], 1.0 / p), I[0, :, :] -def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary - # and this function needs to be changed to return top-k +def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): + # function needs to be changed to return top-k matrix profile """ Compute the non-normalized (i.e., without z-normalization) matrix profile diff --git 
a/stumpy/scrump.py b/stumpy/scrump.py index 25c4e4e3f..9b26478c2 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -454,6 +454,7 @@ def __init__( s=None, normalize=True, p=2.0, + k=1, # class needs to be revised to return (top-k) matrix profile ): """ Initialize the `scrump` object @@ -625,7 +626,6 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, - k=1, ) P = np.column_stack((P, PL, PR)) diff --git a/stumpy/stump.py b/stumpy/stump.py index ae6a21a15..60d965590 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -269,8 +269,8 @@ def _stump( """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel computation of the (top-k) matrix profile, the (top-k) matrix profile indices, - the top-1 left matrix profile and matrix profile indices, and the top-1 right - matrix profile and matrix profile indices. + the top-1 left matrix profile and its matrix profile index, and the top-1 right + matrix profile and its matrix profile index. Parameters ---------- @@ -327,7 +327,7 @@ def _stump( k : int The number of smallest elements in distance profile that should be stored - for constructing top-k matrix profile. + for constructing the top-k matrix profile. Returns ------- @@ -430,7 +430,7 @@ def _stump( for thread_idx in prange(n_threads): # Compute and update pearson correlations and matrix profile indices - # within a single thread to avoid race conditions + # within a single thread and avoiding race conditions _compute_diagonal( T_A, T_B, @@ -484,12 +484,12 @@ def _stump( ρR[0, i] = ρR[thread_idx, i] IR[0, i] = IR[thread_idx, i] - # The arrays ρ (and so I) should be reversed since ρ is in ascending order. - ρ = ρ[0, :, ::-1] + # Convert top-k pearson correlations to distances. The arrays ρ (and so I) should + # be reversed since ρ is in ascending order. + p_norm = np.abs(2 * m * (1 - ρ[0, :, ::-1])) I = I[0, :, ::-1] - # Convert pearson correlations to distances. - p_norm = np.abs(2 * m * (1 - ρ)) + # Convert top-1 left/right pearson correlations to distances. p_norm_L = np.abs(2 * m * (1 - ρL[0, :])) p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 037c4ba52..9aa815e6e 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -18,10 +18,10 @@ def stumped( dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 ): """ - Compute the z-normalized matrix profile with a distributed dask cluster + Compute the z-normalized (top-k) matrix profile with a distributed dask cluster This is a highly distributed implementation around the Numba JIT-compiled - parallelized `_stump` function which computes the matrix profile according + parallelized `_stump` function which computes the (top-k) matrix profile according to STOMPopt with Pearson correlations. Parameters @@ -63,10 +63,10 @@ def stumped( Returns ------- out : numpy.ndarray - The first column consists of the matrix profile, the second column - consists of the matrix profile indices, the third column consists of - the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. + The first k columns consist of the top-k matrix profile, the next k columns + consist of their corresponding matrix profile indices, the column at + numpy indexing 2k contains the top-1 left matrix profile indices and the last + column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices. 
See Also -------- diff --git a/tests/test_stump.py b/tests/test_stump.py index 25b9c5283..af2a2315e 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -244,7 +244,7 @@ def test_stump_nan_zero_mean_self_join(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): - k = 2 + k = 3 m = 3 zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) From c6580c8a8dc1d2cdc49ca4724c16d0649ed95028 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 16:20:45 -0600 Subject: [PATCH 081/416] Correct Format --- stumpy/scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 9b26478c2..6a4f7b534 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -454,7 +454,7 @@ def __init__( s=None, normalize=True, p=2.0, - k=1, # class needs to be revised to return (top-k) matrix profile + k=1, # class needs to be revised to return (top-k) matrix profile ): """ Initialize the `scrump` object From e4b0473e0fa38f696a84aac3f2da9938eaeb198d Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 16:26:25 -0600 Subject: [PATCH 082/416] Remove k from arguments --- stumpy/scrump.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 6a4f7b534..e62658fc9 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -454,7 +454,6 @@ def __init__( s=None, normalize=True, p=2.0, - k=1, # class needs to be revised to return (top-k) matrix profile ): """ Initialize the `scrump` object From 8bf05ee3d7534488f0769ff2b4bf95eb1f818fc7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 16:33:27 -0600 Subject: [PATCH 083/416] Pass 1 as value of parameter k to a class method to avoid unit test failure --- stumpy/scrump.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index e62658fc9..ea8808696 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -625,6 +625,7 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, + 1 # revise module to accept parameter k for top-k matrix profile ) P = np.column_stack((P, PL, PR)) From f12261cafd9637a1253444d0c321f61c8ee59b23 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 16:34:42 -0600 Subject: [PATCH 084/416] Pass 1 as the value of parameter k to avoid unit test failure --- stumpy/scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index ea8808696..9fcb51e4b 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -625,7 +625,7 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, - 1 # revise module to accept parameter k for top-k matrix profile + 1, # revise module to accept parameter k for top-k matrix profile ) P = np.column_stack((P, PL, PR)) From 695343e4e7ff927b1793de418bc0b2d3dc45b5df Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 18 May 2022 22:38:25 -0600 Subject: [PATCH 085/416] Use np searchsort to avoid copying arrays into new memory --- stumpy/stumped.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 9aa815e6e..f6932325b 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -262,21 +262,18 @@ def stumped( for i in range(1, len(hosts)): P, I, PL, IL, PR, IR = results[i] - # Update top-k matrix profile, alternative approach: - # np.argsort(np.concatenate(profile, P), kind='mergesort') - 
prof = profile.copy() - ind = indices.copy() for j in range(l): - u, w = 0, 0 - for idx in range(k): - if prof[j, u] <= P[j, w]: - profile[j, idx] = prof[j, u] - indices[j, idx] = ind[j, u] - u += 1 - else: - profile[j, idx] = P[j, w] - indices[j, idx] = I[j, w] - w += 1 + for j in range(l): + # Update profile[j] + for D, ind in zip(P[j], I[j]): + if D >= profile[j, -1]: + break # no need to update profile[j] from this point. + idx = np.searchsorted(profile[j], D, side="right") # might be optimized # with help of checkpoint idx from previous iteration.
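# In isolation, the insert step being built here (searchsorted followed by the
# shifts that follow in this diff) behaves like the standalone sketch below;
# `row_P`, `row_I`, `D`, and `nn_i` are illustrative names, not variables from
# this module:
import numpy as np

row_P = np.array([1.0, 2.0, np.inf])   # one row of the top-k profile, kept ascending
row_I = np.array([7, 11, -1])          # its matrix profile indices
D, nn_i = 1.5, 42                      # candidate distance and its neighbor index
if D < row_P[-1]:
    idx = np.searchsorted(row_P, D, side="right")  # "right" keeps earlier (tied) entries first
    row_P[idx + 1 :] = row_P[idx:-1]   # shift larger entries down (needs .copy() under numba)
    row_I[idx + 1 :] = row_I[idx:-1]
    row_P[idx], row_I[idx] = D, nn_i   # row_P -> [1.0, 1.5, 2.0], row_I -> [7, 42, 11]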
- profile[j, idx + 1 :] = profile[j, idx : k - 1] - profile[j, idx] = D - - indices[j, idx + 1 :] = indices[j, idx : k - 1] - indices[j, idx] = ind + + profile[:, k:] = P + indices[:, k:] = I + idx = np.argsort(profile, axis=1) + profile = np.take_along_axis(profile, idx, axis=1) + indices = np.take_along_axis(indices, idx, axis=1) # Update top-1 left matrix profile and matrix profile index cond = PL < profile_L @@ -286,8 +290,8 @@ def stumped( indices_R = np.where(cond, IR, indices_R) out = np.empty((l, 2 * k + 2), dtype=object) - out[:, :k] = profile - out[:, k:] = np.column_stack((indices, indices_L, indices_R)) + out[:, :k] = profile[:, :k] + out[:, k:] = np.column_stack((indices[:, :k], indices_L, indices_R)) # Delete data from Dask cluster dask_client.cancel(T_A_future) From cc9c0769fde6e270ff903d69459e241207e57da2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 20 May 2022 18:28:46 -0600 Subject: [PATCH 088/416] Change the order of some variables in inputs and outputs --- stumpy/stump.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 60d965590..9f37edc8b 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -40,10 +40,10 @@ def _compute_diagonal( diags_stop_idx, thread_idx, ρ, - I, ρL, - IL, ρR, + I, + IL, IR, ignore_trivial, k, @@ -123,18 +123,18 @@ def _compute_diagonal( ρ : numpy.ndarray The (top-k) Pearson correlations, sorted in ascending order per row - I : numpy.ndarray - The (top-k) matrix profile indices - ρL : numpy.ndarray The top-1 left Pearson correlations - IL : numpy.ndarray - The top-1 left matrix profile indices - ρR : numpy.ndarray The top-1 right Pearson correlations + I : numpy.ndarray + The (top-k) matrix profile indices + + IL : numpy.ndarray + The top-1 left matrix profile indices + IR : numpy.ndarray The top-1 right matrix profile indices @@ -452,10 +452,10 @@ def _stump( diags_ranges[thread_idx, 1], thread_idx, ρ, - I, ρL, - IL, ρR, + I, + IL, IR, ignore_trivial, k, @@ -508,7 +508,7 @@ def _stump( PL = np.sqrt(p_norm_L) PR = np.sqrt(p_norm_R) - return P, I, PL, IL[0, :], PR, IR[0, :] + return P, PL, PR, I, IL[0, :], IR[0, :] @core.non_normalized(aamp) @@ -676,7 +676,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): else: diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) - P, I, PL, IL, PR, IR = _stump( + P, PL, PR, I, IL, IR = _stump( T_A, T_B, m, From a4d456691dacf788739db9dfdf3796ddc568f794 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 20 May 2022 18:47:50 -0600 Subject: [PATCH 089/416] Revise docstrings and comments --- stumpy/aamp.py | 4 ++++ stumpy/aamped.py | 4 ++++ stumpy/stump.py | 42 +++++++++++++++++++++++------------------- stumpy/stumped.py | 19 ++++++++++++------- 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 87568f365..428c3d4bd 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -268,6 +268,10 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. 
+ Returns ------- out : numpy.ndarray diff --git a/stumpy/aamped.py b/stumpy/aamped.py index d833ee8b3..ad147b42f 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -47,6 +47,10 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. + Returns ------- out : numpy.ndarray diff --git a/stumpy/stump.py b/stumpy/stump.py index 9f37edc8b..bcf0d4103 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -143,8 +143,8 @@ def _compute_diagonal( `False`. Default is `True`. k : int - The number of smallest elements in distance profile that should be stored - for constructing the top-k matrix profile. + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. Returns ------- @@ -326,28 +326,28 @@ def _stump( `False`. Default is `True`. k : int - The number of smallest elements in distance profile that should be stored - for constructing the top-k matrix profile. + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. Returns ------- profile : numpy.ndarray - Top-k matrix profile + The (top-k) matrix profile indices : numpy.ndarray - Top-k matrix profile indices + The (top-k) matrix profile indices left profile : numpy.ndarray - Top-1 left matrix profile + The (top-1) left matrix profile left indices : numpy.ndarray - Top-1 left matrix profile indices + The (top-1) left matrix profile indices right profile : numpy.ndarray - Top-1 right matrix profile + The (top-1) right matrix profile right indices : numpy.ndarray - Top-1 right matrix profile indices + The (top-1) right matrix profile indices Notes ----- @@ -484,12 +484,11 @@ def _stump( ρR[0, i] = ρR[thread_idx, i] IR[0, i] = IR[thread_idx, i] - # Convert top-k pearson correlations to distances. The arrays ρ (and so I) should - # be reversed since ρ is in ascending order. + # Reverse top-k rho (and its associated I) to be in descending order and + # then convert from Pearson correlations to Euclidean distances (ascending order) p_norm = np.abs(2 * m * (1 - ρ[0, :, ::-1])) I = I[0, :, ::-1] - # Convert top-1 left/right pearson correlations to distances. p_norm_L = np.abs(2 * m * (1 - ρL[0, :])) p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) @@ -547,16 +546,21 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): ignored when `normalize == True`. k : int, default 1 - The number of smallest elements in distance profile that should be stored - for constructing the top-k matrix profile. + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. Returns ------- out : numpy.ndarray - The first k columns consist of the top-k matrix profile, the next k columns - consist of their corresponding matrix profile indices, the column at - numpy indexing 2k contains the top-1 left matrix profile indices and the last - column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices. 
+ When k = 1 (default), the first column consists of the matrix profile, + the second column consists of the matrix profile indices, the third column + consists of the left matrix profile indices, and the fourth column consists of + the right matrix profile indices. However, when k > 1, the output array will + contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists + of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists + of the corresponding top-k matrix profile indices, and the last two columns + (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to + the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 34d665fc7..99a1ba0b1 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -56,17 +56,22 @@ def stumped( The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. - k : int - The number of smallest elements in distance profile that should be stored - for constructing the top-k matrix profile. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. Returns ------- out : numpy.ndarray - The first k columns consist of the top-k matrix profile, the next k columns - consist of their corresponding matrix profile indices, the column at - numpy indexing 2k contains the top-1 left matrix profile indices and the last - column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices. + When k = 1 (default), the first column consists of the matrix profile, + the second column consists of the matrix profile indices, the third column + consists of the left matrix profile indices, and the fourth column consists of + the right matrix profile indices. However, when k > 1, the output array will + contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists + of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists + of the corresponding top-k matrix profile indices, and the last two columns + (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to + the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. 
See Also -------- From 5ab2978f9c09589e7cbc6279d7c5fb27c07d9723 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 20 May 2022 22:17:51 -0600 Subject: [PATCH 090/416] Fixed order of outputs returned in _stump --- stumpy/scrump.py | 2 +- stumpy/stumped.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 9fcb51e4b..c547ab02b 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -609,7 +609,7 @@ def update(self): if self._chunk_idx < self._n_chunks: start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx] - P, I, PL, IL, PR, IR = _stump( + P, PL, PR, I, IL, IR = _stump( self._T_A, self._T_B, self._m, diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 99a1ba0b1..1c8b2cd80 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -268,15 +268,15 @@ def stumped( results = dask_client.gather(futures) ( profile[:, :k], - indices[:, :k], profile_L, - indices_L, profile_R, + indices[:, :k], + indices_L, indices_R, ) = results[0] for i in range(1, len(hosts)): - P, I, PL, IL, PR, IR = results[i] + P, PL, PR, I, IL, IR = results[i] profile[:, k:] = P indices[:, k:] = I From 6460a5bc57a4a6ecc3beff8a45f6262cfd47807b Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 21 May 2022 11:23:00 -0600 Subject: [PATCH 091/416] Add new function to update TopK MatrixProfile --- stumpy/stumped.py | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 1c8b2cd80..e9784e28f 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -5,6 +5,7 @@ import logging import numpy as np +from numba import njit, prange from . import core, config from .stump import _stump @@ -13,6 +14,35 @@ logger = logging.getLogger(__name__) +@njit(parallel=True) +def _merge_topk_profiles_indices(PA, PB, IA, IB): + """ + Merge two top-k matrix profiles while prioritizing values of PA in ties + and update PA (and so IA) + + PA : numpy.ndarray + a (top-k) matrix profile + + PB : numpy.ndarray + a (top-k) matrix profile + + IA : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PA + + IB : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PB + """ + for i in prange(PA.shape[0]): + for j in range(PA.shape[1]): + if PB[i, j] < PA[i, -1]: + idx = np.searchsorted(PA[i], PB[i, j], side="right") + + PA[i, idx + 1 :] = PA[i, idx:-1] + PA[i, idx] = PB[i, j] + IA[i, idx + 1 :] = IA[i, idx:-1] + IA[i, idx] = IB[i, j] + + @core.non_normalized(aamped) def stumped( dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 @@ -266,23 +296,12 @@ def stumped( indices = np.empty((l, 2 * k)) results = dask_client.gather(futures) - ( - profile[:, :k], - profile_L, - profile_R, - indices[:, :k], - indices_L, - indices_R, - ) = results[0] + profile, profile_L, profile_R, indices, indices_L, indices_R = results[0] for i in range(1, len(hosts)): P, PL, PR, I, IL, IR = results[i] - - profile[:, k:] = P - indices[:, k:] = I - idx = np.argsort(profile, axis=1) - profile = np.take_along_axis(profile, idx, axis=1) - indices = np.take_along_axis(indices, idx, axis=1) + # Update top-k matrix profile and matrix profile indices + _merge_topk_profiles_indices(profile, P, indices, I) # Update top-1 left matrix profile and matrix profile index cond = PL < profile_L From d94db722bb2b3150a38008a323ccd117f4bfc1c2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 21 May 2022 11:34:38 -0600 Subject: [PATCH 092/416] Add .copy() to update array 
properly --- stumpy/stumped.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index e9784e28f..01606f5bf 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -37,9 +37,9 @@ def _merge_topk_profiles_indices(PA, PB, IA, IB): if PB[i, j] < PA[i, -1]: idx = np.searchsorted(PA[i], PB[i, j], side="right") - PA[i, idx + 1 :] = PA[i, idx:-1] + PA[i, idx + 1 :] = PA[i, idx:-1].copy() PA[i, idx] = PB[i, j] - IA[i, idx + 1 :] = IA[i, idx:-1] + IA[i, idx + 1 :] = IA[i, idx:-1].copy() IA[i, idx] = IB[i, j] From 72c3887b014f6be2fe89030013179bfd182bc1c1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 21 May 2022 12:31:08 -0600 Subject: [PATCH 093/416] Add new test function for TopK MatrixProfile with gpu_stump --- tests/test_gpu_stump.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 508b02a56..1a2662647 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -350,3 +350,20 @@ def test_gpu_stump_nan_zero_mean_self_join(): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_gpu_stump_self_join_KNN(T_A, T_B): + k = 3 + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) + comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 0068358c8c771d950090d62779a9fd30336f2bfc Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 00:15:00 -0600 Subject: [PATCH 094/416] Enhance gpu_stump to return TopK MatrixProfile --- stumpy/gpu_stump.py | 247 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 198 insertions(+), 49 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 667dd8b56..606bf7faf 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -7,7 +7,7 @@ import os import numpy as np -from numba import cuda +from numba import cuda, njit, prange from . 
import core, config from .gpu_aamp import gpu_aamp @@ -15,9 +15,38 @@ logger = logging.getLogger(__name__) +@njit(parallel=True) +def _merge_topk_profiles_indices(PA, PB, IA, IB): + """ + Merge two top-k matrix profiles while prioritizing values of PA in ties + and update PA (and so IA) + + PA : numpy.ndarray + a (top-k) matrix profile + + PB : numpy.ndarray + a (top-k) matrix profile + + IA : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PA + + IB : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PB + """ + for i in range(PA.shape[0]): + for j in range(PA.shape[1]): + if PB[i, j] < PA[i, -1]: + idx = np.searchsorted(PA[i], PB[i, j], side="right") + + PA[i, idx + 1 :] = PA[i, idx:-1].copy() + PA[i, idx] = PB[i, j] + IA[i, idx + 1 :] = IA[i, idx:-1].copy() + IA[i, idx] = IB[i, j] + + @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," - "f8[:], f8[:], i8, b1, i8, f8[:, :], i8[:, :], b1)" + "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)" ) def _compute_and_update_PI_kernel( i, @@ -31,12 +60,17 @@ def _compute_and_update_PI_kernel( Σ_T, μ_Q, σ_Q, - k, + profile_len, ignore_trivial, excl_zone, profile, + profile_L, + profile_R, indices, + indices_L, + indices_R, compute_QT, + k, ): """ A Numba CUDA kernel to update the matrix profile and matrix profile indices @@ -79,7 +113,7 @@ def _compute_and_update_PI_kernel( σ_Q : numpy.ndarray Standard deviation of the query sequence, `Q` - k : int + profile_len : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -91,18 +125,30 @@ def _compute_and_update_PI_kernel( sliding window profile : numpy.ndarray - Matrix profile. The first column consists of the global matrix profile, - the second column consists of the left matrix profile, and the third - column consists of the right matrix profile. + The (top-k) matrix profile, sorted in ascending order per row + + profile_L : numpy.ndarray + The (top-1) left matrix profile + + profile_R : numpy.ndarray + The (top-1) right matrix profile indices : numpy.ndarray - The first column consists of the matrix profile indices, the second - column consists of the left matrix profile indices, and the third - column consists of the right matrix profile indices. + The (top-k) matrix profile indices + + indices_L : numpy.ndarray + The (top-1) left matrix profile indices + + indices_R : numpy.ndarray + The (top-1) right matrix profile indices compute_QT : bool A boolean flag for whether or not to compute QT + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. 
+ Returns ------- None @@ -126,7 +172,7 @@ def _compute_and_update_PI_kernel( for j in range(start, QT_out.shape[0], stride): zone_start = max(0, j - excl_zone) - zone_stop = min(k, j + excl_zone) + zone_stop = min(profile_len, j + excl_zone) if compute_QT: QT_out[j] = ( @@ -157,16 +203,22 @@ def _compute_and_update_PI_kernel( if ignore_trivial: if i <= zone_stop and i >= zone_start: p_norm = np.inf - if p_norm < profile[j, 1] and i < j: - profile[j, 1] = p_norm - indices[j, 1] = i - if p_norm < profile[j, 2] and i > j: - profile[j, 2] = p_norm - indices[j, 2] = i - - if p_norm < profile[j, 0]: - profile[j, 0] = p_norm - indices[j, 0] = i + if p_norm < profile_L[j] and i < j: + profile_L[j] = p_norm + indices_L[j] = i + if p_norm < profile_R[j] and i > j: + profile_R[j] = p_norm + indices_R[j] = i + + for idx in range(k, -1, -1): + if (p_norm < profile[j, idx - 1]) and (idx > 0): + profile[j, idx - 1] = profile[j, idx - 2] + indices[j, idx - 1] = indices[j, idx - 2] + else: + break + if idx < k: + profile[j, idx] = p_norm + indices[j, idx] = i def _gpu_stump( @@ -181,10 +233,11 @@ def _gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + profile_len, ignore_trivial=True, range_start=1, device_id=0, + k=1, ): """ A Numba CUDA version of STOMP for parallel computation of the @@ -235,7 +288,7 @@ def _gpu_stump( The file name for the standard deviation of the query sequence, `Q`, relative to the current sliding window - k : int + profile_len : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -249,6 +302,10 @@ def _gpu_stump( device_id : int The (GPU) device number to use. The default value is `0`. + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. 
+ Returns ------- profile_fname : str @@ -316,11 +373,22 @@ def _gpu_stump( device_M_T = cuda.to_device(M_T) device_Σ_T = cuda.to_device(Σ_T) - profile = np.full((k, 3), np.inf, dtype=np.float64) - indices = np.full((k, 3), -1, dtype=np.int64) + profile = np.full((profile_len, k), np.inf, dtype=np.float64) + indices = np.full((profile_len, k), -1, dtype=np.int64) + + profile_L = np.full(profile_len, np.inf, dtype=np.float64) + indices_L = np.full(profile_len, -1, dtype=np.int64) + + profile_R = np.full(profile_len, np.inf, dtype=np.float64) + indices_R = np.full(profile_len, -1, dtype=np.int64) device_profile = cuda.to_device(profile) + device_profile_L = cuda.to_device(profile_L) + device_profile_R = cuda.to_device(profile_R) device_indices = cuda.to_device(indices) + device_indices_L = cuda.to_device(indices_L) + device_indices_R = cuda.to_device(indices_R) + _compute_and_update_PI_kernel[blocks_per_grid, threads_per_block]( range_start - 1, device_T_A, @@ -333,12 +401,17 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - k, + profile_len, ignore_trivial, excl_zone, device_profile, + device_profile_L, + device_profile_R, device_indices, + device_indices_L, + device_indices_R, False, + k, ) for i in range(range_start, range_stop): @@ -354,27 +427,50 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - k, + profile_len, ignore_trivial, excl_zone, device_profile, + device_profile_L, + device_profile_R, device_indices, + device_indices_L, + device_indices_R, True, + k, ) profile = device_profile.copy_to_host() + profile_L = device_profile_L.copy_to_host() + profile_R = device_profile_R.copy_to_host() indices = device_indices.copy_to_host() + indices_L = device_indices_L.copy_to_host() + indices_R = device_indices_R.copy_to_host() + profile = np.sqrt(profile) + profile_L = np.sqrt(profile_L) + profile_R = np.sqrt(profile_R) profile_fname = core.array_to_temp_file(profile) + profile_L_fname = core.array_to_temp_file(profile_L) + profile_R_fname = core.array_to_temp_file(profile_R) indices_fname = core.array_to_temp_file(indices) + indices_L_fname = core.array_to_temp_file(indices_L) + indices_R_fname = core.array_to_temp_file(indices_R) - return profile_fname, indices_fname + return ( + profile_fname, + profile_L_fname, + profile_R_fname, + indices_fname, + indices_L_fname, + indices_R_fname, + ) @core.non_normalized(gpu_aamp) def gpu_stump( - T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0 + T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0, k=1 ): """ Compute the z-normalized matrix profile with one or more GPU devices @@ -417,13 +513,22 @@ def gpu_stump( The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage when k > 1. + Returns ------- out : numpy.ndarray - The first column consists of the matrix profile, the second column - consists of the matrix profile indices, the third column consists of - the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. + When k = 1 (default), the first column consists of the matrix profile, + the second column consists of the matrix profile indices, the third column + consists of the left matrix profile indices, and the fourth column consists of + the right matrix profile indices. 
However, when k > 1, the output array will + contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists + of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists + of the corresponding top-k matrix profile indices, and the last two columns + (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to + the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- @@ -505,7 +610,7 @@ def gpu_stump( logger.warning("Try setting `ignore_trivial = False`.") n = T_B.shape[0] - k = T_A.shape[0] - m + 1 + profile_len = T_A.shape[0] - m + 1 l = n - m + 1 excl_zone = int( np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM) @@ -518,8 +623,6 @@ def gpu_stump( μ_Q_fname = core.array_to_temp_file(μ_Q) σ_Q_fname = core.array_to_temp_file(σ_Q) - out = np.empty((k, 4), dtype=object) - if isinstance(device_id, int): device_ids = [device_id] else: @@ -528,6 +631,12 @@ def gpu_stump( profile = [None] * len(device_ids) indices = [None] * len(device_ids) + profile_L = [None] * len(device_ids) + indices_L = [None] * len(device_ids) + + profile_R = [None] * len(device_ids) + indices_R = [None] * len(device_ids) + for _id in device_ids: with cuda.gpus[_id]: if ( @@ -571,16 +680,24 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + profile_len, ignore_trivial, start + 1, device_ids[idx], + k, ), ) else: # Execute last chunk in parent process # Only parent process is executed when a single GPU is requested - profile[idx], indices[idx] = _gpu_stump( + ( + profile[idx], + profile_L[idx], + profile_R[idx], + indices[idx], + indices_L[idx], + indices_R[idx], + ) = _gpu_stump( T_A_fname, T_B_fname, m, @@ -592,10 +709,11 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + profile_len, ignore_trivial, start + 1, device_ids[idx], + k, ) # Clean up process pool for multi-GPU request @@ -606,7 +724,14 @@ def gpu_stump( # Collect results from spawned child processes if they exist for idx, result in enumerate(results): if result is not None: - profile[idx], indices[idx] = result.get() + ( + profile[idx], + profile_L[idx], + profile_R[idx], + indices[idx], + indices_L[idx], + indices_R[idx], + ) = result.get() os.remove(T_A_fname) os.remove(T_B_fname) @@ -621,22 +746,46 @@ def gpu_stump( for idx in range(len(device_ids)): profile_fname = profile[idx] + profile_L_fname = profile_L[idx] + profile_R_fname = profile_R[idx] indices_fname = indices[idx] + indices_L_fname = indices_L[idx] + indices_R_fname = indices_R[idx] + profile[idx] = np.load(profile_fname, allow_pickle=False) + profile_L[idx] = np.load(profile_L_fname, allow_pickle=False) + profile_R[idx] = np.load(profile_R_fname, allow_pickle=False) indices[idx] = np.load(indices_fname, allow_pickle=False) + indices_L[idx] = np.load(indices_L_fname, allow_pickle=False) + indices_R[idx] = np.load(indices_R_fname, allow_pickle=False) + os.remove(profile_fname) + os.remove(profile_L_fname) + os.remove(profile_R_fname) os.remove(indices_fname) + os.remove(indices_L_fname) + os.remove(indices_R_fname) for i in range(1, len(device_ids)): - # Update all matrix profiles and matrix profile indices - # (global, left, right) and store in profile[0] and indices[0] - for col in range(profile[0].shape[1]): # pragma: no cover - cond = profile[0][:, col] < profile[i][:, col] - profile[0][:, col] = np.where(cond, profile[0][:, col], profile[i][:, col]) - indices[0][:, col] = np.where(cond, indices[0][:, col], indices[i][:, col]) - - out[:, 0] = 
profile[0][:, 0] - out[:, 1:4] = indices[0][:, :] + # Update (top-k) matrix profile and matrix profile indices + _merge_topk_profiles_indices(profile[0], profile[i], indices[0], indices[i]) + + # Update (top-1) left matrix profile and matrix profil indices + cond = profile_L[0] < profile_L[i] + profile_L[0] = np.where(cond, profile_L[0], profile_L[i]) + indices_L[0] = np.where(cond, indices_L[0], indices_L[i]) + + # Update (top-1) right matrix profile and matrix profil indices + cond = profile_R[0] < profile_R[i] + profile_R[0] = np.where(cond, profile_R[0], profile_R[i]) + indices_R[0] = np.where(cond, indices_R[0], indices_R[i]) + + out = np.empty( + (profile_len, 2 * k + 2), dtype=object + ) # last two columns are to store + # (top-1) left/right matrix profile indices + out[:, :k] = profile[0] + out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0])) threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover From 1e7c05e0dce914ca2fc8fbc39cb5411b4fd5fb03 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 14:16:40 -0600 Subject: [PATCH 095/416] Refactored function for merging two TopK MatrixProfile --- stumpy/core.py | 37 ++++++++++++++++++++++++++++++++++++- stumpy/gpu_stump.py | 41 +++++++---------------------------------- stumpy/stumped.py | 33 +-------------------------------- 3 files changed, 44 insertions(+), 67 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index a2a30c043..64dee293c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -7,7 +7,7 @@ import inspect import numpy as np -from numba import njit +from numba import njit, prange from scipy.signal import convolve from scipy.ndimage import maximum_filter1d, minimum_filter1d from scipy import linalg @@ -2494,3 +2494,38 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): MPdist = partition[k] return MPdist + + +@njit(parallel=True) +def _merge_topk_profiles_indices(PA, PB, IA, IB): + """ + Merge two top-k matrix profiles PA and PB, and update PA (in place) while + prioritizing values of PA in ties. Also, update IA accordingly. + + Parameters + ---------- + PA : numpy.ndarray + a (top-k) matrix profile + + PB : numpy.ndarray + a (top-k) matrix profile + + IA : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PA + + IB : numpy.ndarray + a (top-k) matrix profile indices, corresponding to PB + + Returns + ------- + None + """ + for i in prange(PA.shape[0]): + for j in range(PA.shape[1]): + if PB[i, j] < PA[i, -1]: + idx = np.searchsorted(PA[i], PB[i, j], side="right") + + PA[i, idx + 1 :] = PA[i, idx:-1].copy() + PA[i, idx] = PB[i, j] + IA[i, idx + 1 :] = IA[i, idx:-1].copy() + IA[i, idx] = IB[i, j] diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 606bf7faf..2df5b14b1 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -7,7 +7,7 @@ import os import numpy as np -from numba import cuda, njit, prange +from numba import cuda from . 
import core, config from .gpu_aamp import gpu_aamp @@ -15,35 +15,6 @@ logger = logging.getLogger(__name__) -@njit(parallel=True) -def _merge_topk_profiles_indices(PA, PB, IA, IB): - """ - Merge two top-k matrix profiles while prioritizing values of PA in ties - and update PA (and so IA) - - PA : numpy.ndarray - a (top-k) matrix profile - - PB : numpy.ndarray - a (top-k) matrix profile - - IA : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PA - - IB : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PB - """ - for i in range(PA.shape[0]): - for j in range(PA.shape[1]): - if PB[i, j] < PA[i, -1]: - idx = np.searchsorted(PA[i], PB[i, j], side="right") - - PA[i, idx + 1 :] = PA[i, idx:-1].copy() - PA[i, idx] = PB[i, j] - IA[i, idx + 1 :] = IA[i, idx:-1].copy() - IA[i, idx] = IB[i, j] - - @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)" @@ -209,7 +180,7 @@ def _compute_and_update_PI_kernel( if p_norm < profile_R[j] and i > j: profile_R[j] = p_norm indices_R[j] = i - + for idx in range(k, -1, -1): if (p_norm < profile[j, idx - 1]) and (idx > 0): profile[j, idx - 1] = profile[j, idx - 2] @@ -766,9 +737,11 @@ def gpu_stump( os.remove(indices_L_fname) os.remove(indices_R_fname) + profile_0 = profile[0].copy() + indices_0 = indices[0].copy() for i in range(1, len(device_ids)): # Update (top-k) matrix profile and matrix profile indices - _merge_topk_profiles_indices(profile[0], profile[i], indices[0], indices[i]) + core._merge_topk_profiles_indices(profile_0, profile[i], indices_0, indices[i]) # Update (top-1) left matrix profile and matrix profil indices cond = profile_L[0] < profile_L[i] @@ -784,8 +757,8 @@ def gpu_stump( (profile_len, 2 * k + 2), dtype=object ) # last two columns are to store # (top-1) left/right matrix profile indices - out[:, :k] = profile[0] - out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0])) + out[:, :k] = profile_0 + out[:, k:] = np.column_stack((indices_0, indices_L[0], indices_R[0])) threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 01606f5bf..0667713d3 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -5,7 +5,6 @@ import logging import numpy as np -from numba import njit, prange from . 
import core, config from .stump import _stump @@ -13,36 +12,6 @@ logger = logging.getLogger(__name__) - -@njit(parallel=True) -def _merge_topk_profiles_indices(PA, PB, IA, IB): - """ - Merge two top-k matrix profiles while prioritizing values of PA in ties - and update PA (and so IA) - - PA : numpy.ndarray - a (top-k) matrix profile - - PB : numpy.ndarray - a (top-k) matrix profile - - IA : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PA - - IB : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PB - """ - for i in prange(PA.shape[0]): - for j in range(PA.shape[1]): - if PB[i, j] < PA[i, -1]: - idx = np.searchsorted(PA[i], PB[i, j], side="right") - - PA[i, idx + 1 :] = PA[i, idx:-1].copy() - PA[i, idx] = PB[i, j] - IA[i, idx + 1 :] = IA[i, idx:-1].copy() - IA[i, idx] = IB[i, j] - - @core.non_normalized(aamped) def stumped( dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 @@ -301,7 +270,7 @@ def stumped( for i in range(1, len(hosts)): P, PL, PR, I, IL, IR = results[i] # Update top-k matrix profile and matrix profile indices - _merge_topk_profiles_indices(profile, P, indices, I) + core._merge_topk_profiles_indices(profile, P, indices, I) # Update top-1 left matrix profile and matrix profile index cond = PL < profile_L From 2ebc276498eab50fb08c3f1f2ecf30db337eb80e Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 14:21:33 -0600 Subject: [PATCH 096/416] Clean up code --- stumpy/gpu_stump.py | 10 +++++----- stumpy/stumped.py | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 2df5b14b1..803b020f0 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -737,11 +737,11 @@ def gpu_stump( os.remove(indices_L_fname) os.remove(indices_R_fname) - profile_0 = profile[0].copy() - indices_0 = indices[0].copy() for i in range(1, len(device_ids)): # Update (top-k) matrix profile and matrix profile indices - core._merge_topk_profiles_indices(profile_0, profile[i], indices_0, indices[i]) + core._merge_topk_profiles_indices( + profile[0], profile[i], indices[0], indices[i] + ) # Update (top-1) left matrix profile and matrix profil indices cond = profile_L[0] < profile_L[i] @@ -757,8 +757,8 @@ def gpu_stump( (profile_len, 2 * k + 2), dtype=object ) # last two columns are to store # (top-1) left/right matrix profile indices - out[:, :k] = profile_0 - out[:, k:] = np.column_stack((indices_0, indices_L[0], indices_R[0])) + out[:, :k] = profile[0] + out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0])) threshold = 10e-6 if core.are_distances_too_small(out[:, 0], threshold=threshold): # pragma: no cover diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 0667713d3..17e0d556c 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -12,6 +12,7 @@ logger = logging.getLogger(__name__) + @core.non_normalized(aamped) def stumped( dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1 @@ -283,8 +284,8 @@ def stumped( indices_R = np.where(cond, IR, indices_R) out = np.empty((l, 2 * k + 2), dtype=object) - out[:, :k] = profile[:, :k] - out[:, k:] = np.column_stack((indices[:, :k], indices_L, indices_R)) + out[:, :k] = profile + out[:, k:] = np.column_stack((indices, indices_L, indices_R)) # Delete data from Dask cluster dask_client.cancel(T_A_future) From 1170f2ebd770ed4f70aa3048dd4e6778bb723c53 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 15:44:18 -0600 Subject: [PATCH 097/416] Add naive version of 
merge_topk_matrix_profile function --- tests/test_core.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 6ef78d230..c26dd449d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -82,6 +82,15 @@ def naive_bsf_indices(n): return np.array(out) +def naive_merge_topk_profiles_indices(PA, PB, IA, IB): + profile = np.column_stack((PA, PB)) + indices = np.column_stack((IA, IB)) + + idx = np.argsort(profile, axis=1) + PA[:, :] = np.take_along_axis(profile, idx, axis=1)[:, : PA.shape[1]] + IA[:, :] = np.take_along_axis(indices, idx, axis=1)[:, : PA.shape[1]] + + test_data = [ (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)), ( From 2a827b450df582b95d68a0578ca82ced758fe7f1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 15:48:54 -0600 Subject: [PATCH 098/416] Rename function --- stumpy/core.py | 2 +- stumpy/gpu_stump.py | 2 +- stumpy/stumped.py | 2 +- tests/test_core.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 64dee293c..89b6266fc 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2497,7 +2497,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): @njit(parallel=True) -def _merge_topk_profiles_indices(PA, PB, IA, IB): +def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles PA and PB, and update PA (in place) while prioritizing values of PA in ties. Also, update IA accordingly. diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 803b020f0..cc4537813 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -739,7 +739,7 @@ def gpu_stump( for i in range(1, len(device_ids)): # Update (top-k) matrix profile and matrix profile indices - core._merge_topk_profiles_indices( + core._merge_topk_PI( profile[0], profile[i], indices[0], indices[i] ) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 17e0d556c..0f6459db5 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -271,7 +271,7 @@ def stumped( for i in range(1, len(hosts)): P, PL, PR, I, IL, IR = results[i] # Update top-k matrix profile and matrix profile indices - core._merge_topk_profiles_indices(profile, P, indices, I) + core._merge_topk_PI(profile, P, indices, I) # Update top-1 left matrix profile and matrix profile index cond = PL < profile_L diff --git a/tests/test_core.py b/tests/test_core.py index c26dd449d..95dc268d3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -82,7 +82,7 @@ def naive_bsf_indices(n): return np.array(out) -def naive_merge_topk_profiles_indices(PA, PB, IA, IB): +def naive_merge_topk_PI(PA, PB, IA, IB): profile = np.column_stack((PA, PB)) indices = np.column_stack((IA, IB)) From cc62c74f11f229dcc7bd98aabba2759cda91260f Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 16:13:24 -0600 Subject: [PATCH 099/416] Revise naive function to make it more readable --- tests/test_core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 95dc268d3..4585de1af 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -87,9 +87,11 @@ def naive_merge_topk_PI(PA, PB, IA, IB): indices = np.column_stack((IA, IB)) idx = np.argsort(profile, axis=1) - PA[:, :] = np.take_along_axis(profile, idx, axis=1)[:, : PA.shape[1]] - IA[:, :] = np.take_along_axis(indices, idx, axis=1)[:, : PA.shape[1]] + profile = np.take_along_axis(profile, idx, axis=1) + indices = np.take_along_axis(indices, idx, axis=1) + PA[:, :] = profile[:, : PA.shape[1]] + 
IA[:, :] = indices[:, : PA.shape[1]] test_data = [ (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)), From b6b74c4edaa2cb2bf5c3a45987b631cf1a76ab9e Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 17:32:44 -0600 Subject: [PATCH 100/416] Add test function for merge_topk_PI --- tests/test_core.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 4585de1af..8e29c2f1a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1039,3 +1039,51 @@ def test_select_P_ABBA_val_inf(): p_abba.sort() ref = p_abba[k - 1] npt.assert_almost_equal(ref, comp) + + +def test_merge_topk_PI(): + PA = np.array([ + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.1, 0.2, 0.3, 0.4], + [0.1, 0.2, np.inf, np.inf, np.inf], + [np.inf, np.inf, np.inf, np.inf, np.inf] + ]) + + PB = np.array([ + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.15, 0.25, 0.35, 0.45], + [0.15, 0.25, 0.35, 0.45, 0.55], + [0.01, 0.02, 0.03, 0.04, 0.05], + [0.6, 0.7, 0.8, 0.9, 1], + [0.1, 0.1, 0.2, 0.3, 0.4], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.0, 0.3, np.inf, np.inf, np.inf], + [np.inf, np.inf, np.inf, np.inf, np.inf], + ]) + + n, k = PA.shape + + IA = np.arange(n * k).reshape(n, k) + IB = IA.copy() + n * k + IA[7, 2:] = -1 + IA[8, :] = -1 + IB[7, 2:] = -1 + IB[8, :] = -1 + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive_merge_topk_PI(ref_P, PB, ref_I, IB) + core._merge_topk_PI(comp_P, PB, comp_I, IB) + + ref = np.column_stack((ref_P, ref_I)) + comp = np.column_stack((comp_P, comp_I)) + npt.assert_array_equal(ref, comp) From b6d6450850453bfde5c932d129e79e063435b9f8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 17:37:51 -0600 Subject: [PATCH 101/416] Moved naive function to naive.py --- tests/naive.py | 12 ++++++++++++ tests/test_core.py | 13 +------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 4a5ed789a..3074c2359 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1760,3 +1760,15 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w ) return total_ndists + + +def merge_topk_PI(PA, PB, IA, IB): + profile = np.column_stack((PA, PB)) + indices = np.column_stack((IA, IB)) + + idx = np.argsort(profile, axis=1) + profile = np.take_along_axis(profile, idx, axis=1) + indices = np.take_along_axis(indices, idx, axis=1) + + PA[:, :] = profile[:, : PA.shape[1]] + IA[:, :] = indices[:, : PA.shape[1]] diff --git a/tests/test_core.py b/tests/test_core.py index 8e29c2f1a..e45f8c600 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -82,17 +82,6 @@ def naive_bsf_indices(n): return np.array(out) -def naive_merge_topk_PI(PA, PB, IA, IB): - profile = np.column_stack((PA, PB)) - indices = np.column_stack((IA, IB)) - - idx = np.argsort(profile, axis=1) - profile = np.take_along_axis(profile, idx, axis=1) - indices = np.take_along_axis(indices, idx, axis=1) - - PA[:, :] = profile[:, : PA.shape[1]] - IA[:, :] = indices[:, : PA.shape[1]] - test_data = [ (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)), ( @@ -1081,7 +1070,7 @@ def test_merge_topk_PI(): comp_P = PA.copy() comp_I = IA.copy() - naive_merge_topk_PI(ref_P, PB, ref_I, IB) + naive.merge_topk_PI(ref_P, PB, ref_I, IB) core._merge_topk_PI(comp_P, PB, comp_I, IB) 
ref = np.column_stack((ref_P, ref_I)) From 97a04f457ca7c7542b768e504652bf2a9b0d7abf Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 17:46:32 -0600 Subject: [PATCH 102/416] Correct Format --- stumpy/gpu_stump.py | 4 +--- tests/test_core.py | 50 ++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index cc4537813..26e49cbb2 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -739,9 +739,7 @@ def gpu_stump( for i in range(1, len(device_ids)): # Update (top-k) matrix profile and matrix profile indices - core._merge_topk_PI( - profile[0], profile[i], indices[0], indices[i] - ) + core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i]) # Update (top-1) left matrix profile and matrix profil indices cond = profile_L[0] < profile_L[i] diff --git a/tests/test_core.py b/tests/test_core.py index e45f8c600..707893d14 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1031,29 +1031,33 @@ def test_select_P_ABBA_val_inf(): def test_merge_topk_PI(): - PA = np.array([ - [0.0, 0.0, 0.0, 0.0, 0.0], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.1, 0.2, 0.3, 0.4], - [0.1, 0.2, np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf, np.inf, np.inf] - ]) - - PB = np.array([ - [0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.15, 0.25, 0.35, 0.45], - [0.15, 0.25, 0.35, 0.45, 0.55], - [0.01, 0.02, 0.03, 0.04, 0.05], - [0.6, 0.7, 0.8, 0.9, 1], - [0.1, 0.1, 0.2, 0.3, 0.4], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.0, 0.3, np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf, np.inf, np.inf], - ]) + PA = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.1, 0.1, 0.2, 0.3, 0.4], + [0.1, 0.2, np.inf, np.inf, np.inf], + [np.inf, np.inf, np.inf, np.inf, np.inf], + ] + ) + + PB = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.15, 0.25, 0.35, 0.45], + [0.15, 0.25, 0.35, 0.45, 0.55], + [0.01, 0.02, 0.03, 0.04, 0.05], + [0.6, 0.7, 0.8, 0.9, 1], + [0.1, 0.1, 0.2, 0.3, 0.4], + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.0, 0.3, np.inf, np.inf, np.inf], + [np.inf, np.inf, np.inf, np.inf, np.inf], + ] + ) n, k = PA.shape From 50f4ee8cf84b6f5958b9691d23d961a26d5f06b5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 17:58:52 -0600 Subject: [PATCH 103/416] Correct Style --- stumpy/aamp.py | 3 ++- stumpy/aamped.py | 3 ++- stumpy/gpu_stump.py | 24 ++++++++++++++---------- stumpy/stump.py | 24 ++++++++++++++---------- stumpy/stumped.py | 18 ++++++++++-------- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 428c3d4bd..82eb41639 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -270,7 +270,8 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. 
Returns ------- diff --git a/stumpy/aamped.py b/stumpy/aamped.py index ad147b42f..4499c58b5 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -49,7 +49,8 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 26e49cbb2..15583c58e 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -118,7 +118,8 @@ def _compute_and_update_PI_kernel( k : int The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- @@ -275,7 +276,8 @@ def _gpu_stump( k : int The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- @@ -486,20 +488,22 @@ def gpu_stump( k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- out : numpy.ndarray When k = 1 (default), the first column consists of the matrix profile, the second column consists of the matrix profile indices, the third column - consists of the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. However, when k > 1, the output array will - contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists - of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists - of the corresponding top-k matrix profile indices, and the last two columns - (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to - the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + consists of the left matrix profile indices, and the fourth column consists + of the right matrix profile indices. However, when k > 1, the output array + will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) + consists of the top-k matrix profile, the next set of k columns + (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile + indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or, + equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left + matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- diff --git a/stumpy/stump.py b/stumpy/stump.py index bcf0d4103..f5a5fe811 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -144,7 +144,8 @@ def _compute_diagonal( k : int The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. 
Returns ------- @@ -327,7 +328,8 @@ def _stump( k : int The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- @@ -547,20 +549,22 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- out : numpy.ndarray When k = 1 (default), the first column consists of the matrix profile, the second column consists of the matrix profile indices, the third column - consists of the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. However, when k > 1, the output array will - contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists - of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists - of the corresponding top-k matrix profile indices, and the last two columns - (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to - the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + consists of the left matrix profile indices, and the fourth column consists + of the right matrix profile indices. However, when k > 1, the output array + will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) + consists of the top-k matrix profile, the next set of k columns + (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile + indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or, + equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left + matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 0f6459db5..f98338ce9 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -58,20 +58,22 @@ def stumped( k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage when k > 1. + Note that this will increase the total computational time and memory usage + when k > 1. Returns ------- out : numpy.ndarray When k = 1 (default), the first column consists of the matrix profile, the second column consists of the matrix profile indices, the third column - consists of the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. However, when k > 1, the output array will - contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists - of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists - of the corresponding top-k matrix profile indices, and the last two columns - (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to - the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + consists of the left matrix profile indices, and the fourth column consists + of the right matrix profile indices. However, when k > 1, the output array + will contain exactly 2 * k + 2 columns. 
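To make the 2 * k + 2 column layout concrete, a caller could unpack the output as below. This is only a sketch with toy inputs and it assumes a STUMPY build that already includes the `k` parameter added in this patch series:

import numpy as np
import stumpy

T = np.random.rand(64)   # toy time series
m, k = 8, 3
out = stumpy.stump(T, m, k=k)

P = out[:, :k].astype(np.float64)             # top-k matrix profile
I = out[:, k : 2 * k].astype(np.int64)        # top-k matrix profile indices
I_left = out[:, 2 * k].astype(np.int64)       # top-1 left matrix profile indices
I_right = out[:, 2 * k + 1].astype(np.int64)  # top-1 right matrix profile indices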
The first k columns (i.e., out[:, :k]) + consists of the top-k matrix profile, the next set of k columns + (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile + indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or, + equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left + matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- From 5b7da52bf1a936a147d47321e06653a67da1db29 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 18:02:04 -0600 Subject: [PATCH 104/416] Add parameter k to avoid failure in non-normalized decorater unit test --- stumpy/gpu_aamp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py index e62be7b02..0c9a21a85 100644 --- a/stumpy/gpu_aamp.py +++ b/stumpy/gpu_aamp.py @@ -339,7 +339,9 @@ def _gpu_aamp( return profile_fname, indices_fname -def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0): +def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1): + # function needs to be revised to return (top-k) matrix profile and + # matrix profile indices """ Compute the non-normalized (i.e., without z-normalization) matrix profile with one or more GPU devices @@ -375,6 +377,11 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out : numpy.ndarray From e983ef0997ac2e4bcf1c14387be5ec617ec66a4d Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 20:34:58 -0600 Subject: [PATCH 105/416] Skip a for-loop in unit test coverage --- stumpy/gpu_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 15583c58e..0d76e19b6 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -741,7 +741,7 @@ def gpu_stump( os.remove(indices_L_fname) os.remove(indices_R_fname) - for i in range(1, len(device_ids)): + for i in range(1, len(device_ids)): # pragma: no cover # Update (top-k) matrix profile and matrix profile indices core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i]) From b0c5cace4951f97b201f7b42ca0d9627c22bf890 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 20:37:07 -0600 Subject: [PATCH 106/416] All tests pass From 787e3f761162475e556c7fb4bbc252796fa2f9a6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 21:20:50 -0600 Subject: [PATCH 107/416] Use randomly generated arrays for test function --- tests/test_core.py | 77 ++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 707893d14..21e08fd76 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1031,42 +1031,53 @@ def test_select_P_ABBA_val_inf(): def test_merge_topk_PI(): - PA = np.array( - [ - [0.0, 0.0, 0.0, 0.0, 0.0], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.1, 0.1, 0.2, 0.3, 0.4], - [0.1, 0.2, np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf, np.inf, np.inf], - ] - ) - - PB = np.array( - [ - [0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.15, 0.25, 0.35, 0.45], - [0.15, 0.25, 0.35, 0.45, 
0.55], - [0.01, 0.02, 0.03, 0.04, 0.05], - [0.6, 0.7, 0.8, 0.9, 1], - [0.1, 0.1, 0.2, 0.3, 0.4], - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.0, 0.3, np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf, np.inf, np.inf], - ] - ) - - n, k = PA.shape + n=50 + k=5 + + PA = np.random.randint(0, 5, size=(n, k)) + PA = np.sort(PA) + + PB = np.random.randint(0, 5, size=(n, k)) + PB = np.sort(PB) + + #PA = np.array( + # [ + # [0.0, 0.0, 0.0, 0.0, 0.0], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.1, 0.1, 0.2, 0.3, 0.4], + # [0.1, 0.2, np.inf, np.inf, np.inf], + # [np.inf, np.inf, np.inf, np.inf, np.inf], + # ] + #) + + #PB = np.array( + # [ + # [0.0, 0.0, 0.0, 0.0, 0.0], + # [0.0, 0.15, 0.25, 0.35, 0.45], + # [0.15, 0.25, 0.35, 0.45, 0.55], + # [0.01, 0.02, 0.03, 0.04, 0.05], + # [0.6, 0.7, 0.8, 0.9, 1], + # [0.1, 0.1, 0.2, 0.3, 0.4], + # [0.1, 0.2, 0.3, 0.4, 0.5], + # [0.0, 0.3, np.inf, np.inf, np.inf], + # [np.inf, np.inf, np.inf, np.inf, np.inf], + # ] + #) IA = np.arange(n * k).reshape(n, k) IB = IA.copy() + n * k - IA[7, 2:] = -1 - IA[8, :] = -1 - IB[7, 2:] = -1 - IB[8, :] = -1 + + #n, k = PA.shape + #IA = np.arange(n * k).reshape(n, k) + #IB = IA.copy() + n * k + #IA[7, 2:] = -1 + #IA[8, :] = -1 + #IB[7, 2:] = -1 + #IB[8, :] = -1 ref_P = PA.copy() ref_I = IA.copy() From 2ff2b85d7a4ec6f3bbea3dd21c05681a15e62dc7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 23:01:32 -0600 Subject: [PATCH 108/416] Add minor comment --- stumpy/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 89b6266fc..7528d5f85 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2525,6 +2525,8 @@ def _merge_topk_PI(PA, PB, IA, IB): if PB[i, j] < PA[i, -1]: idx = np.searchsorted(PA[i], PB[i, j], side="right") + # .copy() operation is needed to resolve wrong result that is + # caused by "prange" PA[i, idx + 1 :] = PA[i, idx:-1].copy() PA[i, idx] = PB[i, j] IA[i, idx + 1 :] = IA[i, idx:-1].copy() From c3060278426d583fa4a35c41b0c8758f8aa857a8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 22 May 2022 23:04:40 -0600 Subject: [PATCH 109/416] Erase unnecessary comments --- tests/test_core.py | 40 ++-------------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 21e08fd76..a1efbf681 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1031,8 +1031,8 @@ def test_select_P_ABBA_val_inf(): def test_merge_topk_PI(): - n=50 - k=5 + n = 50 + k = 5 PA = np.random.randint(0, 5, size=(n, k)) PA = np.sort(PA) @@ -1040,45 +1040,9 @@ def test_merge_topk_PI(): PB = np.random.randint(0, 5, size=(n, k)) PB = np.sort(PB) - #PA = np.array( - # [ - # [0.0, 0.0, 0.0, 0.0, 0.0], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.1, 0.1, 0.2, 0.3, 0.4], - # [0.1, 0.2, np.inf, np.inf, np.inf], - # [np.inf, np.inf, np.inf, np.inf, np.inf], - # ] - #) - - #PB = np.array( - # [ - # [0.0, 0.0, 0.0, 0.0, 0.0], - # [0.0, 0.15, 0.25, 0.35, 0.45], - # [0.15, 0.25, 0.35, 0.45, 0.55], - # [0.01, 0.02, 0.03, 0.04, 0.05], - # [0.6, 0.7, 0.8, 0.9, 1], - # [0.1, 0.1, 0.2, 0.3, 0.4], - # [0.1, 0.2, 0.3, 0.4, 0.5], - # [0.0, 0.3, np.inf, np.inf, np.inf], - # [np.inf, np.inf, np.inf, np.inf, np.inf], - # ] - #) - IA = np.arange(n * k).reshape(n, k) IB = IA.copy() + n * k - #n, k = PA.shape - #IA = 
np.arange(n * k).reshape(n, k) - #IB = IA.copy() + n * k - #IA[7, 2:] = -1 - #IA[8, :] = -1 - #IB[7, 2:] = -1 - #IB[8, :] = -1 - ref_P = PA.copy() ref_I = IA.copy() From 898e9f366d3d4a0cebc1bfdddd20c722a2594f26 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 23 May 2022 11:56:42 -0600 Subject: [PATCH 110/416] Remove unnecessary copy operation --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index a1efbf681..3fa1447bd 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1041,7 +1041,7 @@ def test_merge_topk_PI(): PB = np.sort(PB) IA = np.arange(n * k).reshape(n, k) - IB = IA.copy() + n * k + IB = IA + n * k ref_P = PA.copy() ref_I = IA.copy() From 3541faec462fc0869af9bcb3b6eafc93469ebc21 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 24 May 2022 11:49:05 -0600 Subject: [PATCH 111/416] Major revision in function _merge_topk_PI - use PB to get number of iterations for the two most outer for-loops - improve Docstring - use start and stop to narrow down the search space - use for-loop instead of .copy() operation. --- stumpy/core.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index bce98964d..0cc858f93 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2505,10 +2505,12 @@ def _merge_topk_PI(PA, PB, IA, IB): Parameters ---------- PA : numpy.ndarray - a (top-k) matrix profile + a (top-k) matrix profile, with ndim of 2, where values in each row are + sorted in ascending order. Also, it needs to be the same shape as PB. PB : numpy.ndarray - a (top-k) matrix profile + a (top-k) matrix profile, with ndim of 2, where values in each row are + sorted in ascending order. Also, it needs to be the same shape as PA. IA : numpy.ndarray a (top-k) matrix profile indices, corresponding to PA @@ -2520,14 +2522,20 @@ def _merge_topk_PI(PA, PB, IA, IB): ------- None """ - for i in prange(PA.shape[0]): - for j in range(PA.shape[1]): + for i in prange(PB.shape[0]): + start = 0 + stop = np.searchsorted(PA[i], PB[i, -1], side="right") + + for j in range(PB.shape[1]): if PB[i, j] < PA[i, -1]: - idx = np.searchsorted(PA[i], PB[i, j], side="right") + idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start + + for g in range(PB.shape[1] - 1, idx, -1): + PA[i, g] = PA[i, g - 1] + IA[i, g] = IA[i, g - 1] - # .copy() operation is needed to resolve wrong result that is - # caused by "prange" - PA[i, idx + 1 :] = PA[i, idx:-1].copy() PA[i, idx] = PB[i, j] - IA[i, idx + 1 :] = IA[i, idx:-1].copy() IA[i, idx] = IB[i, j] + + start = idx + stop += 1 # because of shifting elements to the right by one From ce8cd4c599b8763519b483a4c9c3f695dc445350 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 00:54:00 -0600 Subject: [PATCH 112/416] Add device function to find insertion index into sorted array --- stumpy/core.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 535471761..200980648 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,3 +2604,48 @@ def _merge_topk_PI(PA, PB, IA, IB): start = idx stop += 1 # because of shifting elements to the right by one + + +@cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True) +def _gpu_searchsorted_right(a, v, bfs, nlevel): + """ + a device function in replace of numpy.searchsorted(a, v, side='right') + + Parameters + ---------- + a : numpy.ndarray + 1-dim array sorted in ascending order. 
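Keeping every row of the profile sorted is what makes each update cheap: a new distance only has to be spliced into place and the current largest value dropped. In plain NumPy, with a toy row and a hypothetical new distance and index, the update pattern behind `_merge_topk_PI` and the kernel looks roughly like this:

import numpy as np

P = np.array([0.2, 0.4, 0.6, np.inf])  # one top-k row, k = 4
I = np.array([7, 3, 9, -1], dtype=np.int64)
d, j = 0.5, 11                         # hypothetical new distance and its index

if d < P[-1]:                          # only insert if it beats the current k-th value
    idx = np.searchsorted(P, d, side="right")
    P[idx + 1 :] = P[idx:-1].copy()    # shift the tail right by one
    I[idx + 1 :] = I[idx:-1].copy()
    P[idx], I[idx] = d, j

print(P)  # [0.2 0.4 0.5 0.6]
print(I)  # [ 7  3 11  9]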
+ + v : float + value to insert into array `a` + + bfs : numpy.ndarray + the level order indices from the implicit construction of a binary + search tree followed by a breadth first (level order) search. + + nlevel : int + the number of levels in the binary search tree based from which the array + `bfs` is obtained. + + Returns + ------- + idx : int + the index of the insertion point + """ + n = a.shape[0] + idx = 0 + for level in range(nlevel): + if v < a[bfs[idx]]: + next_idx = 2 * idx + 1 + else: + next_idx = 2 * idx + 2 + + if level == nlevel-1 or bfs[next_idx]<0: + if v < a[bfs[idx]]: + idx = max(bfs[idx], 0) + else: + idx = min(bfs[idx] + 1, n) + break + idx = next_idx + + return idx From 09bbe7fb689e47330aacac6737e56d5d0d416356 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 01:11:01 -0600 Subject: [PATCH 113/416] Add test function for gpu_searchsorted --- tests/test_core.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 4437149d8..7423718ab 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1086,3 +1086,37 @@ def test_merge_topk_PI(): ref = np.column_stack((ref_P, ref_I)) comp = np.column_stack((comp_P, comp_I)) npt.assert_array_equal(ref, comp) + + +def test_gpu_searchsorted(): + # define a function the same as `core._gpu_searchsorted_right` but + # without cuda.jit decorator. + def gpu_searchsorted_right(a, v, bfs, nlevel): + n = a.shape[0] + idx = 0 + for level in range(nlevel): + if v < a[bfs[idx]]: + next_idx = 2 * idx + 1 + else: + next_idx = 2 * idx + 2 + + if level == nlevel-1 or bfs[next_idx]<0: + if v < a[bfs[idx]]: + idx = max(bfs[idx], 0) + else: + idx = min(bfs[idx] + 1, n) + break + idx = next_idx + + return idx + + for n in range(1, 100): + a = np.sort(np.random.rand(n)) + bfs = core._bfs_indices(n, fill_value=-1) + nlevel = np.floor(np.log2(n) + 1).astype(np.int64) + for i in range(n): + v = a[i] + npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + + v = a[i] + 0.001 + npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) From 4948667e38c3b76c1421f4ccf0aedf05c9d82f96 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 01:13:04 -0600 Subject: [PATCH 114/416] Correct format --- stumpy/core.py | 2 +- tests/test_core.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 200980648..3245bd216 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2640,7 +2640,7 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel): else: next_idx = 2 * idx + 2 - if level == nlevel-1 or bfs[next_idx]<0: + if level == nlevel - 1 or bfs[next_idx] < 0: if v < a[bfs[idx]]: idx = max(bfs[idx], 0) else: diff --git a/tests/test_core.py b/tests/test_core.py index 7423718ab..152a58a01 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1100,7 +1100,7 @@ def gpu_searchsorted_right(a, v, bfs, nlevel): else: next_idx = 2 * idx + 2 - if level == nlevel-1 or bfs[next_idx]<0: + if level == nlevel - 1 or bfs[next_idx] < 0: if v < a[bfs[idx]]: idx = max(bfs[idx], 0) else: @@ -1116,7 +1116,13 @@ def gpu_searchsorted_right(a, v, bfs, nlevel): nlevel = np.floor(np.log2(n) + 1).astype(np.int64) for i in range(n): v = a[i] - npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + npt.assert_almost_equal( + gpu_searchsorted_right(a, v, bfs, nlevel), + np.searchsorted(a, 
v, side="right"), + ) v = a[i] + 0.001 - npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + npt.assert_almost_equal( + gpu_searchsorted_right(a, v, bfs, nlevel), + np.searchsorted(a, v, side="right"), + ) From cdd7a334ac69408a2ba6810f521b5419afc9ed02 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 01:25:55 -0600 Subject: [PATCH 115/416] Fixed minor bug --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 3245bd216..e2688459e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -7,7 +7,7 @@ import inspect import numpy as np -from numba import njit, prange +from numba import cuda, njit, prange from scipy.signal import convolve from scipy.ndimage import maximum_filter1d, minimum_filter1d from scipy import linalg From 71ade4772dce47a9765c8f5081a02b523d8501fd Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 11:52:57 -0600 Subject: [PATCH 116/416] Fixed the name of a variable --- stumpy/gpu_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 0d76e19b6..1b82707fb 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -319,7 +319,7 @@ def _gpu_stump( Note that left and right matrix profiles are only available for self-joins. """ threads_per_block = config.STUMPY_THREADS_PER_BLOCK - blocks_per_grid = math.ceil(k / threads_per_block) + blocks_per_grid = math.ceil(profile_len / threads_per_block) T_A = np.load(T_A_fname, allow_pickle=False) T_B = np.load(T_B_fname, allow_pickle=False) From ac472fc2331f6ef03e2fb5b08fb0c05090d15341 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 15:02:19 -0600 Subject: [PATCH 117/416] Fixed grammatical error in docstring --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index e2688459e..9a7b1012b 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2624,7 +2624,7 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel): search tree followed by a breadth first (level order) search. nlevel : int - the number of levels in the binary search tree based from which the array + the number of levels in the binary search tree from which the array `bfs` is obtained. Returns From fc149e688ce2f1bdac409ec66b7376a881edcd21 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 15:09:01 -0600 Subject: [PATCH 118/416] Use device function for searchsorting --- stumpy/gpu_stump.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 1b82707fb..d8ad43fe8 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -17,7 +17,7 @@ @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," - "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)" + "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i2)" ) def _compute_and_update_PI_kernel( i, @@ -41,6 +41,8 @@ def _compute_and_update_PI_kernel( indices_L, indices_R, compute_QT, + bfs, + nlevel, k, ): """ @@ -116,6 +118,14 @@ def _compute_and_update_PI_kernel( compute_QT : bool A boolean flag for whether or not to compute QT + bfs : numpy.ndarray + the level order indices from the implicit construction of a binary + search tree followed by a breadth first (level order) search. 
+ + nlevel : int + the number of levels in the binary search tree from which the array + `bfs` is obtained. + k : int The number of top `k` smallest distances used to construct the matrix profile. Note that this will increase the total computational time and memory usage @@ -182,13 +192,12 @@ def _compute_and_update_PI_kernel( profile_R[j] = p_norm indices_R[j] = i - for idx in range(k, -1, -1): - if (p_norm < profile[j, idx - 1]) and (idx > 0): - profile[j, idx - 1] = profile[j, idx - 2] - indices[j, idx - 1] = indices[j, idx - 2] - else: - break - if idx < k: + if p_norm < profile[j, -1]: + idx = core._gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) + for g in range(k - 1, idx, -1): + profile[j, g] = profile[j, g - 1] + indices[j, g] = indices[j, g - 1] + profile[j, idx] = p_norm indices[j, idx] = i @@ -318,6 +327,10 @@ def _gpu_stump( Note that left and right matrix profiles are only available for self-joins. """ + bfs = core._bfs_indices(k, fill_value=-1) + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) # number of levels in + # binary seearch tree from which `bfs` is constructed. + threads_per_block = config.STUMPY_THREADS_PER_BLOCK blocks_per_grid = math.ceil(profile_len / threads_per_block) @@ -384,6 +397,8 @@ def _gpu_stump( device_indices_L, device_indices_R, False, + bfs, + nlevel, k, ) @@ -410,6 +425,8 @@ def _gpu_stump( device_indices_L, device_indices_R, True, + bfs, + nlevel, k, ) From 7ac67a8302ddcbd0d3affc0538b891fe19a92b17 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 15:16:31 -0600 Subject: [PATCH 119/416] Correct style --- stumpy/core.py | 2 +- stumpy/gpu_stump.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9a7b1012b..101813759 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2609,7 +2609,7 @@ def _merge_topk_PI(PA, PB, IA, IB): @cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True) def _gpu_searchsorted_right(a, v, bfs, nlevel): """ - a device function in replace of numpy.searchsorted(a, v, side='right') + Device function to replace numpy.searchsorted(a, v, side='right') Parameters ---------- diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index d8ad43fe8..1a379eda0 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -17,7 +17,8 @@ @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," - "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i2)" + "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:]," + "b1, i8[:], i8, i2)" ) def _compute_and_update_PI_kernel( i, From 92467e24387e490b0289a37738261904ce3148d7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 17:27:06 -0600 Subject: [PATCH 120/416] Remove signature from cuda device function --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 101813759..f342d888e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2606,7 +2606,7 @@ def _merge_topk_PI(PA, PB, IA, IB): stop += 1 # because of shifting elements to the right by one -@cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True) +@cuda.jit(device=True) def _gpu_searchsorted_right(a, v, bfs, nlevel): """ Device function to replace numpy.searchsorted(a, v, side='right') From bdfb258ea5e516f1c064141fe3d1d15dc895b858 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 28 May 2022 20:37:23 -0600 Subject: [PATCH 121/416] Full Coverage confirmed From bb5de99711bd580b77cc407cc5091ace97839c5c Mon Sep 17 00:00:00 2001 
From: ninimama Date: Sat, 28 May 2022 20:45:12 -0600 Subject: [PATCH 122/416] revising the definiton of parameter bfs in docstring --- stumpy/core.py | 4 ++-- stumpy/gpu_stump.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f342d888e..54eb29e4c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2620,8 +2620,8 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel): value to insert into array `a` bfs : numpy.ndarray - the level order indices from the implicit construction of a binary - search tree followed by a breadth first (level order) search. + The breadth-first-search indices where the missing leaves of its corresponding + binary search tree are filled with -1. nlevel : int the number of levels in the binary search tree from which the array diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 1a379eda0..d8d877078 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -120,8 +120,8 @@ def _compute_and_update_PI_kernel( A boolean flag for whether or not to compute QT bfs : numpy.ndarray - the level order indices from the implicit construction of a binary - search tree followed by a breadth first (level order) search. + The breadth-first-search indices where the missing leaves of its corresponding + binary search tree are filled with -1. nlevel : int the number of levels in the binary search tree from which the array From a005a415482dbce75a6030a5e0a3e98118cad333 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 30 May 2022 01:56:24 -0600 Subject: [PATCH 123/416] Copy array into device memory before passing it to kernel function --- stumpy/gpu_stump.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index d8d877078..a7682f52f 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -328,10 +328,6 @@ def _gpu_stump( Note that left and right matrix profiles are only available for self-joins. """ - bfs = core._bfs_indices(k, fill_value=-1) - nlevel = np.floor(np.log2(k) + 1).astype(np.int64) # number of levels in - # binary seearch tree from which `bfs` is constructed. - threads_per_block = config.STUMPY_THREADS_PER_BLOCK blocks_per_grid = math.ceil(profile_len / threads_per_block) @@ -344,6 +340,11 @@ def _gpu_stump( μ_Q = np.load(μ_Q_fname, allow_pickle=False) σ_Q = np.load(σ_Q_fname, allow_pickle=False) + + device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) + # number of levels in # binary seearch tree from which `bfs` is constructed. 
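The change above follows the usual Numba pattern of shipping a small read-only lookup array to the GPU once and reusing the device handle across kernel launches, rather than letting each launch re-copy it. A minimal sketch with hypothetical array contents, guarded so it only runs where CUDA is available:

import numpy as np
from numba import cuda

bfs_host = np.array([3, 1, 5, 0, 2, 4, 6], dtype=np.int64)  # BFS order of a 7-node BST
if cuda.is_available():
    device_bfs = cuda.to_device(bfs_host)  # pass this handle to every kernel launch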
+ with cuda.gpus[device_id]: device_T_A = cuda.to_device(T_A) device_QT_odd = cuda.to_device(QT) @@ -398,7 +399,7 @@ def _gpu_stump( device_indices_L, device_indices_R, False, - bfs, + device_bfs, nlevel, k, ) @@ -426,7 +427,7 @@ def _gpu_stump( device_indices_L, device_indices_R, True, - bfs, + device_bfs, nlevel, k, ) From ade9bb4f37295d1c0ae831672356a27ce625ff31 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 30 May 2022 16:58:35 -0600 Subject: [PATCH 124/416] use float values for generating arrays --- tests/test_core.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 152a58a01..e25d6a664 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1065,11 +1065,14 @@ def test_merge_topk_PI(): n = 50 k = 5 - PA = np.random.randint(0, 5, size=(n, k)) - PA = np.sort(PA) + PA = np.random.rand(n * k).reshape(n, k) + PA = np.sort(PA, axis=1) - PB = np.random.randint(0, 5, size=(n, k)) - PB = np.sort(PB) + PB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) + for i in range(n): + PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) + PB = np.sort(PB, axis=1) IA = np.arange(n * k).reshape(n, k) IB = IA + n * k @@ -1083,9 +1086,8 @@ def test_merge_topk_PI(): naive.merge_topk_PI(ref_P, PB, ref_I, IB) core._merge_topk_PI(comp_P, PB, comp_I, IB) - ref = np.column_stack((ref_P, ref_I)) - comp = np.column_stack((comp_P, comp_I)) - npt.assert_array_equal(ref, comp) + npt.assert_array_equal(ref_P, comp_P) + npt.assert_array_equal(ref_I, comp_I) def test_gpu_searchsorted(): From 853c2ec805e37b7839983856f9ba0e882da3730a Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 06:27:54 -0600 Subject: [PATCH 125/416] move device function to gpu_stump module --- stumpy/core.py | 45 --------------------------------------------- stumpy/gpu_stump.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 54eb29e4c..0ebb5ae50 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,48 +2604,3 @@ def _merge_topk_PI(PA, PB, IA, IB): start = idx stop += 1 # because of shifting elements to the right by one - - -@cuda.jit(device=True) -def _gpu_searchsorted_right(a, v, bfs, nlevel): - """ - Device function to replace numpy.searchsorted(a, v, side='right') - - Parameters - ---------- - a : numpy.ndarray - 1-dim array sorted in ascending order. - - v : float - value to insert into array `a` - - bfs : numpy.ndarray - The breadth-first-search indices where the missing leaves of its corresponding - binary search tree are filled with -1. - - nlevel : int - the number of levels in the binary search tree from which the array - `bfs` is obtained. 
- - Returns - ------- - idx : int - the index of the insertion point - """ - n = a.shape[0] - idx = 0 - for level in range(nlevel): - if v < a[bfs[idx]]: - next_idx = 2 * idx + 1 - else: - next_idx = 2 * idx + 2 - - if level == nlevel - 1 or bfs[next_idx] < 0: - if v < a[bfs[idx]]: - idx = max(bfs[idx], 0) - else: - idx = min(bfs[idx] + 1, n) - break - idx = next_idx - - return idx diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index a7682f52f..ec6db99d3 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -15,6 +15,51 @@ logger = logging.getLogger(__name__) +@cuda.jit(device=True) +def _gpu_searchsorted_right(a, v, bfs, nlevel): + """ + Device function to replace numpy.searchsorted(a, v, side='right') + + Parameters + ---------- + a : numpy.ndarray + 1-dim array sorted in ascending order. + + v : float + value to insert into array `a` + + bfs : numpy.ndarray + The breadth-first-search indices where the missing leaves of its corresponding + binary search tree are filled with -1. + + nlevel : int + the number of levels in the binary search tree from which the array + `bfs` is obtained. + + Returns + ------- + idx : int + the index of the insertion point + """ + n = a.shape[0] + idx = 0 + for level in range(nlevel): + if v < a[bfs[idx]]: + next_idx = 2 * idx + 1 + else: + next_idx = 2 * idx + 2 + + if level == nlevel - 1 or bfs[next_idx] < 0: + if v < a[bfs[idx]]: + idx = max(bfs[idx], 0) + else: + idx = min(bfs[idx] + 1, n) + break + idx = next_idx + + return idx + + @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:]," From e3b5119246a964bb46e560a525832fe68b397bf4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 06:29:33 -0600 Subject: [PATCH 126/416] Add gpu_searchsorted_left for the sake completeness --- stumpy/gpu_stump.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index ec6db99d3..c7f7aec16 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -15,6 +15,51 @@ logger = logging.getLogger(__name__) +@cuda.jit(device=True) +def _gpu_searchsorted_left(a, v, bfs, nlevel): + """ + Device function to replace numpy.searchsorted(a, v, side='left') + + Parameters + ---------- + a : numpy.ndarray + 1-dim array sorted in ascending order. + + v : float + value to insert into array `a` + + bfs : numpy.ndarray + The breadth-first-search indices where the missing leaves of its corresponding + binary search tree are filled with -1. + + nlevel : int + the number of levels in the binary search tree from which the array + `bfs` is obtained. 
+ + Returns + ------- + idx : int + the index of the insertion point + """ + n = a.shape[0] + idx = 0 + for level in range(nlevel): + if v <= a[bfs[idx]]: + next_idx = 2 * idx + 1 + else: + next_idx = 2 * idx + 2 + + if level == nlevel - 1 or bfs[next_idx] < 0: + if v <= a[bfs[idx]]: + idx = max(bfs[idx], 0) + else: + idx = min(bfs[idx] + 1, n) + break + idx = next_idx + + return idx + + @cuda.jit(device=True) def _gpu_searchsorted_right(a, v, bfs, nlevel): """ From c5779e551e2288f3db60ea93d9293cf60a70c2bd Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 06:46:04 -0600 Subject: [PATCH 127/416] Move test function to test_gpu_stump --- tests/test_core.py | 40 ---------------------------------------- tests/test_gpu_stump.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 40 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index e25d6a664..528286061 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1088,43 +1088,3 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) - - -def test_gpu_searchsorted(): - # define a function the same as `core._gpu_searchsorted_right` but - # without cuda.jit decorator. - def gpu_searchsorted_right(a, v, bfs, nlevel): - n = a.shape[0] - idx = 0 - for level in range(nlevel): - if v < a[bfs[idx]]: - next_idx = 2 * idx + 1 - else: - next_idx = 2 * idx + 2 - - if level == nlevel - 1 or bfs[next_idx] < 0: - if v < a[bfs[idx]]: - idx = max(bfs[idx], 0) - else: - idx = min(bfs[idx] + 1, n) - break - idx = next_idx - - return idx - - for n in range(1, 100): - a = np.sort(np.random.rand(n)) - bfs = core._bfs_indices(n, fill_value=-1) - nlevel = np.floor(np.log2(n) + 1).astype(np.int64) - for i in range(n): - v = a[i] - npt.assert_almost_equal( - gpu_searchsorted_right(a, v, bfs, nlevel), - np.searchsorted(a, v, side="right"), - ) - - v = a[i] + 0.001 - npt.assert_almost_equal( - gpu_searchsorted_right(a, v, bfs, nlevel), - np.searchsorted(a, v, side="right"), - ) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 1a2662647..dfbf5e405 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -38,6 +38,24 @@ def test_gpu_stump_int_input(): with pytest.raises(TypeError): gpu_stump(np.arange(10), 5, ignore_trivial=True) +def test_gpu_searchsorted(): + for n in range(1, 100): + a = np.sort(np.random.rand(n)) + bfs = core._bfs_indices(n, fill_value=-1) + nlevel = np.floor(np.log2(n) + 1).astype(np.int64) + for i in range(n): + v = a[i] - 0.001 + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + + v = a[i] + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + + v = a[i] + 0.001 + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) + npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) @pytest.mark.parametrize("T_A, T_B", test_data) From 38e531c34a63e3f4a98f476c9501b705de2a2b29 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 06:50:47 -0600 Subject: [PATCH 128/416] correct format --- 
stumpy/core.py | 2 +- stumpy/gpu_stump.py | 1 - tests/test_gpu_stump.py | 35 +++++++++++++++++++++++++++-------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 0ebb5ae50..535471761 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -7,7 +7,7 @@ import inspect import numpy as np -from numba import cuda, njit, prange +from numba import njit, prange from scipy.signal import convolve from scipy.ndimage import maximum_filter1d, minimum_filter1d from scipy import linalg diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index c7f7aec16..22748e089 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -430,7 +430,6 @@ def _gpu_stump( μ_Q = np.load(μ_Q_fname, allow_pickle=False) σ_Q = np.load(σ_Q_fname, allow_pickle=False) - device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) # number of levels in # binary seearch tree from which `bfs` is constructed. diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index dfbf5e405..1e79fb577 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -1,7 +1,7 @@ import numpy as np import numpy.testing as npt import pandas as pd -from stumpy import gpu_stump +from stumpy import core, gpu_stump from stumpy import config from numba import cuda @@ -38,23 +38,42 @@ def test_gpu_stump_int_input(): with pytest.raises(TypeError): gpu_stump(np.arange(10), 5, ignore_trivial=True) + def test_gpu_searchsorted(): for n in range(1, 100): a = np.sort(np.random.rand(n)) bfs = core._bfs_indices(n, fill_value=-1) nlevel = np.floor(np.log2(n) + 1).astype(np.int64) for i in range(n): - v = a[i] - 0.001 - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + v = a[i] - 0.001 + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + np.searchsorted(a, v, side="left"), + ) + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + np.searchsorted(a, v, side="right"), + ) v = a[i] - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + np.searchsorted(a, v, side="left"), + ) + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + np.searchsorted(a, v, side="right"), + ) v = a[i] + 0.001 - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left")) - npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right")) + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + np.searchsorted(a, v, side="left"), + ) + npt.assert_almost_equal( + gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + np.searchsorted(a, v, side="right"), + ) @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) From 5a7b3c099419de1a09368f2930eabce410730693 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 08:26:58 -0600 Subject: [PATCH 129/416] Fixed calling function --- stumpy/gpu_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/gpu_stump.py 
b/stumpy/gpu_stump.py index 22748e089..bf7e3b57d 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -284,7 +284,7 @@ def _compute_and_update_PI_kernel( indices_R[j] = i if p_norm < profile[j, -1]: - idx = core._gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) + idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) for g in range(k - 1, idx, -1): profile[j, g] = profile[j, g - 1] indices[j, g] = indices[j, g - 1] From e1b0d205e463fd2e02a906ab349ca492d303be27 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 08:32:25 -0600 Subject: [PATCH 130/416] Make function callable from both CPU and GPU to avoid duplication for unit testing. --- stumpy/gpu_stump.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index bf7e3b57d..99a3ba839 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -7,7 +7,7 @@ import os import numpy as np -from numba import cuda +from numba import cuda, jit from . import core, config from .gpu_aamp import gpu_aamp @@ -15,10 +15,11 @@ logger = logging.getLogger(__name__) -@cuda.jit(device=True) +@jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_left(a, v, bfs, nlevel): """ - Device function to replace numpy.searchsorted(a, v, side='left') + A function equivalent to numpy.searchsorted(a, v, side='left'), designed + to be used mainly as device function Parameters ---------- @@ -60,7 +61,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): return idx -@cuda.jit(device=True) +@jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_right(a, v, bfs, nlevel): """ Device function to replace numpy.searchsorted(a, v, side='right') From 922544c3ae21d018db7600d4b466a2ae40d107fd Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 08:46:07 -0600 Subject: [PATCH 131/416] Fixed calling function --- tests/test_gpu_stump.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 1e79fb577..108ac0d91 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -2,6 +2,7 @@ import numpy.testing as npt import pandas as pd from stumpy import core, gpu_stump +from stumpy.gpu_stump import _gpu_searchsorted_left, _gpu_searchsorted_right from stumpy import config from numba import cuda @@ -47,31 +48,31 @@ def test_gpu_searchsorted(): for i in range(n): v = a[i] - 0.001 npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + _gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"), ) npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + _gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"), ) v = a[i] npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + _gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"), ) npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + _gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"), ) v = a[i] + 0.001 npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), + _gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"), ) npt.assert_almost_equal( - gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), + _gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"), ) From 102979b1235e00744567484b97a774658d3b2e1d Mon Sep 17 00:00:00 2001 From: 
ninimama Date: Tue, 31 May 2022 08:51:31 -0600 Subject: [PATCH 132/416] Revised the test function for merge_topk_PI --- tests/test_core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 528286061..a297dd3fa 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1066,11 +1066,12 @@ def test_merge_topk_PI(): k = 5 PA = np.random.rand(n * k).reshape(n, k) - PA = np.sort(PA, axis=1) + PA = np.sort(PA, axis=1) # sorting each row separately PB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) - for i in range(n): + for i in range(n): # creating ties between values of PA and PB PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) PB = np.sort(PB, axis=1) From a8aecf6679a9dbb02be80cdf75cf55ce99ae6aae Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 09:18:04 -0600 Subject: [PATCH 133/416] Revise docstrings --- stumpy/gpu_stump.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 99a3ba839..35bf3f12f 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -18,7 +18,7 @@ @jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_left(a, v, bfs, nlevel): """ - A function equivalent to numpy.searchsorted(a, v, side='left'), designed + Equivalent to numpy.searchsorted(a, v, side='left'), designed to be used mainly as device function Parameters @@ -64,7 +64,8 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): @jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_right(a, v, bfs, nlevel): """ - Device function to replace numpy.searchsorted(a, v, side='right') + Equivalent to numpy.searchsorted(a, v, side='left'), designed + to be used mainly as device function Parameters ---------- From 38318ecdb8ab602d8ceb9d8afe4d1abc1b6ed9ed Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 09:19:31 -0600 Subject: [PATCH 134/416] Rename variable --- stumpy/gpu_stump.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 35bf3f12f..9fb657668 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -124,7 +124,7 @@ def _compute_and_update_PI_kernel( Σ_T, μ_Q, σ_Q, - profile_len, + w, ignore_trivial, excl_zone, profile, @@ -179,7 +179,7 @@ def _compute_and_update_PI_kernel( σ_Q : numpy.ndarray Standard deviation of the query sequence, `Q` - profile_len : int + w : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -247,7 +247,7 @@ def _compute_and_update_PI_kernel( for j in range(start, QT_out.shape[0], stride): zone_start = max(0, j - excl_zone) - zone_stop = min(profile_len, j + excl_zone) + zone_stop = min(w, j + excl_zone) if compute_QT: QT_out[j] = ( @@ -307,7 +307,7 @@ def _gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - profile_len, + w, ignore_trivial=True, range_start=1, device_id=0, @@ -362,7 +362,7 @@ def _gpu_stump( The file name for the standard deviation of the query sequence, `Q`, relative to the current sliding window - profile_len : int + w : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -421,7 +421,7 @@ def _gpu_stump( Note that left and right matrix profiles are only available for self-joins. 
""" threads_per_block = config.STUMPY_THREADS_PER_BLOCK - blocks_per_grid = math.ceil(profile_len / threads_per_block) + blocks_per_grid = math.ceil(w / threads_per_block) T_A = np.load(T_A_fname, allow_pickle=False) T_B = np.load(T_B_fname, allow_pickle=False) @@ -452,14 +452,14 @@ def _gpu_stump( device_M_T = cuda.to_device(M_T) device_Σ_T = cuda.to_device(Σ_T) - profile = np.full((profile_len, k), np.inf, dtype=np.float64) - indices = np.full((profile_len, k), -1, dtype=np.int64) + profile = np.full((w, k), np.inf, dtype=np.float64) + indices = np.full((w, k), -1, dtype=np.int64) - profile_L = np.full(profile_len, np.inf, dtype=np.float64) - indices_L = np.full(profile_len, -1, dtype=np.int64) + profile_L = np.full(w, np.inf, dtype=np.float64) + indices_L = np.full(w, -1, dtype=np.int64) - profile_R = np.full(profile_len, np.inf, dtype=np.float64) - indices_R = np.full(profile_len, -1, dtype=np.int64) + profile_R = np.full(w, np.inf, dtype=np.float64) + indices_R = np.full(w, -1, dtype=np.int64) device_profile = cuda.to_device(profile) device_profile_L = cuda.to_device(profile_L) @@ -480,7 +480,7 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - profile_len, + w, ignore_trivial, excl_zone, device_profile, @@ -508,7 +508,7 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - profile_len, + w, ignore_trivial, excl_zone, device_profile, @@ -695,7 +695,7 @@ def gpu_stump( logger.warning("Try setting `ignore_trivial = False`.") n = T_B.shape[0] - profile_len = T_A.shape[0] - m + 1 + w = T_A.shape[0] - m + 1 l = n - m + 1 excl_zone = int( np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM) @@ -765,7 +765,7 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - profile_len, + w, ignore_trivial, start + 1, device_ids[idx], @@ -794,7 +794,7 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - profile_len, + w, ignore_trivial, start + 1, device_ids[idx], @@ -866,7 +866,7 @@ def gpu_stump( indices_R[0] = np.where(cond, indices_R[0], indices_R[i]) out = np.empty( - (profile_len, 2 * k + 2), dtype=object + (w, 2 * k + 2), dtype=object ) # last two columns are to store # (top-1) left/right matrix profile indices out[:, :k] = profile[0] From 76f97cbb896f0d66819022cb0acfc43e011d67c0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 14:38:11 -0600 Subject: [PATCH 135/416] Corrected format --- stumpy/gpu_stump.py | 8 +++----- tests/test_core.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 9fb657668..371bbeaa4 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -@jit # equivalent to `__host__ __device__` in C++ CUDA +@jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_left(a, v, bfs, nlevel): """ Equivalent to numpy.searchsorted(a, v, side='left'), designed @@ -61,7 +61,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): return idx -@jit # equivalent to `__host__ __device__` in C++ CUDA +@jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_right(a, v, bfs, nlevel): """ Equivalent to numpy.searchsorted(a, v, side='left'), designed @@ -865,9 +865,7 @@ def gpu_stump( profile_R[0] = np.where(cond, profile_R[0], profile_R[i]) indices_R[0] = np.where(cond, indices_R[0], indices_R[i]) - out = np.empty( - (w, 2 * k + 2), dtype=object - ) # last two columns are to store + out = np.empty((w, 2 * k + 2), dtype=object) # last two columns are to store # (top-1) left/right matrix profile indices out[:, :k] = 
profile[0] out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0])) diff --git a/tests/test_core.py b/tests/test_core.py index a297dd3fa..63a33d1d0 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1071,7 +1071,7 @@ def test_merge_topk_PI(): PB = np.random.rand(n * k).reshape(n, k) col_idx = np.random.randint(0, k, size=n) - for i in range(n): # creating ties between values of PA and PB + for i in range(n): # creating ties between values of PA and PB PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) PB = np.sort(PB, axis=1) From 157944d358c3452b25cfdbf6ac79d38795fd478f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 15:58:19 -0600 Subject: [PATCH 136/416] All test passed and full coverage From 8a31eff92cbcd3f67f4943820bad957291006e6f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 31 May 2022 16:10:26 -0600 Subject: [PATCH 137/416] Fixed typo --- stumpy/gpu_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 371bbeaa4..d6e02f669 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -64,7 +64,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): @jit # equivalent to `__host__ __device__` in C++ CUDA def _gpu_searchsorted_right(a, v, bfs, nlevel): """ - Equivalent to numpy.searchsorted(a, v, side='left'), designed + Equivalent to numpy.searchsorted(a, v, side='right'), designed to be used mainly as device function Parameters From 9610d7445fa9f715dd6b5f8e7c69c178950774f0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 19:50:25 -0600 Subject: [PATCH 138/416] Change decorator to create device function --- stumpy/gpu_stump.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index d6e02f669..3a37db6ee 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -7,7 +7,7 @@ import os import numpy as np -from numba import cuda, jit +from numba import cuda from . import core, config from .gpu_aamp import gpu_aamp @@ -15,11 +15,10 @@ logger = logging.getLogger(__name__) -@jit # equivalent to `__host__ __device__` in C++ CUDA +@cuda.jit(device=True) def _gpu_searchsorted_left(a, v, bfs, nlevel): """ - Equivalent to numpy.searchsorted(a, v, side='left'), designed - to be used mainly as device function + A device function, equivalent to numpy.searchsorted(a, v, side='left') Parameters ---------- @@ -61,11 +60,10 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): return idx -@jit # equivalent to `__host__ __device__` in C++ CUDA +@cuda.jit(device=True) def _gpu_searchsorted_right(a, v, bfs, nlevel): """ - Equivalent to numpy.searchsorted(a, v, side='right'), designed - to be used mainly as device function + A device function, equivalent to numpy.searchsorted(a, v, side='right') Parameters ---------- From 9ce12a3127fd1a646302f1d98c97da3666f8e149 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 19:54:32 -0600 Subject: [PATCH 139/416] Fixed typos --- stumpy/gpu_stump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 3a37db6ee..63366a183 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -432,7 +432,7 @@ def _gpu_stump( device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) - # number of levels in # binary seearch tree from which `bfs` is constructed. + # number of levels in binary seearch tree from which `bfs` is constructed. 
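# [Editor's note] A minimal host-side sketch, assuming plain NumPy only, of the behavior
# that the `_gpu_searchsorted_left`/`_gpu_searchsorted_right` device functions above are
# meant to reproduce for one sorted row of the top-k profile. The names `k`, `a`, and `v`
# are illustrative and not part of this patch; the actual device-side traversal that uses
# `bfs`/`nlevel` is not shown here.
import numpy as np

k = 8
a = np.sort(np.random.rand(k))                   # one sorted top-k row
v = np.random.rand()                             # value whose insertion point is needed
nlevel = int(np.floor(np.log2(k) + 1))           # levels of the binary search tree over `a`
idx_left = np.searchsorted(a, v, side="left")    # reference for _gpu_searchsorted_left
idx_right = np.searchsorted(a, v, side="right")  # reference for _gpu_searchsorted_right
assert 0 <= idx_left <= idx_right <= k and nlevel >= 1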
with cuda.gpus[device_id]: device_T_A = cuda.to_device(T_A) @@ -853,12 +853,12 @@ def gpu_stump( # Update (top-k) matrix profile and matrix profile indices core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i]) - # Update (top-1) left matrix profile and matrix profil indices + # Update (top-1) left matrix profile and matrix profile indices cond = profile_L[0] < profile_L[i] profile_L[0] = np.where(cond, profile_L[0], profile_L[i]) indices_L[0] = np.where(cond, indices_L[0], indices_L[i]) - # Update (top-1) right matrix profile and matrix profil indices + # Update (top-1) right matrix profile and matrix profile indices cond = profile_R[0] < profile_R[i] profile_R[0] = np.where(cond, profile_R[0], profile_R[i]) indices_R[0] = np.where(cond, indices_R[0], indices_R[i]) From 4f2ea6c321bf40a33afd5e72ea7b9553a2e55c3f Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 20:01:17 -0600 Subject: [PATCH 140/416] Rename function to improve readability --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 3074c2359..101b6857a 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -156,7 +156,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): # pragma: no cover return result -def searchsorted(a, v): +def searchsorted_right(a, v): """ Naive version of numpy.searchsorted(..., side='right') """ From e4ae016a777791a0b32bc7a09029006e2179f211 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 20:32:25 -0600 Subject: [PATCH 141/416] Enhance test function to test with different values of k --- tests/test_gpu_stump.py | 20 ++++++++++---------- tests/test_stump.py | 22 +++++++++++----------- tests/test_stumped.py | 16 ++++++++-------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 108ac0d91..99b6af68d 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -393,15 +393,15 @@ def test_gpu_stump_nan_zero_mean_self_join(): @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) @pytest.mark.parametrize("T_A, T_B", test_data) def test_gpu_stump_self_join_KNN(T_A, T_B): - k = 3 m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) - comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) + for k in range(1, 4): + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) + comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) - comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) + comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) diff --git a/tests/test_stump.py b/tests/test_stump.py index af2a2315e..fcfccdea6 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -244,15 +244,15 @@ def test_stump_nan_zero_mean_self_join(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): - k = 3 - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) - comp_mp = stump(T_B, m, ignore_trivial=True, k=k) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, 
comp_mp) + for k in range(4): + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stump(T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) - comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) + comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index 02e914436..bcf6f26c1 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -617,11 +617,11 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap( @pytest.mark.parametrize("T_A, T_B", test_data) def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: - k = 3 - m = 3 - zone = int(np.ceil(m / 4)) - ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) - comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) + for k in range(4): + m = 3 + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) + comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 5565904e5c101d7548da9e413a8d6191e5741ede Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 20:40:24 -0600 Subject: [PATCH 142/416] Add test for k>1 for AB-join --- tests/test_gpu_stump.py | 12 ++++++++++++ tests/test_stump.py | 17 ++++++++++++++++- tests/test_stumped.py | 18 +++++++++++++++++- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 99b6af68d..7c984164a 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -405,3 +405,15 @@ def test_gpu_stump_self_join_KNN(T_A, T_B): comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_gpu_stump_A_B_join_KNN(T_A, T_B): + for k in range(1, 4): + m = 3 + ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True, k=k) + comp_mp = gpu_stump(T_B, m, T_A, ignore_trivial=False, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) diff --git a/tests/test_stump.py b/tests/test_stump.py index fcfccdea6..029fc2696 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -244,7 +244,7 @@ def test_stump_nan_zero_mean_self_join(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): - for k in range(4): + for k in range(1, 4): m = 3 zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) @@ -256,3 +256,18 @@ def test_stump_self_join_KNN(T_A, T_B): comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_A_B_join_KNN(T_A, T_B): + for k in range(1, 4): + m = 3 + ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) + comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) 
+ + comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index bcf6f26c1..363a58432 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -617,7 +617,7 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap( @pytest.mark.parametrize("T_A, T_B", test_data) def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: - for k in range(4): + for k in range(1, 4): m = 3 zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) @@ -625,3 +625,19 @@ def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore:numpy.dtype size changed") +@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") +@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed") +@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning") +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster): + with Client(dask_cluster) as dask_client: + for k in range(1, 4): + m = 3 + ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) + comp_mp = stumped(dask_client, T_A, m, T_B, ignore_trivial=False, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From bf6edcc51a8ce6f919e0724fdc36846e2a023cf6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 22:40:57 -0600 Subject: [PATCH 143/416] Add wrapper kernel for device function and change design of test function --- tests/test_gpu_stump.py | 78 ++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 7c984164a..9edea0dcd 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -40,41 +40,55 @@ def test_gpu_stump_int_input(): gpu_stump(np.arange(10), 5, ignore_trivial=True) -def test_gpu_searchsorted(): - for n in range(1, 100): - a = np.sort(np.random.rand(n)) - bfs = core._bfs_indices(n, fill_value=-1) - nlevel = np.floor(np.log2(n) + 1).astype(np.int64) - for i in range(n): - v = a[i] - 0.001 - npt.assert_almost_equal( - _gpu_searchsorted_left(a, v, bfs, nlevel), - np.searchsorted(a, v, side="left"), - ) - npt.assert_almost_equal( - _gpu_searchsorted_right(a, v, bfs, nlevel), - np.searchsorted(a, v, side="right"), - ) +@cuda.jit("(f8[:, :], f8[:], i8[:], i8, b1, i8[:])") +def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): + # A wrapper kernel for calling device function _gpu_searchsorted_left/right. 
+ i = cuda.grid(1) + if i < A.shape[0]: + if is_left: + IDX[i] = _gpu_searchsorted_left(A[i], V[i], bfs, nlevel) + else: + IDX[i] = _gpu_searchsorted_right(A[i], V[i], bfs, nlevel) - v = a[i] - npt.assert_almost_equal( - _gpu_searchsorted_left(a, v, bfs, nlevel), - np.searchsorted(a, v, side="left"), - ) - npt.assert_almost_equal( - _gpu_searchsorted_right(a, v, bfs, nlevel), - np.searchsorted(a, v, side="right"), - ) - v = a[i] + 0.001 - npt.assert_almost_equal( - _gpu_searchsorted_left(a, v, bfs, nlevel), - np.searchsorted(a, v, side="left"), - ) - npt.assert_almost_equal( - _gpu_searchsorted_right(a, v, bfs, nlevel), - np.searchsorted(a, v, side="right"), +def test_gpu_searchsorted(): + n = 5000 + for k in range(1, 21): + bfs = core._bfs_indices(k, fill_value=-1) + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) + + A = np.sort(np.random.rand(n, k), axis=1) + V = np.empty(n) + col_idx = np.random.randint(0, k, size=n) + diff = [-0.001, 0, 0.001] + for i in range(n): # creating ties between values of PA and PB + V[i] = np.random.choice(A[i, col_idx[i]], size=1, replace=False) + V[i] += diff[i % 3] + + device_A = cuda.to_device(A) + device_V = cuda.to_device(V) + device_bfs = cuda.to_device(bfs) + for is_left in [True, False]: + if is_left: + side = 'left' + else: + side = 'right' + + ref_IDX = np.full(n, -1, dtype=np.int64) + for i in range(n): + ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) + + comp_IDX = np.full(n, -1, dtype=np.int64) + device_comp_IDX = cuda.to_device(comp_IDX) + + threads_per_block = config.STUMPY_THREADS_PER_BLOCK + blocks_per_grid = math.ceil(n / threads_per_block) + _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX ) + comp_IDX = device_comp_IDX.copy_to_host() + + npt.assert_array_equal(ref_IDX, comp_IDX) @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) From 1b7d971865ab24b7fc423c7a131e1c74c61378c9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 22:45:51 -0600 Subject: [PATCH 144/416] minor corrections --- tests/naive.py | 4 ++-- tests/test_gpu_stump.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 101b6857a..712bfee1b 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -239,14 +239,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): for i in iter_range: D = distance_matrix[i, i + g] # D: a single element if D < P[i, k - 1]: - idx = searchsorted(P[i], D) + idx = searchsorted_right(P[i], D) # to keep the top-k, we must get rid of the last element. 
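# [Editor's note] A self-contained sketch of the insertion pattern used on the surrounding
# lines: `np.searchsorted(..., side="right")` finds the slot, `np.insert` places the new
# distance/index, and `[:-1]` drops the largest entry so each row keeps exactly k values.
# `P_row`, `I_row`, `d`, and `j` below are illustrative names only, not part of this patch.
import numpy as np

k = 4
P_row = np.array([0.5, 1.0, 2.0, np.inf])   # sorted top-k distances for one subsequence
I_row = np.array([7, 3, 9, -1])             # matching nearest-neighbor indices
d, j = 0.8, 11                              # a newly computed distance and its index
if d < P_row[k - 1]:
    pos = np.searchsorted(P_row, d, side="right")
    P_row = np.insert(P_row, pos, d)[:-1]   # [0.5, 0.8, 1.0, 2.0]
    I_row = np.insert(I_row, pos, j)[:-1]   # [7, 11, 3, 9]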
P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only if D < P[i + g, k - 1]: - idx = searchsorted(P[i + g], D) + idx = searchsorted_right(P[i + g], D) P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 9edea0dcd..908537833 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -1,3 +1,4 @@ +import math import numpy as np import numpy.testing as npt import pandas as pd @@ -70,11 +71,11 @@ def test_gpu_searchsorted(): device_bfs = cuda.to_device(bfs) for is_left in [True, False]: if is_left: - side = 'left' + side = "left" else: - side = 'right' + side = "right" - ref_IDX = np.full(n, -1, dtype=np.int64) + ref_IDX = np.full(n, -1, dtype=np.int64) for i in range(n): ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) @@ -84,7 +85,7 @@ def test_gpu_searchsorted(): threads_per_block = config.STUMPY_THREADS_PER_BLOCK blocks_per_grid = math.ceil(n / threads_per_block) _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( - device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX ) comp_IDX = device_comp_IDX.copy_to_host() From 7f65b946c592007b75d881f79064f9261a5e4f9c Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 23:41:49 -0600 Subject: [PATCH 145/416] Fixed minor bug --- tests/test_gpu_stump.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 908537833..3b24f0e9d 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -54,7 +54,7 @@ def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): def test_gpu_searchsorted(): n = 5000 - for k in range(1, 21): + for k in range(1, 100): bfs = core._bfs_indices(k, fill_value=-1) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) @@ -62,9 +62,8 @@ def test_gpu_searchsorted(): V = np.empty(n) col_idx = np.random.randint(0, k, size=n) diff = [-0.001, 0, 0.001] - for i in range(n): # creating ties between values of PA and PB - V[i] = np.random.choice(A[i, col_idx[i]], size=1, replace=False) - V[i] += diff[i % 3] + for i in range(n): + V[i] = A[i, col_idx[i]] + diff[i % 3] device_A = cuda.to_device(A) device_V = cuda.to_device(V) From 10878fdd997492eb85e51f5f6be574f76ba22ef3 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 1 Jun 2022 23:59:23 -0600 Subject: [PATCH 146/416] Swap TA and TB to allow k to not be bigger than length of distance profile --- tests/test_gpu_stump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 3b24f0e9d..f73bd389c 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -426,8 +426,8 @@ def test_gpu_stump_self_join_KNN(T_A, T_B): def test_gpu_stump_A_B_join_KNN(T_A, T_B): for k in range(1, 4): m = 3 - ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True, k=k) - comp_mp = gpu_stump(T_B, m, T_A, ignore_trivial=False, k=k) + ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True, k=k) + comp_mp = gpu_stump(T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) From d282dfdb0b7e985d0d15db4bdc64a8905565b0c2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 2 Jun 2022 10:18:54 -0600 Subject: [PATCH 147/416] Redesign test function --- tests/test_gpu_stump.py | 53 
++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index f73bd389c..67c9ec0f9 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -54,41 +54,46 @@ def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): def test_gpu_searchsorted(): n = 5000 - for k in range(1, 100): + threads_per_block = config.STUMPY_THREADS_PER_BLOCK + blocks_per_grid = math.ceil(n / threads_per_block) + + for k in range(1, 32): bfs = core._bfs_indices(k, fill_value=-1) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) A = np.sort(np.random.rand(n, k), axis=1) V = np.empty(n) - col_idx = np.random.randint(0, k, size=n) diff = [-0.001, 0, 0.001] for i in range(n): - V[i] = A[i, col_idx[i]] + diff[i % 3] + V[i] = A[i, i % k] + diff[i % 3] device_A = cuda.to_device(A) device_V = cuda.to_device(V) device_bfs = cuda.to_device(bfs) - for is_left in [True, False]: - if is_left: - side = "left" - else: - side = "right" - - ref_IDX = np.full(n, -1, dtype=np.int64) - for i in range(n): - ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) - - comp_IDX = np.full(n, -1, dtype=np.int64) - device_comp_IDX = cuda.to_device(comp_IDX) - - threads_per_block = config.STUMPY_THREADS_PER_BLOCK - blocks_per_grid = math.ceil(n / threads_per_block) - _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( - device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX - ) - comp_IDX = device_comp_IDX.copy_to_host() - - npt.assert_array_equal(ref_IDX, comp_IDX) + + side = "left" # is_left = True + ref_IDX = np.full(n, -1, dtype=np.int64) + for i in range(n): + ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) + comp_IDX = np.full(n, -1, dtype=np.int64) + device_comp_IDX = cuda.to_device(comp_IDX) + _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( + device_A, device_V, device_bfs, nlevel, True, device_comp_IDX + ) + comp_IDX = device_comp_IDX.copy_to_host() + npt.assert_array_equal(ref_IDX, comp_IDX) + + side = "right" # is_left = False + ref_IDX = np.full(n, -1, dtype=np.int64) + for i in range(n): + ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) + comp_IDX = np.full(n, -1, dtype=np.int64) + device_comp_IDX = cuda.to_device(comp_IDX) + _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( + device_A, device_V, device_bfs, nlevel, False, device_comp_IDX + ) + comp_IDX = device_comp_IDX.copy_to_host() + npt.assert_array_equal(ref_IDX, comp_IDX) @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) From 9de9dd2e5242026bb42dcdf31d317f1d4d89a37e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 2 Jun 2022 10:29:16 -0600 Subject: [PATCH 148/416] minor refactoring --- tests/test_gpu_stump.py | 4 ++-- tests/test_stump.py | 6 +++--- tests/test_stumped.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 67c9ec0f9..aa70cd114 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -413,8 +413,8 @@ def test_gpu_stump_nan_zero_mean_self_join(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_gpu_stump_self_join_KNN(T_A, T_B): m = 3 + zone = int(np.ceil(m / 4)) for k in range(1, 4): - zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) @@ -429,8 +429,8 @@ def test_gpu_stump_self_join_KNN(T_A, T_B): @pytest.mark.filterwarnings("ignore", 
category=NumbaPerformanceWarning) @pytest.mark.parametrize("T_A, T_B", test_data) def test_gpu_stump_A_B_join_KNN(T_A, T_B): + m = 3 for k in range(1, 4): - m = 3 ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True, k=k) comp_mp = gpu_stump(T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) diff --git a/tests/test_stump.py b/tests/test_stump.py index 029fc2696..3e0b34299 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -244,9 +244,9 @@ def test_stump_nan_zero_mean_self_join(): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_self_join_KNN(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) for k in range(1, 4): - m = 3 - zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) comp_mp = stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) @@ -260,8 +260,8 @@ def test_stump_self_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_A_B_join_KNN(T_A, T_B): + m = 3 for k in range(1, 4): - m = 3 ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index 363a58432..7e8b053d3 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -617,9 +617,9 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap( @pytest.mark.parametrize("T_A, T_B", test_data) def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: + m = 3 + zone = int(np.ceil(m / 4)) for k in range(1, 4): - m = 3 - zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) @@ -634,8 +634,8 @@ def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: + m = 3 for k in range(1, 4): - m = 3 ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) comp_mp = stumped(dask_client, T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) From 9789cd9ff98483a74e25bdc15d3d31d79769ce36 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 4 Jun 2022 03:00:30 -0600 Subject: [PATCH 149/416] Extend test function to test with different values of parameter k --- tests/test_core.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 63a33d1d0..f83d09504 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1063,29 +1063,27 @@ def test_select_P_ABBA_val_inf(): def test_merge_topk_PI(): n = 50 - k = 5 + for k in range(1, 6): + PA = np.random.rand(n * k).reshape(n, k) + PA = np.sort(PA, axis=1) # sorting each row separately - PA = np.random.rand(n * k).reshape(n, k) - PA = np.sort(PA, axis=1) # sorting each row separately + PB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) + for i in range(n): # creating ties between values of PA and PB + PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) + PB = np.sort(PB, axis=1) # sorting each row separately - PB = np.random.rand(n * k).reshape(n, k) + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k - col_idx = np.random.randint(0, k, size=n) - for i in range(n): # creating ties between values of PA and PB - PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) - PB = np.sort(PB, axis=1) + ref_P = PA.copy() + ref_I = 
IA.copy() - IA = np.arange(n * k).reshape(n, k) - IB = IA + n * k + comp_P = PA.copy() + comp_I = IA.copy() - ref_P = PA.copy() - ref_I = IA.copy() + naive.merge_topk_PI(ref_P, PB, ref_I, IB) + core._merge_topk_PI(comp_P, PB, comp_I, IB) - comp_P = PA.copy() - comp_I = IA.copy() - - naive.merge_topk_PI(ref_P, PB, ref_I, IB) - core._merge_topk_PI(comp_P, PB, comp_I, IB) - - npt.assert_array_equal(ref_P, comp_P) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_array_equal(ref_P, comp_P) + npt.assert_array_equal(ref_I, comp_I) From 6faa6453ef20d7f291bbb56cd3a3168ac0853214 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 4 Jun 2022 04:42:28 -0600 Subject: [PATCH 150/416] Minor changes in test function --- tests/test_gpu_stump.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index aa70cd114..071337cd5 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -58,39 +58,37 @@ def test_gpu_searchsorted(): blocks_per_grid = math.ceil(n / threads_per_block) for k in range(1, 32): - bfs = core._bfs_indices(k, fill_value=-1) + device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) A = np.sort(np.random.rand(n, k), axis=1) - V = np.empty(n) - diff = [-0.001, 0, 0.001] - for i in range(n): - V[i] = A[i, i % k] + diff[i % 3] - device_A = cuda.to_device(A) + + V = np.random.rand(n) + for i, idx in enumerate(np.random.choice(np.arange(n), size=k, replace=False)): + V[idx] = A[idx, i] # create ties device_V = cuda.to_device(V) - device_bfs = cuda.to_device(bfs) - side = "left" # is_left = True - ref_IDX = np.full(n, -1, dtype=np.int64) - for i in range(n): - ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) + is_left = True # test case + ref_IDX = [np.searchsorted(A[i], V[i], side="left") for i in range(n)] + ref_IDX = np.asarray(ref_IDX, dtype=np.int64) + comp_IDX = np.full(n, -1, dtype=np.int64) device_comp_IDX = cuda.to_device(comp_IDX) _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( - device_A, device_V, device_bfs, nlevel, True, device_comp_IDX + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX ) comp_IDX = device_comp_IDX.copy_to_host() npt.assert_array_equal(ref_IDX, comp_IDX) - side = "right" # is_left = False - ref_IDX = np.full(n, -1, dtype=np.int64) - for i in range(n): - ref_IDX[i] = np.searchsorted(A[i], V[i], side=side) + is_left = False # test case + ref_IDX = [np.searchsorted(A[i], V[i], side="right") for i in range(n)] + ref_IDX = np.asarray(ref_IDX, dtype=np.int64) + comp_IDX = np.full(n, -1, dtype=np.int64) device_comp_IDX = cuda.to_device(comp_IDX) _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( - device_A, device_V, device_bfs, nlevel, False, device_comp_IDX + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX ) comp_IDX = device_comp_IDX.copy_to_host() npt.assert_array_equal(ref_IDX, comp_IDX) From caee6994451d12154ab980cc5342d7ebc2ba214f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 7 Jun 2022 10:58:40 -0600 Subject: [PATCH 151/416] Exclude test for k=1 to avoid redundancy --- tests/test_gpu_stump.py | 4 ++-- tests/test_stump.py | 6 +++--- tests/test_stumped.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 071337cd5..14f435dda 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -412,7 +412,7 @@ def test_gpu_stump_nan_zero_mean_self_join(): 
def test_gpu_stump_self_join_KNN(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) @@ -428,7 +428,7 @@ def test_gpu_stump_self_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) def test_gpu_stump_A_B_join_KNN(T_A, T_B): m = 3 - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True, k=k) comp_mp = gpu_stump(T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) diff --git a/tests/test_stump.py b/tests/test_stump.py index 3e0b34299..df8912829 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -246,11 +246,11 @@ def test_stump_nan_zero_mean_self_join(): def test_stump_self_join_KNN(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) comp_mp = stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) + naive.replace_insf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) @@ -261,7 +261,7 @@ def test_stump_self_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_A_B_join_KNN(T_A, T_B): m = 3 - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index 7e8b053d3..9181d81c8 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -619,7 +619,7 @@ def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: m = 3 zone = int(np.ceil(m / 4)) - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) @@ -635,7 +635,7 @@ def test_stumped_self_join_KNN(T_A, T_B, dask_cluster): def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster): with Client(dask_cluster) as dask_client: m = 3 - for k in range(1, 4): + for k in range(2, 4): ref_mp = naive.stump(T_A, m, T_B=T_B, k=k) comp_mp = stumped(dask_client, T_A, m, T_B, ignore_trivial=False, k=k) naive.replace_inf(ref_mp) From 493f6cbcd714c822e3cfb61173e3e7bc892765cb Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 7 Jun 2022 11:02:07 -0600 Subject: [PATCH 152/416] Revise test function - Make parameter `n` a function of config setting - Add filterwarning --- tests/test_gpu_stump.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 14f435dda..ef8c03c1d 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -52,8 +52,10 @@ def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): IDX[i] = _gpu_searchsorted_right(A[i], V[i], bfs, nlevel) +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) def test_gpu_searchsorted(): - n = 5000 + n = 3 * config.STUMPY_THREADS_PER_BLOCK + 1 + threads_per_block = config.STUMPY_THREADS_PER_BLOCK blocks_per_grid = math.ceil(n / threads_per_block) From 986f4697966aa344f31f14ab5a09501fe923805b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 7 Jun 2022 11:38:58 -0600 Subject: [PATCH 153/416] Fixed typo --- tests/test_stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/tests/test_stump.py b/tests/test_stump.py index df8912829..e08746758 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -250,7 +250,7 @@ def test_stump_self_join_KNN(T_A, T_B): ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k) comp_mp = stump(T_B, m, ignore_trivial=True, k=k) naive.replace_inf(ref_mp) - naive.replace_insf(comp_mp) + naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k) From b438c9c9e9fe167a7821e1fd0703030104e08012 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 7 Jun 2022 12:08:21 -0600 Subject: [PATCH 154/416] Avoided creating new array in memory --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index f83d09504..7998c042d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1065,13 +1065,13 @@ def test_merge_topk_PI(): n = 50 for k in range(1, 6): PA = np.random.rand(n * k).reshape(n, k) - PA = np.sort(PA, axis=1) # sorting each row separately + PA[:, :] = np.sort(PA, axis=1) # sorting each row separately PB = np.random.rand(n * k).reshape(n, k) col_idx = np.random.randint(0, k, size=n) for i in range(n): # creating ties between values of PA and PB PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) - PB = np.sort(PB, axis=1) # sorting each row separately + PB[:, :] = np.sort(PB, axis=1) # sorting each row separately IA = np.arange(n * k).reshape(n, k) IB = IA + n * k From a54c8789b4488568ce465bb32b4fb73e31fb5c3b Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 02:06:10 -0600 Subject: [PATCH 155/416] Improve naive.prescrump to return TopK matrix profile --- tests/naive.py | 71 ++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index fabe3d922..b742c86dd 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1403,16 +1403,14 @@ def aampdist_snippets( ) -def prescrump(T_A, m, T_B, s, exclusion_zone=None): +def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): dist_matrix = distance_matrix(T_A, T_B, m) n_A = T_A.shape[0] l = n_A - m + 1 - P = np.empty(l) - I = np.empty(l, dtype=np.int64) - P[:] = np.inf - I[:] = -1 + P = np.full((l, k), np.inf, dtype=np.float64) + I = np.full((l, k), -1, dtype=np.int64) for i in np.random.permutation(range(0, l, s)): distance_profile = dist_matrix[i] @@ -1420,33 +1418,44 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None): apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) # only for self-join - mask = distance_profile < P - P[mask] = distance_profile[mask] - I[mask] = i - - I[i] = np.argmin(distance_profile) - P[i] = distance_profile[I[i]] - if P[i] == np.inf: - I[i] = -1 + for idx in np.flatnonzero(distance_profile < P[:, -1]): + pos = np.searchsorted(P[idx], distance_profile[idx], side="right") + P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] + I[idx] = np.insert(I[idx], pos, i)[:-1] + + I[i, 1:] = I[i, :-1] + I[i, 0] = np.argmin(distance_profile) + P[i, 1:] = P[i, :-1] + P[i, 0] = distance_profile[I[i, 0]] + if P[i, 0] == np.inf: + I[i, 0] = -1 else: - j = I[i] - for k in range(1, min(s, l - max(i, j))): - d = dist_matrix[i + k, j + k] - if d < P[i + k]: - P[i + k] = d - I[i + k] = j + k - if d < P[j + k]: - P[j + k] = d - I[j + k] = i + k - - for k in range(1, min(s, i + 1, j + 1)): - d = dist_matrix[i - k, j - k] - if d < P[i - k]: - P[i - k] = d - I[i - k] = j - k - if d < P[j - k]: - P[j 
- k] = d - I[j - k] = i - k + j = I[i, 0] # index of 1st NN + for g in range(1, min(s, l - max(i, j))): + d = dist_matrix[i + g, j + g] + if d < P[i + g, -1]: + pos = np.searchsorted(P[i + g], d, side="right") + P[i + g] = np.insert(P[i + g], pos, d)[:-1] + I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] + if d < P[j + g]: + pos = np.searchsorted(P[j + g], d, side="right") + P[j + g] = np.insert(P[j + g], pos, d)[:-1] + I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] + + for g in range(1, min(s, i + 1, j + 1)): + d = dist_matrix[i - g, j - g] + if d < P[i - g, -1]: + pos = np.searchsorted(P[i - g], d, side="right") + P[i - g] = np.insert(P[i - g], pos, d)[:-1] + I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] + if d < P[j - g]: + pos = np.searchsorted(P[j - g], d, side="right") + P[j - g] = np.insert(P[j - g], pos, d)[:-1] + I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + + if k == 1: + P = P.ravel() + I = I.ravel() return P, I From 1bf2fc29b0913855507e7945ce00f215ab6d8a74 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 02:06:16 -0600 Subject: [PATCH 156/416] test_scrump passed From 647ec3ebe3f35cc5d353a1f9eae1f12dc9321ad8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 02:14:04 -0600 Subject: [PATCH 157/416] Add new test function for prescrump TopK matrix profile --- tests/test_scrump.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index ff96d9eee..8ba48a024 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -690,3 +690,21 @@ def test_scrump_nan_zero_mean_self_join(percentages): npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) npt.assert_almost_equal(ref_right_I, comp_right_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_prescrump_self_join_KNN(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + for k in range(2, 4): + for s in range(1, zone + 1): + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=k) + + np.random.seed(seed) + comp_P, comp_I = prescrump(T_B, m, s=s, k=k) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) From 6e35c7192cc35f9dbc2db08cd85d9aee0386e5f2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 03:49:29 -0600 Subject: [PATCH 158/416] Enhance performance prescrump to return top-k matrix profile --- stumpy/scrump.py | 191 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 140 insertions(+), 51 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 53d10b612..1c3e15c14 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -31,9 +31,10 @@ def _compute_PI( P_squared, I, excl_zone=None, + k=1, ): """ - Compute (Numba JIT-compiled) and update the squared matrix profile distance + Compute (Numba JIT-compiled) and update the squared (top-k) matrix profile distance and matrix profile indces according to the preSCRIMP algorithm Parameters @@ -78,14 +79,19 @@ def _compute_PI( `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` P_squared : numpy.ndarray - The squared matrix profile + The squared (top-k) matrix profile I : numpy.ndarray - The matrix profile indices + The (top-k) matrix profile indices excl_zone : int The half width for the exclusion zone relative to the `i`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. 
+ Returns ------- None @@ -112,58 +118,119 @@ def _compute_PI( squared_distance_profile[zone_start : zone_stop + 1] = np.inf # only for self-join - mask = squared_distance_profile < P_squared[thread_idx] - P_squared[thread_idx][mask] = squared_distance_profile[mask] - I[thread_idx][mask] = i - - I[thread_idx, i] = np.argmin(squared_distance_profile) - P_squared[thread_idx, i] = squared_distance_profile[I[thread_idx, i]] - if P_squared[thread_idx, i] == np.inf: # pragma: no cover - I[thread_idx, i] = -1 + IDX = np.flatnonzero( + squared_distance_profile < P_squared[thread_idx, :, -1] + ) + for idx in IDX: + pos = np.searchsorted( + P_squared[thread_idx, idx], + squared_distance_profile[idx], + side="right", + ) + # shifting to the right + for loc in range(k - 1, pos, -1): + P_squared[thread_idx, idx, loc] = P_squared[ + thread_idx, idx, loc - 1 + ] + I[thread_idx, idx, loc] = I[thread_idx, idx, loc - 1] + + P_squared[thread_idx, idx, pos] = squared_distance_profile[idx] + I[thread_idx, idx, pos] = i + + # shifting to the right + for loc in range(k - 1, 0, -1): + P_squared[thread_idx, i, loc] = P_squared[thread_idx, i, loc - 1] + I[thread_idx, i, loc] = I[thread_idx, i, loc - 1] + + I[thread_idx, i, 0] = np.argmin(squared_distance_profile) + P_squared[thread_idx, i, 0] = squared_distance_profile[I[thread_idx, i, 0]] + + if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover + I[thread_idx, i, 0] = -1 else: - j = I[thread_idx, i] + j = I[thread_idx, i, 0] # Given the squared distance, work backwards and compute QT - QT_j = (m - P_squared[thread_idx, i] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( + QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( m * M_T[j] * μ_Q[i] ) QT_j_prime = QT_j - for k in range(1, min(s, l - max(i, j))): + for g in range(1, min(s, l - max(i, j))): QT_j = ( QT_j - - T_B[i + k - 1] * T_A[j + k - 1] - + T_B[i + k + m - 1] * T_A[j + k + m - 1] + - T_B[i + g - 1] * T_A[j + g - 1] + + T_B[i + g + m - 1] * T_A[j + g + m - 1] ) D_squared = core._calculate_squared_distance( m, QT_j, - M_T[i + k], - Σ_T[i + k], - μ_Q[j + k], - σ_Q[j + k], + M_T[i + g], + Σ_T[i + g], + μ_Q[j + g], + σ_Q[j + g], ) - if D_squared < P_squared[thread_idx, i + k]: - P_squared[thread_idx, i + k] = D_squared - I[thread_idx, i + k] = j + k - if D_squared < P_squared[thread_idx, j + k]: - P_squared[thread_idx, j + k] = D_squared - I[thread_idx, j + k] = i + k + if D_squared < P_squared[thread_idx, i + g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, i + g], D_squared, side="right" + ) + # shifting to the right + for loc in range(k - 1, pos, -1): + P_squared[thread_idx, i + g, loc] = P_squared[ + thread_idx, i + g, loc - 1 + ] + I[thread_idx, i + g, loc] = I[thread_idx, i + g, loc - 1] + + P_squared[thread_idx, i + g, pos] = D_squared + I[thread_idx, i + g, pos] = j + g + if D_squared < P_squared[thread_idx, j + g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, j + g], D_squared, side="right" + ) + # shifting to the right + for loc in range(k - 1, pos, -1): + P_squared[thread_idx, j + g, loc] = P_squared[ + thread_idx, j + g, loc - 1 + ] + I[thread_idx, j + g, loc] = I[thread_idx, j + g, loc - 1] + + P_squared[thread_idx, j + g, pos] = D_squared + I[thread_idx, j + g, pos] = i + g QT_j = QT_j_prime - for k in range(1, min(s, i + 1, j + 1)): - QT_j = QT_j - T_B[i - k + m] * T_A[j - k + m] + T_B[i - k] * T_A[j - k] + for g in range(1, min(s, i + 1, j + 1)): + QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] D_squared = core._calculate_squared_distance( m, 
QT_j, - M_T[i - k], - Σ_T[i - k], - μ_Q[j - k], - σ_Q[j - k], + M_T[i - g], + Σ_T[i - g], + μ_Q[j - g], + σ_Q[j - g], ) - if D_squared < P_squared[thread_idx, i - k]: - P_squared[thread_idx, i - k] = D_squared - I[thread_idx, i - k] = j - k - if D_squared < P_squared[thread_idx, j - k]: - P_squared[thread_idx, j - k] = D_squared - I[thread_idx, j - k] = i - k + if D_squared < P_squared[thread_idx, i - g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, i - g], D_squared, side="right" + ) + # shifting to the right + for loc in range(k - 1, pos, -1): + P_squared[thread_idx, i - g, loc] = P_squared[ + thread_idx, i - g, loc - 1 + ] + I[thread_idx, i - g, loc] = I[thread_idx, i - g, loc - 1] + + P_squared[thread_idx, i - g, pos] = D_squared + I[thread_idx, i - g, pos] = j - g + if D_squared < P_squared[thread_idx, j - g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, j - g], D_squared, side="right" + ) + # shifting to the right + for loc in range(k - 1, pos, -1): + P_squared[thread_idx, j - g, loc] = P_squared[ + thread_idx, j - g, loc - 1 + ] + I[thread_idx, j - g, loc] = I[thread_idx, j - g, loc - 1] + + P_squared[thread_idx, j - g, pos] = D_squared + I[thread_idx, j - g, pos] = i - g @njit( @@ -183,6 +250,7 @@ def _prescrump( indices, s, excl_zone=None, + k=1, ): """ A Numba JIT-compiled implementation of the preSCRIMP algorithm. @@ -232,13 +300,22 @@ def _prescrump( excl_zone : int The half width for the exclusion zone relative to the `i`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out1 : numpy.ndarray - Matrix profile + The (top-k) Matrix profile. When k = 1 (default), the first and only column + consists of the matrix profile. However, when k > 1, the output has exacly + k columns consist of the top-k matrix profile. out2 : numpy.ndarray - Matrix profile indices + The (top-k) Matrix profile. When k = 1 (default), the first and only column + consists of the matrix profile indices. However, when k > 1, the output has + exacly k columns consist of the top-k matrix profile indices. 
Notes ----- @@ -249,8 +326,8 @@ def _prescrump( """ n_threads = numba.config.NUMBA_NUM_THREADS l = T_A.shape[0] - m + 1 - P_squared = np.full((n_threads, l), np.inf, dtype=np.float64) - I = np.full((n_threads, l), -1, dtype=np.int64) + P_squared = np.full((n_threads, l, k), np.inf, dtype=np.float64) + I = np.full((n_threads, l, k), -1, dtype=np.int64) idx_ranges = core._get_ranges(len(indices), n_threads, truncate=False) for thread_idx in prange(n_threads): @@ -270,23 +347,21 @@ def _prescrump( P_squared, I, excl_zone, + k, ) for thread_idx in range(1, n_threads): - for i in range(l): - if P_squared[thread_idx, i] < P_squared[0, i]: - P_squared[0, i] = P_squared[thread_idx, i] - I[0, i] = I[thread_idx, i] + core._merge_topk_PI(P_squared[0], P_squared[thread_idx], I[0], I[thread_idx]) return np.sqrt(P_squared[0]), I[0] @core.non_normalized(scraamp.prescraamp) -def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0): +def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): """ A convenience wrapper around the Numba JIT-compiled parallelized `_prescrump` - function which computes the approximate matrix profile according to the preSCRIMP - algorithm + function which computes the approximate (top-k) matrix profile according to + the preSCRIMP algorithm Parameters ---------- @@ -313,13 +388,22 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0): The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- P : numpy.ndarray - Matrix profile + The (top-k) Matrix profile. When k = 1 (default), it is a 1d array. However, + when k > 1, it is a 2d array with exacly `k` columns consist of the top-k + matrix profile. I : numpy.ndarray - Matrix profile indices + The (top-k) Matrix profile indices. When k = 1 (default), it is a 1d array. + However, when k > 1, it is a 2d array with exacly `k` columns consist of + the top-k matrix profile indices. 
Notes ----- @@ -355,8 +439,13 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0): indices, s, excl_zone, + k, ) + if k == 1: + P = P.ravel() + I = I.ravel() + return P, I From 5f9ce865ecc91a99562994d9ecc45787cbc754d7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 03:54:18 -0600 Subject: [PATCH 159/416] Add test function for top-k feature of prescrump AB join --- tests/test_scrump.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 8ba48a024..0989010ce 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -708,3 +708,21 @@ def test_prescrump_self_join_KNN(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_prescrump_A_B_join_KNN(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + for k in range(2, 4): + for s in range(1, zone + 1): + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s) + + np.random.seed(seed) + comp_P, comp_I = prescrump(T_A, m, T_B=T_B, s=s) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) From 4096b261cfc7bb139e107ee028ea69d8d889b920 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 16:12:20 -0600 Subject: [PATCH 160/416] Temporarily added parameter k to prescraamp to pass non normalized decorator test --- stumpy/scraamp.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 4bcccd508..44a8b7c83 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -270,7 +270,8 @@ def _prescraamp( return np.power(P_NORM[0], 1.0 / p), I[0] -def prescraamp(T_A, m, T_B=None, s=None, p=2.0): +def prescraamp(T_A, m, T_B=None, s=None, p=2.0, k=1): + # this function should be modified so that it can return top-k matrix profile """ A convenience wrapper around the Numba JIT-compiled parallelized `_prescraamp` function which computes the approximate matrix profile according to the @@ -295,6 +296,11 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- P : numpy.ndarray From c85357e9ec5b957fd98029c7d23c298b37cee1db Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 17:20:04 -0600 Subject: [PATCH 161/416] Refactored --- stumpy/scrump.py | 95 ++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 1c3e15c14..5b86b02e6 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -14,6 +14,32 @@ logger = logging.getLogger(__name__) +@njit +def _insert(a, idx, v): + """ + Insert value `v` into array `a` at index `idx` (in place) and throw away + the last element (i.e. 
not changing the length of original array) + + Parameters + ---------- + a: numpy.ndarray + a 1d array + + idx: int + the index at which the value `v` should be inserted + + v: float + the value that should be inserted into array `a` at index `idx` + + Returns + ------- + None + """ + for i in range(a.shape[0] - 1, idx, -1): + a[i] = a[i - 1] + a[idx] = v + + @njit(fastmath=True) def _compute_PI( T_A, @@ -128,22 +154,14 @@ def _compute_PI( side="right", ) # shifting to the right - for loc in range(k - 1, pos, -1): - P_squared[thread_idx, idx, loc] = P_squared[ - thread_idx, idx, loc - 1 - ] - I[thread_idx, idx, loc] = I[thread_idx, idx, loc - 1] - - P_squared[thread_idx, idx, pos] = squared_distance_profile[idx] - I[thread_idx, idx, pos] = i - - # shifting to the right - for loc in range(k - 1, 0, -1): - P_squared[thread_idx, i, loc] = P_squared[thread_idx, i, loc - 1] - I[thread_idx, i, loc] = I[thread_idx, i, loc - 1] + _insert( + P_squared[thread_idx, idx, :], pos, squared_distance_profile[idx] + ) + _insert(I[thread_idx, idx, :], pos, i) - I[thread_idx, i, 0] = np.argmin(squared_distance_profile) - P_squared[thread_idx, i, 0] = squared_distance_profile[I[thread_idx, i, 0]] + idx = np.argmin(squared_distance_profile) + _insert(P_squared[thread_idx, i, :], 0, squared_distance_profile[idx]) + _insert(I[thread_idx, i, :], 0, idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -172,28 +190,16 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - # shifting to the right - for loc in range(k - 1, pos, -1): - P_squared[thread_idx, i + g, loc] = P_squared[ - thread_idx, i + g, loc - 1 - ] - I[thread_idx, i + g, loc] = I[thread_idx, i + g, loc - 1] - - P_squared[thread_idx, i + g, pos] = D_squared - I[thread_idx, i + g, pos] = j + g + _insert(P_squared[thread_idx, i + g, :], pos, D_squared) + _insert(I[thread_idx, i + g, :], pos, j + g) + if D_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - # shifting to the right - for loc in range(k - 1, pos, -1): - P_squared[thread_idx, j + g, loc] = P_squared[ - thread_idx, j + g, loc - 1 - ] - I[thread_idx, j + g, loc] = I[thread_idx, j + g, loc - 1] - - P_squared[thread_idx, j + g, pos] = D_squared - I[thread_idx, j + g, pos] = i + g + _insert(P_squared[thread_idx, j + g, :], pos, D_squared) + _insert(I[thread_idx, j + g, :], pos, i + g) + QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] @@ -209,28 +215,15 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - # shifting to the right - for loc in range(k - 1, pos, -1): - P_squared[thread_idx, i - g, loc] = P_squared[ - thread_idx, i - g, loc - 1 - ] - I[thread_idx, i - g, loc] = I[thread_idx, i - g, loc - 1] - - P_squared[thread_idx, i - g, pos] = D_squared - I[thread_idx, i - g, pos] = j - g + _insert(P_squared[thread_idx, i - g, :], pos, D_squared) + _insert(I[thread_idx, i - g, :], pos, j - g) + if D_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - # shifting to the right - for loc in range(k - 1, pos, -1): - P_squared[thread_idx, j - g, loc] = P_squared[ - thread_idx, j - g, loc - 1 - ] - I[thread_idx, j - g, loc] = I[thread_idx, j - g, loc - 1] - - P_squared[thread_idx, j - g, pos] = D_squared - I[thread_idx, j - g, pos] = i - g + 
_insert(P_squared[thread_idx, j - g, :], pos, D_squared) + _insert(I[thread_idx, j - g, :], pos, i - g) @njit( From e9a61bea21c1f2cc20781d78ba3110f11ca727e7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 18:47:09 -0600 Subject: [PATCH 162/416] Confirmed Full test and coverage passing From c0b05ed91b46d09910d29fe03fb1ac72aa4d3435 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 11 Jun 2022 18:49:11 -0600 Subject: [PATCH 163/416] Removed wrong comment --- stumpy/scrump.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 5b86b02e6..997e4552c 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -153,7 +153,6 @@ def _compute_PI( squared_distance_profile[idx], side="right", ) - # shifting to the right _insert( P_squared[thread_idx, idx, :], pos, squared_distance_profile[idx] ) From 820408d22f477d863b23b64bd5a4bb8456abf4d8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 12 Jun 2022 16:45:27 -0600 Subject: [PATCH 164/416] Move function to stumpy.core --- stumpy/core.py | 26 +++++++++++++++++++++++++ stumpy/scrump.py | 50 ++++++++++++------------------------------------ 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 535471761..f9ad2a06c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,3 +2604,29 @@ def _merge_topk_PI(PA, PB, IA, IB): start = idx stop += 1 # because of shifting elements to the right by one + + +@njit +def _insert(a, idx, v): + """ + Insert value `v` into array `a` at index `idx` (in place) and throw away + the last element (i.e. not changing the length of original array) + + Parameters + ---------- + a: numpy.ndarray + a 1d array + + idx: int + the index at which the value `v` should be inserted + + v: float + the value that should be inserted into array `a` at index `idx` + + Returns + ------- + None + """ + for i in range(a.shape[0] - 1, idx, -1): + a[i] = a[i - 1] + a[idx] = v diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 997e4552c..fe4ea09da 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -14,32 +14,6 @@ logger = logging.getLogger(__name__) -@njit -def _insert(a, idx, v): - """ - Insert value `v` into array `a` at index `idx` (in place) and throw away - the last element (i.e. 
not changing the length of original array) - - Parameters - ---------- - a: numpy.ndarray - a 1d array - - idx: int - the index at which the value `v` should be inserted - - v: float - the value that should be inserted into array `a` at index `idx` - - Returns - ------- - None - """ - for i in range(a.shape[0] - 1, idx, -1): - a[i] = a[i - 1] - a[idx] = v - - @njit(fastmath=True) def _compute_PI( T_A, @@ -153,14 +127,14 @@ def _compute_PI( squared_distance_profile[idx], side="right", ) - _insert( + core._insert( P_squared[thread_idx, idx, :], pos, squared_distance_profile[idx] ) - _insert(I[thread_idx, idx, :], pos, i) + core._insert(I[thread_idx, idx, :], pos, i) idx = np.argmin(squared_distance_profile) - _insert(P_squared[thread_idx, i, :], 0, squared_distance_profile[idx]) - _insert(I[thread_idx, i, :], 0, idx) + core._insert(P_squared[thread_idx, i, :], 0, squared_distance_profile[idx]) + core._insert(I[thread_idx, i, :], 0, idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -189,15 +163,15 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - _insert(P_squared[thread_idx, i + g, :], pos, D_squared) - _insert(I[thread_idx, i + g, :], pos, j + g) + core._insert(P_squared[thread_idx, i + g, :], pos, D_squared) + core._insert(I[thread_idx, i + g, :], pos, j + g) if D_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - _insert(P_squared[thread_idx, j + g, :], pos, D_squared) - _insert(I[thread_idx, j + g, :], pos, i + g) + core._insert(P_squared[thread_idx, j + g, :], pos, D_squared) + core._insert(I[thread_idx, j + g, :], pos, i + g) QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): @@ -214,15 +188,15 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - _insert(P_squared[thread_idx, i - g, :], pos, D_squared) - _insert(I[thread_idx, i - g, :], pos, j - g) + core._insert(P_squared[thread_idx, i - g, :], pos, D_squared) + core._insert(I[thread_idx, i - g, :], pos, j - g) if D_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - _insert(P_squared[thread_idx, j - g, :], pos, D_squared) - _insert(I[thread_idx, j - g, :], pos, i - g) + core._insert(P_squared[thread_idx, j - g, :], pos, D_squared) + core._insert(I[thread_idx, j - g, :], pos, i - g) @njit( From 9f1fc8a7609a438105dde6a8bd3c998ec17d54a5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 12 Jun 2022 16:55:00 -0600 Subject: [PATCH 165/416] replace for-loop with Advanced indexing --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f9ad2a06c..35f9c8f92 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2627,6 +2627,5 @@ def _insert(a, idx, v): ------- None """ - for i in range(a.shape[0] - 1, idx, -1): - a[i] = a[i - 1] + a[idx + 1 :] = a[idx:-1] a[idx] = v From 0744378e4889ccd0af137bb61f9cf9fe063c182b Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 12 Jun 2022 16:56:49 -0600 Subject: [PATCH 166/416] Improved Docstring --- stumpy/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 35f9c8f92..651cc3e35 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2618,7 +2618,8 @@ def _insert(a, idx, v): a 1d array idx: int - the index at which the value `v` should be inserted + the index at which the value `v` should be 
inserted. This can be any + integer number from `0` to `len(a) - 1` v: float the value that should be inserted into array `a` at index `idx` From e13fb7ab2a03b429efbaec1ada03bd80acdeb939 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 12 Jun 2022 17:20:02 -0600 Subject: [PATCH 167/416] Added test function --- tests/test_core.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 7998c042d..4934a684d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1087,3 +1087,17 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) + + +def test_insert(): + for k in range(1, 6): + ref_A = np.random.rand(k) + comp_A = ref_A.copy() + + insert_idx = np.arange(k) + values = np.random.rand(k) + for (idx, v) in zip(insert_idx, values): + ref_A = np.insert(ref_A, idx, v)[:-1] + core._insert(comp_A, idx, v) # updating comp_A + + npt.assert_array_equal(ref_A, comp_A) From 522b5ec9bb07f3e3f3bea362bad688ae3fb0177d Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 12 Jun 2022 18:36:13 -0600 Subject: [PATCH 168/416] minor change in test function --- tests/test_core.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 4934a684d..d9eb3c33e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1091,13 +1091,15 @@ def test_merge_topk_PI(): def test_insert(): for k in range(1, 6): - ref_A = np.random.rand(k) - comp_A = ref_A.copy() + a = np.random.rand(k) - insert_idx = np.arange(k) + indices = np.arange(k) values = np.random.rand(k) - for (idx, v) in zip(insert_idx, values): - ref_A = np.insert(ref_A, idx, v)[:-1] - core._insert(comp_A, idx, v) # updating comp_A + for (idx, v) in zip(indices, values): + ref = a.copy() + comp = a.copy() - npt.assert_array_equal(ref_A, comp_A) + ref = np.insert(ref, idx, v)[:-1] + core._insert(comp, idx, v) # updating comp_A + + npt.assert_array_equal(ref, comp) From 736bf6befa23fceef1a811d0ed55f5f9058c8258 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 13 Jun 2022 18:13:27 -0600 Subject: [PATCH 169/416] Revised docstrings --- stumpy/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 651cc3e35..860c6ec41 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2565,7 +2565,8 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles PA and PB, and update PA (in place) while - prioritizing values of PA in ties. Also, update IA accordingly. + always choosing values of PA over values of PB in case of ties. Also, update + IA accordingly. Parameters ---------- @@ -2609,8 +2610,8 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit def _insert(a, idx, v): """ - Insert value `v` into array `a` at index `idx` (in place) and throw away - the last element (i.e. not changing the length of original array) + Insert value `v` into array `a` at index `idx` (in place) and discard + the last element (i.e. 
without changing the length of `a`) Parameters ---------- From 5d265daaead86941c6b4c1757915075ae32ee4c9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 13 Jun 2022 18:27:00 -0600 Subject: [PATCH 170/416] Renamed function to make it more specific --- stumpy/core.py | 2 +- stumpy/scrump.py | 38 ++++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 860c6ec41..c4bc4cd15 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2608,7 +2608,7 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit -def _insert(a, idx, v): +def _shift_at_index_and_insert(a, idx, v): """ Insert value `v` into array `a` at index `idx` (in place) and discard the last element (i.e. without changing the length of `a`) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index fe4ea09da..908607ee4 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -127,14 +127,16 @@ def _compute_PI( squared_distance_profile[idx], side="right", ) - core._insert( + core._shift_at_index_and_insert( P_squared[thread_idx, idx, :], pos, squared_distance_profile[idx] ) - core._insert(I[thread_idx, idx, :], pos, i) + core._shift_at_index_and_insert(I[thread_idx, idx, :], pos, i) idx = np.argmin(squared_distance_profile) - core._insert(P_squared[thread_idx, i, :], 0, squared_distance_profile[idx]) - core._insert(I[thread_idx, i, :], 0, idx) + core._shift_at_index_and_insert( + P_squared[thread_idx, i, :], 0, squared_distance_profile[idx] + ) + core._shift_at_index_and_insert(I[thread_idx, i, :], 0, idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -163,15 +165,19 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - core._insert(P_squared[thread_idx, i + g, :], pos, D_squared) - core._insert(I[thread_idx, i + g, :], pos, j + g) + core._shift_at_index_and_insert( + P_squared[thread_idx, i + g, :], pos, D_squared + ) + core._shift_at_index_and_insert(I[thread_idx, i + g, :], pos, j + g) if D_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - core._insert(P_squared[thread_idx, j + g, :], pos, D_squared) - core._insert(I[thread_idx, j + g, :], pos, i + g) + core._shift_at_index_and_insert( + P_squared[thread_idx, j + g, :], pos, D_squared + ) + core._shift_at_index_and_insert(I[thread_idx, j + g, :], pos, i + g) QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): @@ -188,15 +194,23 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - core._insert(P_squared[thread_idx, i - g, :], pos, D_squared) - core._insert(I[thread_idx, i - g, :], pos, j - g) + core._shift_at_index_and_shift_at_index_and_insert( + P_squared[thread_idx, i - g, :], pos, D_squared + ) + core._shift_at_index_and_shift_at_index_and_insert( + I[thread_idx, i - g, :], pos, j - g + ) if D_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - core._insert(P_squared[thread_idx, j - g, :], pos, D_squared) - core._insert(I[thread_idx, j - g, :], pos, i - g) + core._shift_at_index_and_shift_at_index_and_insert( + P_squared[thread_idx, j - g, :], pos, D_squared + ) + core._shift_at_index_and_shift_at_index_and_insert( + I[thread_idx, j - g, :], pos, i - g + ) @njit( From 4db2de1a32979226f79855d0da39e5dcf6df2246 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 13 Jun 2022 18:33:36 -0600 Subject: [PATCH 171/416] Added if to check 
input parameter --- stumpy/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c4bc4cd15..f7b8d4ee4 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2629,5 +2629,6 @@ def _shift_at_index_and_insert(a, idx, v): ------- None """ - a[idx + 1 :] = a[idx:-1] - a[idx] = v + if idx < len(a): + a[idx + 1 :] = a[idx:-1] + a[idx] = v From 16d02f2364661e3fa4b7ab1bd6c094b1cb60e61a Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 13 Jun 2022 18:49:08 -0600 Subject: [PATCH 172/416] Revise test function - rename functions - consider edge case in testing --- tests/test_core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index d9eb3c33e..25416dbcd 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1089,17 +1089,17 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_I, comp_I) -def test_insert(): +def test_shift_at_index_and_insert(): for k in range(1, 6): a = np.random.rand(k) - indices = np.arange(k) - values = np.random.rand(k) + indices = np.arange(k + 1) + values = np.random.rand(k + 1) for (idx, v) in zip(indices, values): ref = a.copy() comp = a.copy() ref = np.insert(ref, idx, v)[:-1] - core._insert(comp, idx, v) # updating comp_A + core._shift_at_index_and_insert(comp, idx, v) # update comp in place npt.assert_array_equal(ref, comp) From 29894f2c8186a3bc9978b002564e008ead1e6ffc Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 13 Jun 2022 19:36:20 -0600 Subject: [PATCH 173/416] Removed unnecessary trailing colon --- stumpy/scrump.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 908607ee4..908aed5bd 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -128,15 +128,15 @@ def _compute_PI( side="right", ) core._shift_at_index_and_insert( - P_squared[thread_idx, idx, :], pos, squared_distance_profile[idx] + P_squared[thread_idx, idx], pos, squared_distance_profile[idx] ) - core._shift_at_index_and_insert(I[thread_idx, idx, :], pos, i) + core._shift_at_index_and_insert(I[thread_idx, idx], pos, i) idx = np.argmin(squared_distance_profile) core._shift_at_index_and_insert( - P_squared[thread_idx, i, :], 0, squared_distance_profile[idx] + P_squared[thread_idx, i], 0, squared_distance_profile[idx] ) - core._shift_at_index_and_insert(I[thread_idx, i, :], 0, idx) + core._shift_at_index_and_insert(I[thread_idx, i], 0, idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -166,18 +166,18 @@ def _compute_PI( P_squared[thread_idx, i + g], D_squared, side="right" ) core._shift_at_index_and_insert( - P_squared[thread_idx, i + g, :], pos, D_squared + P_squared[thread_idx, i + g], pos, D_squared ) - core._shift_at_index_and_insert(I[thread_idx, i + g, :], pos, j + g) + core._shift_at_index_and_insert(I[thread_idx, i + g], pos, j + g) if D_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) core._shift_at_index_and_insert( - P_squared[thread_idx, j + g, :], pos, D_squared + P_squared[thread_idx, j + g], pos, D_squared ) - core._shift_at_index_and_insert(I[thread_idx, j + g, :], pos, i + g) + core._shift_at_index_and_insert(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): @@ -195,10 +195,10 @@ def _compute_PI( P_squared[thread_idx, i - g], D_squared, side="right" ) core._shift_at_index_and_shift_at_index_and_insert( - 
P_squared[thread_idx, i - g, :], pos, D_squared + P_squared[thread_idx, i - g], pos, D_squared ) core._shift_at_index_and_shift_at_index_and_insert( - I[thread_idx, i - g, :], pos, j - g + I[thread_idx, i - g], pos, j - g ) if D_squared < P_squared[thread_idx, j - g, -1]: @@ -206,10 +206,10 @@ def _compute_PI( P_squared[thread_idx, j - g], D_squared, side="right" ) core._shift_at_index_and_shift_at_index_and_insert( - P_squared[thread_idx, j - g, :], pos, D_squared + P_squared[thread_idx, j - g], pos, D_squared ) core._shift_at_index_and_shift_at_index_and_insert( - I[thread_idx, j - g, :], pos, i - g + I[thread_idx, j - g], pos, i - g ) From 12d02aa9b58e9f4a1d1fe1c3f0fe113c2cea47c4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 00:42:56 -0600 Subject: [PATCH 174/416] rename variable to improve readability --- stumpy/scrump.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 908aed5bd..263879057 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -153,7 +153,7 @@ def _compute_PI( - T_B[i + g - 1] * T_A[j + g - 1] + T_B[i + g + m - 1] * T_A[j + g + m - 1] ) - D_squared = core._calculate_squared_distance( + d_squared = core._calculate_squared_distance( m, QT_j, M_T[i + g], @@ -161,28 +161,28 @@ def _compute_PI( μ_Q[j + g], σ_Q[j + g], ) - if D_squared < P_squared[thread_idx, i + g, -1]: + if d_squared < P_squared[thread_idx, i + g, -1]: pos = np.searchsorted( - P_squared[thread_idx, i + g], D_squared, side="right" + P_squared[thread_idx, i + g], d_squared, side="right" ) core._shift_at_index_and_insert( - P_squared[thread_idx, i + g], pos, D_squared + P_squared[thread_idx, i + g], pos, d_squared ) core._shift_at_index_and_insert(I[thread_idx, i + g], pos, j + g) - if D_squared < P_squared[thread_idx, j + g, -1]: + if d_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( - P_squared[thread_idx, j + g], D_squared, side="right" + P_squared[thread_idx, j + g], d_squared, side="right" ) core._shift_at_index_and_insert( - P_squared[thread_idx, j + g], pos, D_squared + P_squared[thread_idx, j + g], pos, d_squared ) core._shift_at_index_and_insert(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] - D_squared = core._calculate_squared_distance( + d_squared = core._calculate_squared_distance( m, QT_j, M_T[i - g], @@ -190,23 +190,23 @@ def _compute_PI( μ_Q[j - g], σ_Q[j - g], ) - if D_squared < P_squared[thread_idx, i - g, -1]: + if d_squared < P_squared[thread_idx, i - g, -1]: pos = np.searchsorted( - P_squared[thread_idx, i - g], D_squared, side="right" + P_squared[thread_idx, i - g], d_squared, side="right" ) core._shift_at_index_and_shift_at_index_and_insert( - P_squared[thread_idx, i - g], pos, D_squared + P_squared[thread_idx, i - g], pos, d_squared ) core._shift_at_index_and_shift_at_index_and_insert( I[thread_idx, i - g], pos, j - g ) - if D_squared < P_squared[thread_idx, j - g, -1]: + if d_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( - P_squared[thread_idx, j - g], D_squared, side="right" + P_squared[thread_idx, j - g], d_squared, side="right" ) core._shift_at_index_and_shift_at_index_and_insert( - P_squared[thread_idx, j - g], pos, D_squared + P_squared[thread_idx, j - g], pos, d_squared ) core._shift_at_index_and_shift_at_index_and_insert( I[thread_idx, j - g], pos, i - g From 3818cf622576b635ee8f6bf4ec623bd13271ec7d Mon Sep 17 
00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 00:49:07 -0600 Subject: [PATCH 175/416] Revised performant and naive version of prescrump - allowed prescrump to allow 2d array when k is 1 - revised/improve docstrings --- stumpy/scrump.py | 24 ++++++++++-------------- tests/naive.py | 4 ---- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 263879057..77f06021f 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -289,13 +289,13 @@ def _prescrump( ------- out1 : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile. However, when k > 1, the output has exacly - k columns consist of the top-k matrix profile. + consists of the matrix profile. When k > 1, the output has exacly k columns + consist of the top-k matrix profile. out2 : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile indices. However, when k > 1, the output has - exacly k columns consist of the top-k matrix profile indices. + consists of the matrix profile indices. When k > 1, the output has exacly + k columns consist of the top-k matrix profile indices. Notes ----- @@ -376,14 +376,14 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): Returns ------- P : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), it is a 1d array. However, - when k > 1, it is a 2d array with exacly `k` columns consist of the top-k - matrix profile. + The (top-k) Matrix profile. When k = 1 (default), the first and only column + consists of the matrix profile. When k > 1, the output has exacly k columns + consist of the top-k matrix profile. I : numpy.ndarray - The (top-k) Matrix profile indices. When k = 1 (default), it is a 1d array. - However, when k > 1, it is a 2d array with exacly `k` columns consist of - the top-k matrix profile indices. + The (top-k) Matrix profile. When k = 1 (default), the first and only column + consists of the matrix profile indices. When k > 1, the output has exacly + k columns consist of the top-k matrix profile indices. 
Notes ----- @@ -422,10 +422,6 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): k, ) - if k == 1: - P = P.ravel() - I = I.ravel() - return P, I diff --git a/tests/naive.py b/tests/naive.py index b742c86dd..40b9d71e2 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1453,10 +1453,6 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): P[j - g] = np.insert(P[j - g], pos, d)[:-1] I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] - if k == 1: - P = P.ravel() - I = I.ravel() - return P, I From 241dee9281e9d1fcd72d104f0089d0ff3fcb52fc Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 00:54:55 -0600 Subject: [PATCH 176/416] Add comments and reminders to improve readability --- stumpy/scrump.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 77f06021f..165692317 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -113,11 +113,31 @@ def _compute_PI( squared_distance_profile[:] = core._mass(Q, T_B, QT, μ_Q[i], σ_Q[i], M_T, Σ_T) squared_distance_profile[:] = np.square(squared_distance_profile) if excl_zone is not None: + # self-join zone_start = max(0, i - excl_zone) zone_stop = min(l, i + excl_zone) squared_distance_profile[zone_start : zone_stop + 1] = np.inf - # only for self-join + # Reminder(1): this `squared_distance_profile` is the (square of) distance profile + # that corresponds to `S_i`, the subsequence with start index `i`. + + # Reminder(2): `P_squared[thread_idx, index, :]` should contain the (approx.) + # TopK distance between `S_idx` to its neighbors (in thread_idx). And, + # these distances are sorted ascendingly. so, `P_squared[thread_idx, index, 0]` + # is smallest and `P_squared[thread_idx, index, -1]` is the largest in the array + # `P_squared[thread_idx, index, :]` + + # The value `d_squared = squared_distance_profile[idx]` is the squared-distance + # between `S_i` and the `S_idx`. Therefore, `d_squared` is the squared_distance + # from `S_idx` to one of its neighbors, `S_i`. If `d_squared` is less than + # `P_squared[thread_idx, idx, -1]`, then that means the so-far-discovered TopK + # for `S_idx` (i.e. `P_squared[thread_idx, idx, :]`) MUST be updated! + + # note: further explanation + # `squared_distance_profile` (of `S_i`) is actually the `i`-th row of + # Squared-Distance-Matrix. Its idx-th element (which is in idx-th column), + # is `d_squared = squared_distance_profile[idx]`. If `d_squared < P_squared[thread_idx, idx, -1]`, + # then `P_squared[thread_idx, idx, :]` MUST be updated. IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) @@ -132,11 +152,16 @@ def _compute_PI( ) core._shift_at_index_and_insert(I[thread_idx, idx], pos, i) + # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` idx = np.argmin(squared_distance_profile) core._shift_at_index_and_insert( P_squared[thread_idx, i], 0, squared_distance_profile[idx] ) core._shift_at_index_and_insert(I[thread_idx, i], 0, idx) + # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]`` + # (not just its 0-th element) can be found by doing something like + # `np.sort(squared_distance_profile)[:k]`. However, it can increase the + # computing time, and thus this is avoided here. 
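# A value of np.inf left in P_squared[thread_idx, i, 0] means no finite
# neighbor was found for subsequence `i` (e.g. every candidate fell inside
# the exclusion zone), so its index stays at the -1 "no match" sentinel below.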
if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 From 99095b8b4de8559a2977521e35989f83411dc46f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 01:15:17 -0600 Subject: [PATCH 177/416] minor changes to improve readability --- stumpy/scrump.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 165692317..db5683c29 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -133,7 +133,7 @@ def _compute_PI( # `P_squared[thread_idx, idx, -1]`, then that means the so-far-discovered TopK # for `S_idx` (i.e. `P_squared[thread_idx, idx, :]`) MUST be updated! - # note: further explanation + # note: further explanation! # `squared_distance_profile` (of `S_i`) is actually the `i`-th row of # Squared-Distance-Matrix. Its idx-th element (which is in idx-th column), # is `d_squared = squared_distance_profile[idx]`. If `d_squared < P_squared[thread_idx, idx, -1]`, @@ -142,23 +142,18 @@ def _compute_PI( squared_distance_profile < P_squared[thread_idx, :, -1] ) for idx in IDX: - pos = np.searchsorted( - P_squared[thread_idx, idx], - squared_distance_profile[idx], - side="right", - ) - core._shift_at_index_and_insert( - P_squared[thread_idx, idx], pos, squared_distance_profile[idx] - ) + d_squared = squared_distance_profile[idx] + pos = np.searchsorted(P_squared[thread_idx, idx], d_squared, side="right") + core._shift_at_index_and_insert(P_squared[thread_idx, idx], pos, d_squared) core._shift_at_index_and_insert(I[thread_idx, idx], pos, i) # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` - idx = np.argmin(squared_distance_profile) + nn_of_i = np.argmin(squared_distance_profile) core._shift_at_index_and_insert( - P_squared[thread_idx, i], 0, squared_distance_profile[idx] + P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i] ) - core._shift_at_index_and_insert(I[thread_idx, i], 0, idx) - # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]`` + core._shift_at_index_and_insert(I[thread_idx, i], 0, nn_of_i) + # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]` # (not just its 0-th element) can be found by doing something like # `np.sort(squared_distance_profile)[:k]`. However, it can increase the # computing time, and thus this is avoided here. From 7a593b1f5a9ac3265d70b87aafd6741a579db189 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 09:09:33 -0600 Subject: [PATCH 178/416] Revised comments --- stumpy/scrump.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index db5683c29..b06fe5ebe 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -109,7 +109,6 @@ def _compute_PI( for i in indices[start:stop]: Q = T_A[i : i + m] QT[:] = core._sliding_dot_product(Q, T_B) - # Update P[i] relative to all T[j : j + m] squared_distance_profile[:] = core._mass(Q, T_B, QT, μ_Q[i], σ_Q[i], M_T, Σ_T) squared_distance_profile[:] = np.square(squared_distance_profile) if excl_zone is not None: @@ -118,11 +117,13 @@ def _compute_PI( zone_stop = min(l, i + excl_zone) squared_distance_profile[zone_start : zone_stop + 1] = np.inf + # Update `P_squared[thread_idx, index, :]` with `squared_distance_profile[index]` + # Reminder(1): this `squared_distance_profile` is the (square of) distance profile # that corresponds to `S_i`, the subsequence with start index `i`. # Reminder(2): `P_squared[thread_idx, index, :]` should contain the (approx.) 
- # TopK distance between `S_idx` to its neighbors (in thread_idx). And, + # TopK distance between `S_index` to its neighbors (in thread_idx). And, # these distances are sorted ascendingly. so, `P_squared[thread_idx, index, 0]` # is smallest and `P_squared[thread_idx, index, -1]` is the largest in the array # `P_squared[thread_idx, index, :]` @@ -132,12 +133,18 @@ def _compute_PI( # from `S_idx` to one of its neighbors, `S_i`. If `d_squared` is less than # `P_squared[thread_idx, idx, -1]`, then that means the so-far-discovered TopK # for `S_idx` (i.e. `P_squared[thread_idx, idx, :]`) MUST be updated! + # Note that the matrix profile of indices in the trivial zone of `i` cannot + # be updated here since `squared_distance_profile` in those indices are + # set to inf. # note: further explanation! # `squared_distance_profile` (of `S_i`) is actually the `i`-th row of # Squared-Distance-Matrix. Its idx-th element (which is in idx-th column), # is `d_squared = squared_distance_profile[idx]`. If `d_squared < P_squared[thread_idx, idx, -1]`, - # then `P_squared[thread_idx, idx, :]` MUST be updated. + # it means this value (`d_squared`) can be in the TopK neighbors of `S_idx`. + # In other words, `d_squared` can be in TopK smallest values of `idx`-th COLUMN. (Recall + # that in SELF-JOIN we can use EITHER row OR column to find NearestNeighbors) + # Therefore, `P_squared[thread_idx, idx, :]` MUST be updated. IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) @@ -147,16 +154,17 @@ def _compute_PI( core._shift_at_index_and_insert(P_squared[thread_idx, idx], pos, d_squared) core._shift_at_index_and_insert(I[thread_idx, idx], pos, i) - # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` + # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` to update + # matrix profile at index `i`. nn_of_i = np.argmin(squared_distance_profile) core._shift_at_index_and_insert( P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i] ) core._shift_at_index_and_insert(I[thread_idx, i], 0, nn_of_i) # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]` - # (not just its 0-th element) can be found by doing something like - # `np.sort(squared_distance_profile)[:k]`. However, it can increase the - # computing time, and thus this is avoided here. + # (not just its 0-th element but ALL TopK) can be found by doing something like + # `np.sort(squared_distance_profile)[:k]`. However, this can increase the + # computing time, and thus this was avoided here. if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 From 3efa744d3d706e80804917fda127d083d7b1a9fd Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 09:16:02 -0600 Subject: [PATCH 179/416] Added comment to clarify the insertion indx idx-1 --- stumpy/stump.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/stumpy/stump.py b/stumpy/stump.py index f5a5fe811..e6e25c834 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -212,6 +212,12 @@ def _compute_diagonal( if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]: pearson = 1.0 + # ρ[thread_idx, i, :] is sorted ascendingly. To update + # it, Its first element (i.e. the smallest value + # of array ρ[thread_idx, i]) MUST be discarded. Therefore, + # if the insertion index of new value in `ρ[thread_idx, i]` is idx, + # then, it should be substracted by 1 since the left of idx is shifted + # to the left. 
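# Illustration (hypothetical values): with k = 3 and ρ[thread_idx, i] equal to
# [0.1, 0.4, 0.7] (ascending), a new pearson of 0.5 gives
# np.searchsorted(ρ[thread_idx, i], 0.5) == 2; the smallest value 0.1 is
# dropped, 0.4 shifts left, and 0.5 is stored at idx - 1 == 1,
# yielding [0.4, 0.5, 0.7].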
if pearson > ρ[thread_idx, i, 0]: idx = np.searchsorted(ρ[thread_idx, i], pearson) ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1:idx] From 7d680b5731007080c99b399dfb51321fad7798d1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 09:31:24 -0600 Subject: [PATCH 180/416] Choosed shorter name for function --- stumpy/core.py | 2 +- stumpy/scrump.py | 38 ++++++++++++-------------------------- tests/test_core.py | 4 ++-- 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f7b8d4ee4..3a22ac92c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2608,7 +2608,7 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit -def _shift_at_index_and_insert(a, idx, v): +def _shift_insert_at_index(a, idx, v): """ Insert value `v` into array `a` at index `idx` (in place) and discard the last element (i.e. without changing the length of `a`) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index b06fe5ebe..571d83c6d 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -151,16 +151,14 @@ def _compute_PI( for idx in IDX: d_squared = squared_distance_profile[idx] pos = np.searchsorted(P_squared[thread_idx, idx], d_squared, side="right") - core._shift_at_index_and_insert(P_squared[thread_idx, idx], pos, d_squared) - core._shift_at_index_and_insert(I[thread_idx, idx], pos, i) + core._shift_insert_at_index(P_squared[thread_idx, idx], pos, d_squared) + core._shift_insert_at_index(I[thread_idx, idx], pos, i) # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` to update # matrix profile at index `i`. nn_of_i = np.argmin(squared_distance_profile) - core._shift_at_index_and_insert( - P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i] - ) - core._shift_at_index_and_insert(I[thread_idx, i], 0, nn_of_i) + core._shift_insert_at_index(P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i]) + core._shift_insert_at_index(I[thread_idx, i], 0, nn_of_i) # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]` # (not just its 0-th element but ALL TopK) can be found by doing something like # `np.sort(squared_distance_profile)[:k]`. 
However, this can increase the @@ -193,19 +191,15 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i + g], d_squared, side="right" ) - core._shift_at_index_and_insert( - P_squared[thread_idx, i + g], pos, d_squared - ) - core._shift_at_index_and_insert(I[thread_idx, i + g], pos, j + g) + core._shift_insert_at_index(P_squared[thread_idx, i + g], pos, d_squared) + core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) if d_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], d_squared, side="right" ) - core._shift_at_index_and_insert( - P_squared[thread_idx, j + g], pos, d_squared - ) - core._shift_at_index_and_insert(I[thread_idx, j + g], pos, i + g) + core._shift_insert_at_index(P_squared[thread_idx, j + g], pos, d_squared) + core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime for g in range(1, min(s, i + 1, j + 1)): @@ -222,23 +216,15 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i - g], d_squared, side="right" ) - core._shift_at_index_and_shift_at_index_and_insert( - P_squared[thread_idx, i - g], pos, d_squared - ) - core._shift_at_index_and_shift_at_index_and_insert( - I[thread_idx, i - g], pos, j - g - ) + core._shift_insert_at_index(P_squared[thread_idx, i - g], pos, d_squared) + core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) if d_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( P_squared[thread_idx, j - g], d_squared, side="right" ) - core._shift_at_index_and_shift_at_index_and_insert( - P_squared[thread_idx, j - g], pos, d_squared - ) - core._shift_at_index_and_shift_at_index_and_insert( - I[thread_idx, j - g], pos, i - g - ) + core._shift_insert_at_index(P_squared[thread_idx, j - g], pos, d_squared) + core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) @njit( diff --git a/tests/test_core.py b/tests/test_core.py index 25416dbcd..b7ea76b8c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1089,7 +1089,7 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_I, comp_I) -def test_shift_at_index_and_insert(): +def test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) @@ -1100,6 +1100,6 @@ def test_shift_at_index_and_insert(): comp = a.copy() ref = np.insert(ref, idx, v)[:-1] - core._shift_at_index_and_insert(comp, idx, v) # update comp in place + core._shift_insert_at_index(comp, idx, v) # update comp in place npt.assert_array_equal(ref, comp) From 6c8ab788b8f567323c07f50fca128b6b69c6e2f7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 11:34:38 -0600 Subject: [PATCH 181/416] Fixed typos --- stumpy/scrump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 571d83c6d..4911bb9af 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -303,12 +303,12 @@ def _prescrump( ------- out1 : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile. When k > 1, the output has exacly k columns + consists of the matrix profile. When k > 1, the output has exactly k columns consist of the top-k matrix profile. out2 : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile indices. When k > 1, the output has exacly + consists of the matrix profile indices. When k > 1, the output has exactly k columns consist of the top-k matrix profile indices. 
Notes @@ -391,12 +391,12 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): ------- P : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile. When k > 1, the output has exacly k columns + consists of the matrix profile. When k > 1, the output has exactly k columns consist of the top-k matrix profile. I : numpy.ndarray The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile indices. When k > 1, the output has exacly + consists of the matrix profile indices. When k > 1, the output has exactly k columns consist of the top-k matrix profile indices. Notes From 0d6011dd361224df56fbf19a0931c812d8b51f07 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 11:35:01 -0600 Subject: [PATCH 182/416] Renamed variable to improve readability --- tests/naive.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 40b9d71e2..ef5a42d78 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -237,28 +237,28 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) for i in iter_range: - D = distance_matrix[i, i + g] # D: a single element - if D < P[i, k - 1]: - idx = searchsorted_right(P[i], D) + d = distance_matrix[i, i + g] + if d < P[i, k - 1]: + idx = searchsorted_right(P[i], d) # to keep the top-k, we must get rid of the last element. - P[i, :k] = np.insert(P[i, :k], idx, D)[:-1] + P[i, :k] = np.insert(P[i, :k], idx, d)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only - if D < P[i + g, k - 1]: - idx = searchsorted_right(P[i + g], D) - P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1] + if d < P[i + g, k - 1]: + idx = searchsorted_right(P[i + g], d) + P[i + g, :k] = np.insert(P[i + g, :k], idx, d)[:-1] I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] if i < i + g: # Left matrix profile and left matrix profile index - if D < P[i + g, k]: - P[i + g, k] = D + if d < P[i + g, k]: + P[i + g, k] = d I[i + g, k] = i - if D < P[i, k + 1]: + if d < P[i, k + 1]: # right matrix profile and right matrix profile index - P[i, k + 1] = D + P[i, k + 1] = d I[i, k + 1] = i + g result = np.empty((l, 2 * k + 2), dtype=object) From 5bf6fc9b1af4699c9d7ef6006ad82f0c69aeca0b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 12:07:20 -0600 Subject: [PATCH 183/416] Revised and Improved comments --- stumpy/scrump.py | 48 +++++++++++++----------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 4911bb9af..bc80e6585 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -112,39 +112,20 @@ def _compute_PI( squared_distance_profile[:] = core._mass(Q, T_B, QT, μ_Q[i], σ_Q[i], M_T, Σ_T) squared_distance_profile[:] = np.square(squared_distance_profile) if excl_zone is not None: - # self-join zone_start = max(0, i - excl_zone) zone_stop = min(l, i + excl_zone) squared_distance_profile[zone_start : zone_stop + 1] = np.inf - # Update `P_squared[thread_idx, index, :]` with `squared_distance_profile[index]` - - # Reminder(1): this `squared_distance_profile` is the (square of) distance profile - # that corresponds to `S_i`, the subsequence with start index `i`. - - # Reminder(2): `P_squared[thread_idx, index, :]` should contain the (approx.) - # TopK distance between `S_index` to its neighbors (in thread_idx). 
And, - # these distances are sorted ascendingly. so, `P_squared[thread_idx, index, 0]` - # is smallest and `P_squared[thread_idx, index, -1]` is the largest in the array - # `P_squared[thread_idx, index, :]` - - # The value `d_squared = squared_distance_profile[idx]` is the squared-distance - # between `S_i` and the `S_idx`. Therefore, `d_squared` is the squared_distance - # from `S_idx` to one of its neighbors, `S_i`. If `d_squared` is less than - # `P_squared[thread_idx, idx, -1]`, then that means the so-far-discovered TopK - # for `S_idx` (i.e. `P_squared[thread_idx, idx, :]`) MUST be updated! - # Note that the matrix profile of indices in the trivial zone of `i` cannot - # be updated here since `squared_distance_profile` in those indices are - # set to inf. - - # note: further explanation! - # `squared_distance_profile` (of `S_i`) is actually the `i`-th row of - # Squared-Distance-Matrix. Its idx-th element (which is in idx-th column), - # is `d_squared = squared_distance_profile[idx]`. If `d_squared < P_squared[thread_idx, idx, -1]`, - # it means this value (`d_squared`) can be in the TopK neighbors of `S_idx`. - # In other words, `d_squared` can be in TopK smallest values of `idx`-th COLUMN. (Recall - # that in SELF-JOIN we can use EITHER row OR column to find NearestNeighbors) - # Therefore, `P_squared[thread_idx, idx, :]` MUST be updated. + if excl_zone is not None: # self-join + # note: S_index = T[index: index + m] + # `v = squared_distance_profile[idx]` is (the square of) + # `dist(S_i, S_idx)`, which is the same as `dist(S_idx, S_i)`. So, + # `squared_distance_profile[idx]` is (the square of) distane from `S_idx` + # to one of its neighbors, `S_i`. Therefore, the value `v` can be used to + # update the TopK of `S_idx`, stored "ascendingly" in `P_squared[thread_idx, idx, :]`. + + # `P_squared[thread_idx, idx, :]` in inf for those `idx` that are in the trivial zone, + # including the `i` itself. So, those will not be updated here. IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) @@ -154,19 +135,16 @@ def _compute_PI( core._shift_insert_at_index(P_squared[thread_idx, idx], pos, d_squared) core._shift_insert_at_index(I[thread_idx, idx], pos, i) - # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` to update - # matrix profile at index `i`. + # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` nn_of_i = np.argmin(squared_distance_profile) core._shift_insert_at_index(P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i]) core._shift_insert_at_index(I[thread_idx, i], 0, nn_of_i) - # [note] EXACT (not approx.) values of `P_squared[thread_idx, i, :]` - # (not just its 0-th element but ALL TopK) can be found by doing something like - # `np.sort(squared_distance_profile)[:k]`. However, this can increase the - # computing time, and thus this was avoided here. if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 else: + # update P_squared[thread_idx, index, :] for those `index` that are + # in the vicinity of `i` or its 1NN, `j`. 
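# `j` below is the (current) nearest neighbor of `i`. Because the pairs
# (i + g, j + g) and (i - g, j - g) lie on the same diagonal of the distance
# matrix, the sliding dot product QT can be updated in O(1) per offset `g`
# instead of being recomputed, and the top-k arrays of those neighboring
# subsequences are refined with the resulting distances.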
j = I[thread_idx, i, 0] # Given the squared distance, work backwards and compute QT QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( From b978c70880281bfba9535ba0ff5d536c806b546e Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 12:08:11 -0600 Subject: [PATCH 184/416] Corrected format --- stumpy/scrump.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index bc80e6585..db5417010 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -116,7 +116,7 @@ def _compute_PI( zone_stop = min(l, i + excl_zone) squared_distance_profile[zone_start : zone_stop + 1] = np.inf - if excl_zone is not None: # self-join + if excl_zone is not None: # self-join # note: S_index = T[index: index + m] # `v = squared_distance_profile[idx]` is (the square of) # `dist(S_i, S_idx)`, which is the same as `dist(S_idx, S_i)`. So, @@ -131,13 +131,17 @@ def _compute_PI( ) for idx in IDX: d_squared = squared_distance_profile[idx] - pos = np.searchsorted(P_squared[thread_idx, idx], d_squared, side="right") + pos = np.searchsorted( + P_squared[thread_idx, idx], d_squared, side="right" + ) core._shift_insert_at_index(P_squared[thread_idx, idx], pos, d_squared) core._shift_insert_at_index(I[thread_idx, idx], pos, i) # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` nn_of_i = np.argmin(squared_distance_profile) - core._shift_insert_at_index(P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i]) + core._shift_insert_at_index( + P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i] + ) core._shift_insert_at_index(I[thread_idx, i], 0, nn_of_i) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover @@ -169,14 +173,18 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i + g], d_squared, side="right" ) - core._shift_insert_at_index(P_squared[thread_idx, i + g], pos, d_squared) + core._shift_insert_at_index( + P_squared[thread_idx, i + g], pos, d_squared + ) core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) if d_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( P_squared[thread_idx, j + g], d_squared, side="right" ) - core._shift_insert_at_index(P_squared[thread_idx, j + g], pos, d_squared) + core._shift_insert_at_index( + P_squared[thread_idx, j + g], pos, d_squared + ) core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime @@ -194,14 +202,18 @@ def _compute_PI( pos = np.searchsorted( P_squared[thread_idx, i - g], d_squared, side="right" ) - core._shift_insert_at_index(P_squared[thread_idx, i - g], pos, d_squared) + core._shift_insert_at_index( + P_squared[thread_idx, i - g], pos, d_squared + ) core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) if d_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( P_squared[thread_idx, j - g], d_squared, side="right" ) - core._shift_insert_at_index(P_squared[thread_idx, j - g], pos, d_squared) + core._shift_insert_at_index( + P_squared[thread_idx, j - g], pos, d_squared + ) core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) From fa33084098032052fff762196d2658b49f1e44a7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 12:10:23 -0600 Subject: [PATCH 185/416] Corrected style --- stumpy/scrump.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index db5417010..0c9da42ea 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -121,11 +121,12 @@ def _compute_PI( # `v = 
squared_distance_profile[idx]` is (the square of) # `dist(S_i, S_idx)`, which is the same as `dist(S_idx, S_i)`. So, # `squared_distance_profile[idx]` is (the square of) distane from `S_idx` - # to one of its neighbors, `S_i`. Therefore, the value `v` can be used to - # update the TopK of `S_idx`, stored "ascendingly" in `P_squared[thread_idx, idx, :]`. + # to one of its neighbors, `S_i`. Therefore, the value `v` can be + # used to update the TopK of `S_idx`, stored "ascendingly" in + # `P_squared[thread_idx, idx, :]`. - # `P_squared[thread_idx, idx, :]` in inf for those `idx` that are in the trivial zone, - # including the `i` itself. So, those will not be updated here. + # `P_squared[thread_idx, idx, :]` in inf for those `idx` that are in + # the trivial zone, including the `i` itself. Those are not updated here. IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) From dab7c47c1823e7e3dfa128c79213d8c225a011f0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 12:54:36 -0600 Subject: [PATCH 186/416] Enhanced naive scrump to return TopK matrix profile --- tests/naive.py | 54 ++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index ef5a42d78..cf5c2fa31 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1456,7 +1456,7 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): return P, I -def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s): +def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): dist_matrix = distance_matrix(T_A, T_B, m) n_A = T_A.shape[0] @@ -1478,42 +1478,40 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s): diags_ranges_start = diags_ranges[0, 0] diags_ranges_stop = diags_ranges[0, 1] - out = np.full((l, 4), np.inf, dtype=object) - out[:, 1:] = -1 - left_P = np.full(l, np.inf, dtype=np.float64) - right_P = np.full(l, np.inf, dtype=np.float64) + P = np.full((l, k + 2), np.inf, dtype=np.float64) # Topk + left/ right + I = np.full((l, k + 2), -1, dtype=np.int64) # Topk + left/ right for diag_idx in range(diags_ranges_start, diags_ranges_stop): - k = diags[diag_idx] + g = diags[diag_idx] for i in range(n_A - m + 1): for j in range(n_B - m + 1): - if j - i == k: - if dist_matrix[i, j] < out[i, 0]: - out[i, 0] = dist_matrix[i, j] - out[i, 1] = i + k - - if exclusion_zone is not None and dist_matrix[i, j] < out[i + k, 0]: - out[i + k, 0] = dist_matrix[i, j] - out[i + k, 1] = i + if j - i == g: + d = dist_matrix[i, j] + if d < P[i, k - 1]: + # update TopK of P[i] + idx = searchsorted_right(P[i], d) + P[i, :k] = np.insert(P[i, :k], idx, d)[:-1] + I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] + + if exclusion_zone is not None and d < P[i + g, k - 1]: + idx = searchsorted_right(P[i + g], d) + P[i + g, :k] = np.insert(P[i + g, :k], idx, d)[:-1] + I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] # left matrix profile and left matrix profile indices - if ( - exclusion_zone is not None - and i < i + k - and dist_matrix[i, j] < left_P[i + k] - ): - left_P[i + k] = dist_matrix[i, j] - out[i + k, 2] = i + if exclusion_zone is not None and i < i + g and d < P[i + g, k]: + P[i + g, k] = d + I[i + g, k] = i # right matrix profile and right matrix profile indices - if ( - exclusion_zone is not None - and i + k > i - and dist_matrix[i, j] < right_P[i] - ): - right_P[i] = dist_matrix[i, j] - out[i, 3] = i + k + if exclusion_zone is not None and i + g > i and d < P[i, k + 1]: + P[i, k + 1] = d + I[i, k + 
1] = i + g + + out = np.empty((l, 2 * k + 2), dtype=object) + out[:, :k] = P[:, :k] + out[:, k:] = I return out From ed30ea0d35a7f8b45ab0d284128fce0ed62dcdec Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 12:55:45 -0600 Subject: [PATCH 187/416] Added new test function --- tests/test_scrump.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 0989010ce..1d581e2c5 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -726,3 +726,43 @@ def test_prescrump_A_B_join_KNN(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +@pytest.mark.parametrize("percentages", percentages) +def test_scrump_self_join_KNN(T_A, T_B, percentages): + m = 3 + zone = int(np.ceil(m / 4)) + + for k in range(2, 4): + for percentage in percentages: + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None, k=k) + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] + ref_left_I = ref_mp[:, 2] + ref_right_I = ref_mp[:, 3] + + np.random.seed(seed) + approx = scrump( + T_B, + m, + ignore_trivial=True, + percentage=percentage, + pre_scrump=False, + k=k, + ) + approx.update() + comp_P = approx.P_ + comp_I = approx.I_ + comp_left_I = approx.left_I_ + comp_right_I = approx.right_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(comp_P) + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_I, comp_left_I) + npt.assert_almost_equal(ref_right_I, comp_right_I) From 3fa9f54272ca6c272688b1bf40437892cdf212ed Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 13:15:06 -0600 Subject: [PATCH 188/416] Enhanced scrump to return TopK matrix profile --- stumpy/scrump.py | 89 ++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 0c9da42ea..2388676bd 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -481,6 +481,11 @@ class scrump: The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray @@ -544,6 +549,7 @@ def __init__( s=None, normalize=True, p=2.0, + k=1, ): """ Initialize the `scrump` object @@ -586,6 +592,11 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when k > 1. 
""" self._ignore_trivial = ignore_trivial @@ -642,11 +653,15 @@ def __init__( self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] self._l = self._n_A - self._m + 1 + self._k = k - self._P = np.empty((self._l, 3), dtype=np.float64) - self._I = np.empty((self._l, 3), dtype=np.int64) - self._P[:, :] = np.inf - self._I[:, :] = -1 + self._P = np.full((self._l, self._k), np.inf, dtype=np.float64) + self._PL = np.full(self._l, np.inf, dtype=np.float64) + self._PR = np.full(self._l, np.inf, dtype=np.float64) + + self._I = np.full((self._l, self._k), -1, dtype=np.int64) + self._IL = np.full(self._l, -1, dtype=np.int64) + self._IR = np.full(self._l, -1, dtype=np.int64) self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) @@ -655,13 +670,11 @@ def __init__( if pre_scrump: if self._ignore_trivial: - P, I = prescrump(T_A, m, s=s) + P, I = prescrump(T_A, m, s=s, k=k) else: - P, I = prescrump(T_A, m, T_B=T_B, s=s) - for i in range(P.shape[0]): - if self._P[i, 0] > P[i]: - self._P[i, 0] = P[i] - self._I[i, 0] = I[i] + P, I = prescrump(T_A, m, T_B=T_B, s=s, k=k) + + core._merge_topk_PI(self._P, P, self._I, I) if self._ignore_trivial: self._diags = np.random.permutation( @@ -692,9 +705,9 @@ def __init__( def update(self): """ - Update the matrix profile and the matrix profile indices by computing - additional new distances (limited by `percentage`) that make up the full - distance matrix. + Update the (top-k) matrix profile and the (top-k) matrix profile indices by + computing additional new distances (limited by `percentage`) that make up + the full distance matrix. """ if self._chunk_idx < self._n_chunks: start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx] @@ -715,52 +728,56 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, - 1, # revise module to accept parameter k for top-k matrix profile + self._k, ) - P = np.column_stack((P, PL, PR)) - I = np.column_stack((I, IL, IR)) - - # Update matrix profile and indices - for i in range(self._P.shape[0]): - if self._P[i, 0] > P[i, 0]: - self._P[i, 0] = P[i, 0] - self._I[i, 0] = I[i, 0] - # left matrix profile and left matrix profile indices - if self._P[i, 1] > P[i, 1]: - self._P[i, 1] = P[i, 1] - self._I[i, 1] = I[i, 1] - # right matrix profile and right matrix profile indices - if self._P[i, 2] > P[i, 2]: - self._P[i, 2] = P[i, 2] - self._I[i, 2] = I[i, 2] + # Update (top-k) matrix profile and indices + core._merge_topk_PI(self._P, P, self._I, I) + + # update left matrix profile and indices + cond = PL < self._PL + self._PL = np.where(cond, PL, self._PL) + self._IL = np.where(cond, IL, self._IL) + + # update right matrix profile and indices + cond = PR < self._PR + self._PR = np.where(cond, PR, self._PR) + self._IR = np.where(cond, IR, self._IR) self._chunk_idx += 1 @property def P_(self): """ - Get the updated matrix profile + Get the updated (top-k) matrix profile. When `k=1`, it is a 1d array. + When `k>1`, it is a 2d array with exactly k columns consist of (top-k) matrix + profile. """ - return self._P[:, 0].astype(np.float64) + if self._k == 1: + return self._P.reshape((self._P.shape[0],)).astype(np.float64) + return self._P.astype(np.float64) @property def I_(self): """ - Get the updated matrix profile indices + Get the updated (top-k) matrix profile indices. When `k=1`, it is a 1d array. + When `k>1`, it is a 2d array with exactly k columns consist of (top-k) matrix + profile indices. 
""" - return self._I[:, 0].astype(np.int64) + if self._k == 1: + return self._I.reshape((self._I.shape[0],)).astype(np.int64) + return self._I.astype(np.int64) @property def left_I_(self): """ Get the updated left matrix profile indices """ - return self._I[:, 1].astype(np.int64) + return self._IL.astype(np.int64) @property def right_I_(self): """ Get the updated right matrix profile indices """ - return self._I[:, 2].astype(np.int64) + return self._IR.astype(np.int64) From c282f2c0df809e312c04ea6ef0d5f76d61294765 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 13:19:21 -0600 Subject: [PATCH 189/416] Fixed test function --- tests/test_scrump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 1d581e2c5..206f432e3 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -740,10 +740,10 @@ def test_scrump_self_join_KNN(T_A, T_B, percentages): np.random.seed(seed) ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None, k=k) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P = ref_mp[:, :k] + ref_I = ref_mp[:, k : 2 * k] + ref_left_I = ref_mp[:, 2 * k] + ref_right_I = ref_mp[:, 2 * k + 1] np.random.seed(seed) approx = scrump( From 380cf1d5c29ea36a42742aafb883fb1cfff37ec8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 13:55:02 -0600 Subject: [PATCH 190/416] Temporarily added parameter k to scraamp to pass non_normalized tests --- stumpy/scraamp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 44a8b7c83..a6f26453c 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -423,6 +423,7 @@ def __init__( pre_scraamp=False, s=None, p=2.0, + k=1, # this function needs to be modified for top-k ): """ Initialize the `scraamp` object From 4ec3c5a6630ffcb8af9a226ebfde248dd5b8c6b8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 14 Jun 2022 14:00:40 -0600 Subject: [PATCH 191/416] Added test function to test TopK scrump in AB_join --- tests/test_scrump.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 206f432e3..84d38d50c 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -766,3 +766,43 @@ def test_scrump_self_join_KNN(T_A, T_B, percentages): npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) npt.assert_almost_equal(ref_right_I, comp_right_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +@pytest.mark.parametrize("percentages", percentages) +def test_scrump_A_B_join_KNN(T_A, T_B, percentages): + m = 3 + for k in range(2, 4): + for percentage in percentages: + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None, k=k) + ref_P = ref_mp[:, :k] + ref_I = ref_mp[:, k : 2 * k] + ref_left_I = ref_mp[:, 2 * k] + ref_right_I = ref_mp[:, 2 * k + 1] + + np.random.seed(seed) + approx = scrump( + T_A, + m, + T_B, + ignore_trivial=False, + percentage=percentage, + pre_scrump=False, + k=k, + ) + approx.update() + comp_P = approx.P_ + comp_I = approx.I_ + comp_left_I = approx.left_I_ + comp_right_I = approx.right_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(comp_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_I, comp_left_I) + npt.assert_almost_equal(ref_right_I, comp_right_I) From 
40132d4c9a3bd70988b7b08b46b2467bff81d9b4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 15:36:05 -0600 Subject: [PATCH 192/416] Refactored --- stumpy/core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 3a22ac92c..9bba9d2a5 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2596,12 +2596,8 @@ def _merge_topk_PI(PA, PB, IA, IB): if PB[i, j] < PA[i, -1]: idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start - for g in range(PB.shape[1] - 1, idx, -1): - PA[i, g] = PA[i, g - 1] - IA[i, g] = IA[i, g - 1] - - PA[i, idx] = PB[i, j] - IA[i, idx] = IB[i, j] + _shift_insert_at_index(PA[i], idx, PB[i, j]) + _shift_insert_at_index(IA[i], idx, IB[i, j]) start = idx stop += 1 # because of shifting elements to the right by one From b0132ca5919e4180fd7388fd80037b8faec77eb8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 15:45:13 -0600 Subject: [PATCH 193/416] Added definition of parameter k to docstring --- stumpy/scraamp.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index a6f26453c..caeca4ad3 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -388,6 +388,11 @@ class scraamp: p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray @@ -460,6 +465,11 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. """ self._ignore_trivial = ignore_trivial self._p = p From fdfdf07d74f9d9145d1dfcc1242203f3d437a2f2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 15:48:23 -0600 Subject: [PATCH 194/416] Improved docstring --- stumpy/scrump.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 2388676bd..aec047012 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -293,14 +293,14 @@ def _prescrump( Returns ------- out1 : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile. When k > 1, the output has exactly k columns - consist of the top-k matrix profile. + The (top-k) Matrix profile. When k=1 (default), the first (and only) column + in this 2D array consists of the matrix profile. When k > 1, the output + has exactly k columns consist of the top-k matrix profile. out2 : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile indices. When k > 1, the output has exactly - k columns consist of the top-k matrix profile indices. + The (top-k) Matrix profile indices. When k=1 (default), the first (and only) + column in this 2D array consists of the matrix profile indices. When k > 1, + the output has exactly k columns consist of the top-k matrix profile. 
Notes ----- From 7d9c76a8293b83ad4ebacbdc2f2c3da192625200 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 15:57:57 -0600 Subject: [PATCH 195/416] Removed trailing colon --- stumpy/stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index e6e25c834..901c1afe8 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -515,7 +515,7 @@ def _stump( PL = np.sqrt(p_norm_L) PR = np.sqrt(p_norm_R) - return P, PL, PR, I, IL[0, :], IR[0, :] + return P, PL, PR, I, IL[0], IR[0] @core.non_normalized(aamp) From 26749889dcdb44e01ebbebdc8ce0bdda83cf9f04 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 15:59:45 -0600 Subject: [PATCH 196/416] Cleaned code --- stumpy/stumped.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index fba8947ae..dc2978318 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -264,9 +264,6 @@ def stumped( ) ) - profile = np.empty((l, 2 * k)) - indices = np.empty((l, 2 * k)) - results = dask_client.gather(futures) profile, profile_L, profile_R, indices, indices_L, indices_R = results[0] From a1855a05fe14f6a6d1838340bf9eb7a9b37c51b6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 16:29:05 -0600 Subject: [PATCH 197/416] Avoided allocating new memory in inner for-loop --- tests/test_core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index b7ea76b8c..993f11afe 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1092,12 +1092,14 @@ def test_merge_topk_PI(): def test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) + ref = np.empty(k, dtype=np.float64) + comp = np.empty(k, dtype=np.float64) indices = np.arange(k + 1) values = np.random.rand(k + 1) for (idx, v) in zip(indices, values): - ref = a.copy() - comp = a.copy() + ref[:] = a + comp[:] = a ref = np.insert(ref, idx, v)[:-1] core._shift_insert_at_index(comp, idx, v) # update comp in place From 5b561ffb2fd1d0b4de81b05ec60fdf8d251e283e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 16:30:38 -0600 Subject: [PATCH 198/416] Fixed typos --- stumpy/scrump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index aec047012..441268fa4 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -120,12 +120,12 @@ def _compute_PI( # note: S_index = T[index: index + m] # `v = squared_distance_profile[idx]` is (the square of) # `dist(S_i, S_idx)`, which is the same as `dist(S_idx, S_i)`. So, - # `squared_distance_profile[idx]` is (the square of) distane from `S_idx` + # `squared_distance_profile[idx]` is (the square of) distance from `S_idx` # to one of its neighbors, `S_i`. Therefore, the value `v` can be # used to update the TopK of `S_idx`, stored "ascendingly" in # `P_squared[thread_idx, idx, :]`. - # `P_squared[thread_idx, idx, :]` in inf for those `idx` that are in + # `P_squared[thread_idx, idx, :]` is inf for those `idx` that are in # the trivial zone, including the `i` itself. Those are not updated here. 
IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] From 3d02bf447eb86cb80fe5970099fa3f4316a54a24 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 18:41:30 -0600 Subject: [PATCH 199/416] Improved comments --- stumpy/scrump.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 441268fa4..050e610ce 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -116,17 +116,12 @@ def _compute_PI( zone_stop = min(l, i + excl_zone) squared_distance_profile[zone_start : zone_stop + 1] = np.inf - if excl_zone is not None: # self-join - # note: S_index = T[index: index + m] - # `v = squared_distance_profile[idx]` is (the square of) - # `dist(S_i, S_idx)`, which is the same as `dist(S_idx, S_i)`. So, - # `squared_distance_profile[idx]` is (the square of) distance from `S_idx` - # to one of its neighbors, `S_i`. Therefore, the value `v` can be - # used to update the TopK of `S_idx`, stored "ascendingly" in - # `P_squared[thread_idx, idx, :]`. - - # `P_squared[thread_idx, idx, :]` is inf for those `idx` that are in - # the trivial zone, including the `i` itself. Those are not updated here. + if excl_zone is not None: + # Note that the squared distance, `squared_distance_profile[j]`, + # between subsequences `S_i = T[i : i + m]` and `S_j = T[j : j + m]` + # can be used to update the top-k for BOTH subsequence `i` and + # subsequence `j`. We update the latter here. + IDX = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) @@ -148,14 +143,14 @@ def _compute_PI( if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 else: - # update P_squared[thread_idx, index, :] for those `index` that are - # in the vicinity of `i` or its 1NN, `j`. j = I[thread_idx, i, 0] # Given the squared distance, work backwards and compute QT QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( m * M_T[j] * μ_Q[i] ) QT_j_prime = QT_j + # Update Top-k of BOTH subsequences at i+g and j+g (i.e. left neighbor of i, j), + # by using the distance between `S_(i+g)` and `S_(j+g)` for g in range(1, min(s, l - max(i, j))): QT_j = ( QT_j @@ -189,6 +184,8 @@ def _compute_PI( core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime + # Update Top-k of BOTH subsequences at i-g and j-g (i.e. 
left neighbor of i, j), + # by using the distance between `S_(i-g)` and `S_(j-g)` for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] d_squared = core._calculate_squared_distance( From 551d2233554ef660e17e01e25bf7a5e04469c64e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 20:17:39 -0600 Subject: [PATCH 200/416] Avoided allocating new memory in each iteration --- tests/test_gpu_stump.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index ef8c03c1d..4d3093c99 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -55,6 +55,7 @@ def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) def test_gpu_searchsorted(): n = 3 * config.STUMPY_THREADS_PER_BLOCK + 1 + V = np.empty(n, dtype=np.float64) threads_per_block = config.STUMPY_THREADS_PER_BLOCK blocks_per_grid = math.ceil(n / threads_per_block) @@ -66,7 +67,7 @@ def test_gpu_searchsorted(): A = np.sort(np.random.rand(n, k), axis=1) device_A = cuda.to_device(A) - V = np.random.rand(n) + V[:] = np.random.rand(n) for i, idx in enumerate(np.random.choice(np.arange(n), size=k, replace=False)): V[idx] = A[idx, i] # create ties device_V = cuda.to_device(V) From 0de3a2812ec146776f8f7b2f43f1de67b2031758 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 20:27:10 -0600 Subject: [PATCH 201/416] Same ndim in output regardless of value of k --- stumpy/scrump.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 050e610ce..89aecba9e 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -746,23 +746,20 @@ def update(self): @property def P_(self): """ - Get the updated (top-k) matrix profile. When `k=1`, it is a 1d array. - When `k>1`, it is a 2d array with exactly k columns consist of (top-k) matrix - profile. + Get the updated (top-k) matrix profile. When k=1 (default), the first (and only) + column in this 2D array consists of the matrix profile. When k > 1, the output + has exactly k columns consist of the top-k matrix profile. """ - if self._k == 1: - return self._P.reshape((self._P.shape[0],)).astype(np.float64) return self._P.astype(np.float64) @property def I_(self): """ - Get the updated (top-k) matrix profile indices. When `k=1`, it is a 1d array. - When `k>1`, it is a 2d array with exactly k columns consist of (top-k) matrix - profile indices. + Get the updated (top-k) matrix profile indices. When k=1 (default), the + first (and only) column in this 2D array consists of the matrix profile + indices. When k > 1, the output has exactly k columns consist of the top-k + matrix profile indices. 
""" - if self._k == 1: - return self._I.reshape((self._I.shape[0],)).astype(np.int64) return self._I.astype(np.int64) @property From d1e95f6465ff943cad939f55aee70fe7a35a4e2c Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 20:35:34 -0600 Subject: [PATCH 202/416] Revised docstrings --- stumpy/scrump.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 89aecba9e..95b19e18f 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -486,19 +486,26 @@ class scrump: Attributes ---------- P_ : numpy.ndarray - The updated matrix profile + The updated (top-k) matrix profile I_ : numpy.ndarray - The updated matrix profile indices + The updated (top-k) matrix profile indices + + left_I_ : numpy.ndarray + The updated left (top-1) matrix profile indices + + right_I_ : numpy.ndarray + The updated right (top-1) matrix profile indices + Methods ------- update() Update the matrix profile and the matrix profile indices by computing additional new distances (limited by `percentage`) that make up the full - distance matrix. Each output contains three columns that correspond to - the matrix profile, the left matrix profile, and the right matrix profile, - respectively. + distance matrix. The outputs are (top-k) matrix profile, (top-1) left + matrix profile, (top-1) right matrix profile, (top-k) matrix profile indices, + (top-1) left matrix profile indices, (top-1) right matrix profile indices. See Also -------- @@ -765,13 +772,13 @@ def I_(self): @property def left_I_(self): """ - Get the updated left matrix profile indices + Get the updated left (top-1) matrix profile indices """ return self._IL.astype(np.int64) @property def right_I_(self): """ - Get the updated right matrix profile indices + Get the updated right (top-1) matrix profile indices """ return self._IR.astype(np.int64) From bfc4c8eae348136c0f21b81ce5496ef77dab8abb Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:16:29 -0600 Subject: [PATCH 203/416] Enhanced function to perform shift left as well --- stumpy/core.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9bba9d2a5..051155d51 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,27 +2604,48 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit -def _shift_insert_at_index(a, idx, v): +def _shift_insert_at_index(a, idx, v, shift=1): """ Insert value `v` into array `a` at index `idx` (in place) and discard - the last element (i.e. without changing the length of `a`) + the last element (i.e. without changing the length of `a`) when `shift=1` (default). + When `shift=-1`, the first element will be discarded instead. + + Note + ---- + No check is performed to ensure the value of parameter `shift` is 1 or -1. + It is user's responsibility to provide a valid value for this parameter. Parameters ---------- a: numpy.ndarray - a 1d array + A 1d array idx: int - the index at which the value `v` should be inserted. This can be any - integer number from `0` to `len(a) - 1` + The index at which the value `v` should be inserted. This can be any + integer number from `0` to `len(a) - 1`. v: float - the value that should be inserted into array `a` at index `idx` + The value that should be inserted into array `a` at index `idx` + + shift: int, default 1 + The value 1 (default) indicates discarding the last element after inserting + value `v` at index `idx`. 
The other value, -1, indicates discarding the first + element after inserting value `v` at index `idx` Returns ------- None """ - if idx < len(a): - a[idx + 1 :] = a[idx:-1] - a[idx] = v + if shift == 1: + if 0 <= idx < len(a): + a[idx + 1 :] = a[idx:-1] + a[idx] = v + + elif shift == -1: + if 0 < idx <= len(a): + a[: idx - 1] = a[1 : idx] + # elements were shifted to left, and thus the insertion becomes `idx-1` + a[idx - 1] = v + + else: + pass From bbcb71f31ded82981171226229f90eab01df6ae9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:20:24 -0600 Subject: [PATCH 204/416] Enhanced test function to test newly added functionality --- tests/test_core.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 993f11afe..65b25cdfc 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1097,11 +1097,24 @@ def test_shift_insert_at_index(): indices = np.arange(k + 1) values = np.random.rand(k + 1) + + # test shift = 1 for (idx, v) in zip(indices, values): ref[:] = a comp[:] = a ref = np.insert(ref, idx, v)[:-1] - core._shift_insert_at_index(comp, idx, v) # update comp in place + core._shift_insert_at_index(comp, idx, v, shift=1) # update comp in place + + npt.assert_array_equal(ref, comp) + + + # test shift = -1 + for (idx, v) in zip(indices, values): + ref[:] = a + comp[:] = a + + ref = np.insert(ref, idx, v)[1:] + core._shift_insert_at_index(comp, idx, v, shift=-1) # update comp in place npt.assert_array_equal(ref, comp) From ec889b476196a5b9ad23848978bd55334fc6a8ab Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:21:44 -0600 Subject: [PATCH 205/416] Fixed format --- stumpy/core.py | 2 +- tests/test_core.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 051155d51..b4b102e83 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2643,7 +2643,7 @@ def _shift_insert_at_index(a, idx, v, shift=1): elif shift == -1: if 0 < idx <= len(a): - a[: idx - 1] = a[1 : idx] + a[: idx - 1] = a[1:idx] # elements were shifted to left, and thus the insertion becomes `idx-1` a[idx - 1] = v diff --git a/tests/test_core.py b/tests/test_core.py index 65b25cdfc..e3854b889 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1108,7 +1108,6 @@ def test_shift_insert_at_index(): npt.assert_array_equal(ref, comp) - # test shift = -1 for (idx, v) in zip(indices, values): ref[:] = a From f7ef962a66ee794cb502a582eba7f55a3d6a0ca4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:25:33 -0600 Subject: [PATCH 206/416] Fixed format --- stumpy/scraamp.py | 6 +++--- stumpy/scrump.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index caeca4ad3..a0891bf22 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -467,9 +467,9 @@ def __init__( The p-norm to apply for computing the Minkowski distance. k : int, default 1 - The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage - when k > 1. + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when k > 1. 
""" self._ignore_trivial = ignore_trivial self._p = p diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 95b19e18f..bb42b33fa 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -149,8 +149,8 @@ def _compute_PI( m * M_T[j] * μ_Q[i] ) QT_j_prime = QT_j - # Update Top-k of BOTH subsequences at i+g and j+g (i.e. left neighbor of i, j), - # by using the distance between `S_(i+g)` and `S_(j+g)` + # Update Top-k of BOTH subsequences at i+g and j+g (i.e. left neighbor + # of i, j), by using the distance between `S_(i+g)` and `S_(j+g)` for g in range(1, min(s, l - max(i, j))): QT_j = ( QT_j @@ -184,8 +184,8 @@ def _compute_PI( core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime - # Update Top-k of BOTH subsequences at i-g and j-g (i.e. left neighbor of i, j), - # by using the distance between `S_(i-g)` and `S_(j-g)` + # Update Top-k of BOTH subsequences at i-g and j-g (i.e. left neighbor + # of i, j), by using the distance between `S_(i-g)` and `S_(j-g)` for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] d_squared = core._calculate_squared_distance( From 92889162aa2673fcb3019637010643c4d8de9007 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:32:34 -0600 Subject: [PATCH 207/416] Removed/Renamed intermediate variables --- stumpy/scrump.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index bb42b33fa..a00e19f4f 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -122,16 +122,17 @@ def _compute_PI( # can be used to update the top-k for BOTH subsequence `i` and # subsequence `j`. We update the latter here. - IDX = np.flatnonzero( + idx = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) - for idx in IDX: - d_squared = squared_distance_profile[idx] + for j in idx: pos = np.searchsorted( - P_squared[thread_idx, idx], d_squared, side="right" + P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) - core._shift_insert_at_index(P_squared[thread_idx, idx], pos, d_squared) - core._shift_insert_at_index(I[thread_idx, idx], pos, i) + core._shift_insert_at_index( + P_squared[thread_idx, j], pos, squared_distance_profile[j] + ) + core._shift_insert_at_index(I[thread_idx, j], pos, i) # find EXACT (not approx.) 
value of `P_squared[thread_idx, i, 0]` nn_of_i = np.argmin(squared_distance_profile) From 163a775e36811cd9eecb07ea5f4729c05941b7ba Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 21:40:44 -0600 Subject: [PATCH 208/416] Renamed variable for the sake of consistency --- stumpy/scrump.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index a00e19f4f..05aa6b63b 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -158,7 +158,7 @@ def _compute_PI( - T_B[i + g - 1] * T_A[j + g - 1] + T_B[i + g + m - 1] * T_A[j + g + m - 1] ) - d_squared = core._calculate_squared_distance( + D_squared = core._calculate_squared_distance( m, QT_j, M_T[i + g], @@ -166,21 +166,21 @@ def _compute_PI( μ_Q[j + g], σ_Q[j + g], ) - if d_squared < P_squared[thread_idx, i + g, -1]: + if D_squared < P_squared[thread_idx, i + g, -1]: pos = np.searchsorted( - P_squared[thread_idx, i + g], d_squared, side="right" + P_squared[thread_idx, i + g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, i + g], pos, d_squared + P_squared[thread_idx, i + g], pos, D_squared ) core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) - if d_squared < P_squared[thread_idx, j + g, -1]: + if D_squared < P_squared[thread_idx, j + g, -1]: pos = np.searchsorted( - P_squared[thread_idx, j + g], d_squared, side="right" + P_squared[thread_idx, j + g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, j + g], pos, d_squared + P_squared[thread_idx, j + g], pos, D_squared ) core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) @@ -189,7 +189,7 @@ def _compute_PI( # of i, j), by using the distance between `S_(i-g)` and `S_(j-g)` for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] - d_squared = core._calculate_squared_distance( + D_squared = core._calculate_squared_distance( m, QT_j, M_T[i - g], @@ -197,21 +197,21 @@ def _compute_PI( μ_Q[j - g], σ_Q[j - g], ) - if d_squared < P_squared[thread_idx, i - g, -1]: + if D_squared < P_squared[thread_idx, i - g, -1]: pos = np.searchsorted( - P_squared[thread_idx, i - g], d_squared, side="right" + P_squared[thread_idx, i - g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, i - g], pos, d_squared + P_squared[thread_idx, i - g], pos, D_squared ) core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) - if d_squared < P_squared[thread_idx, j - g, -1]: + if D_squared < P_squared[thread_idx, j - g, -1]: pos = np.searchsorted( - P_squared[thread_idx, j - g], d_squared, side="right" + P_squared[thread_idx, j - g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, j - g], pos, d_squared + P_squared[thread_idx, j - g], pos, D_squared ) core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) From cf3748da4967d2801da34885ba54cd6b1af8b06d Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 22:30:31 -0600 Subject: [PATCH 209/416] Avoided shape mismatch by reshaping ndarray --- tests/test_scrump.py | 76 ++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 84d38d50c..0c9d54672 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -108,8 +108,8 @@ def test_scrump_self_join(T_A, T_B, percentages): np.random.seed(seed) ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0] 
- ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -141,8 +141,8 @@ def test_scrump_A_B_join(T_A, T_B, percentages): np.random.seed(seed) ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -175,8 +175,8 @@ def test_scrump_A_B_join_swap(T_A, T_B, percentages): np.random.seed(seed) ref_mp = naive.scrump(T_B, m, T_A, percentage, None, False, None) - ref_P = ref_mp[:, 0] - # ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + # ref_I = ref_mp[:, 1].reshape(-1, 1) ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -211,8 +211,8 @@ def test_scrump_self_join_larger_window(T_A, T_B, m, percentages): np.random.seed(seed) ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -241,8 +241,8 @@ def test_scrump_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -262,8 +262,8 @@ def test_scrump_self_join_full(T_A, T_B): npt.assert_almost_equal(ref_right_I, comp_right_I) ref_mp = stump(T_B, m, ignore_trivial=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -279,8 +279,8 @@ def test_scrump_A_B_join_full(T_A, T_B): m = 3 ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -300,8 +300,8 @@ def test_scrump_A_B_join_full(T_A, T_B): npt.assert_almost_equal(ref_right_I, comp_right_I) ref_mp = stump(T_A, m, T_B=T_B, ignore_trivial=False) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -317,8 +317,8 @@ def test_scrump_A_B_join_full_swap(T_A, T_B): m = 3 ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -345,8 +345,8 @@ def test_scrump_self_join_full_larger_window(T_A, T_B, m): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - 
ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -383,8 +383,8 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): if ref_P[i] < ref_mp[i, 0]: ref_mp[i, 0] = ref_P[i] ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 # ref_left_I = ref_mp[:, 2] # ref_right_I = ref_mp[:, 3] @@ -424,8 +424,8 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): if ref_P[i] < ref_mp[i, 0]: ref_mp[i, 0] = ref_P[i] ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -459,8 +459,8 @@ def test_scrump_plus_plus_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -488,8 +488,8 @@ def test_scrump_plus_plus_A_B_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -517,8 +517,8 @@ def test_scrump_plus_plus_A_B_join_full_swap(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -552,8 +552,8 @@ def test_scrump_constant_subsequence_self_join(percentages): np.random.seed(seed) ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -590,8 +590,8 @@ def test_scrump_identical_subsequence_self_join(percentages): np.random.seed(seed) ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - # ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + # ref_I = ref_mp[:, 1].reshape(-1, 1) # ref_left_I = ref_mp[:, 2] # ref_right_I = ref_mp[:, 3] @@ -636,8 +636,8 @@ def test_scrump_nan_inf_self_join( np.random.seed(seed) ref_mp = naive.scrump(T_B_sub, m, T_B_sub, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -670,8 +670,8 @@ def test_scrump_nan_zero_mean_self_join(percentages): np.random.seed(seed) 
ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] From 467f4a3171a11a0321fec64419af4b357658790e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 23:49:25 -0600 Subject: [PATCH 210/416] Refactored --- stumpy/stump.py | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 901c1afe8..a1570f8df 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -212,28 +212,26 @@ def _compute_diagonal( if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]: pearson = 1.0 - # ρ[thread_idx, i, :] is sorted ascendingly. To update - # it, Its first element (i.e. the smallest value - # of array ρ[thread_idx, i]) MUST be discarded. Therefore, - # if the insertion index of new value in `ρ[thread_idx, i]` is idx, - # then, it should be substracted by 1 since the left of idx is shifted - # to the left. + # ρ[thread_idx, i, :] is sorted ascendingly. It MUST be updated + # when the newly-calculated pearson value becomes greater than the + # first (i.e. smallest) element of this array. (Reminder: higher + # pearson value means lower distance, which is of our interest) if pearson > ρ[thread_idx, i, 0]: - idx = np.searchsorted(ρ[thread_idx, i], pearson) - ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1:idx] - ρ[thread_idx, i, idx - 1] = pearson - - I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1:idx] - I[thread_idx, i, idx - 1] = i + g + pos = np.searchsorted(ρ[thread_idx, i], pearson) + core._shift_insert_at_index( + ρ[thread_idx, i], pos, pearson, shift=-1 + ) + core._shift_insert_at_index(I[thread_idx, i], pos, i + g, shift=-1) if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, 0]: - idx = np.searchsorted(ρ[thread_idx, i + g], pearson) - ρ[thread_idx, i + g, : idx - 1] = ρ[thread_idx, i + g, 1:idx] - ρ[thread_idx, i + g, idx - 1] = pearson - - I[thread_idx, i + g, : idx - 1] = I[thread_idx, i + g, 1:idx] - I[thread_idx, i + g, idx - 1] = i + pos = np.searchsorted(ρ[thread_idx, i + g], pearson) + core._shift_insert_at_index( + ρ[thread_idx, i + g], pos, pearson, shift=-1 + ) + core._shift_insert_at_index( + I[thread_idx, i + g], pos, i, shift=-1 + ) if i < i + g: # left pearson correlation and left matrix profile index @@ -477,12 +475,13 @@ def _stump( k - 1, -1, -1 ): # reverse iteration to preserve order in ties if ρ[0, i, 0] < ρ[thread_idx, i, j]: - idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) - ρ[0, i, : idx - 1] = ρ[0, i, 1:idx] - ρ[0, i, idx - 1] = ρ[thread_idx, i, j] - - I[0, i, : idx - 1] = I[0, i, 1:idx] - I[0, i, idx - 1] = I[thread_idx, i, j] + pos = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) + core._shift_insert_at_index( + ρ[0, i], pos, ρ[thread_idx, i, j], shift=-1 + ) + core._shift_insert_at_index( + I[0, i], pos, I[thread_idx, i, j], shift=-1 + ) if ρL[0, i] < ρL[thread_idx, i]: ρL[0, i] = ρL[thread_idx, i] From d0f59562c485e18210dfd92f5bc3fcccd8b196f8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 16 Jun 2022 23:54:12 -0600 Subject: [PATCH 211/416] Fixed comment --- stumpy/scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 05aa6b63b..6a979d2cd 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -150,7 +150,7 @@ def 
_compute_PI( m * M_T[j] * μ_Q[i] ) QT_j_prime = QT_j - # Update Top-k of BOTH subsequences at i+g and j+g (i.e. left neighbor + # Update Top-k of BOTH subsequences at i+g and j+g (i.e. right neighbor # of i, j), by using the distance between `S_(i+g)` and `S_(j+g)` for g in range(1, min(s, l - max(i, j))): QT_j = ( From 80b8594543f335ed4070e3548915bac158103591 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 17 Jun 2022 00:44:18 -0600 Subject: [PATCH 212/416] Refacored and Minor restructuring of lines --- stumpy/scrump.py | 168 +++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 6a979d2cd..d3c938e61 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -112,10 +112,91 @@ def _compute_PI( squared_distance_profile[:] = core._mass(Q, T_B, QT, μ_Q[i], σ_Q[i], M_T, Σ_T) squared_distance_profile[:] = np.square(squared_distance_profile) if excl_zone is not None: - zone_start = max(0, i - excl_zone) - zone_stop = min(l, i + excl_zone) - squared_distance_profile[zone_start : zone_stop + 1] = np.inf + core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) + # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` + nn = np.argmin(squared_distance_profile) + core._shift_insert_at_index( + P_squared[thread_idx, i], 0, squared_distance_profile[nn] + ) + core._shift_insert_at_index(I[thread_idx, i], 0, nn) + + if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover + I[thread_idx, i, 0] = -1 + continue + + j = I[thread_idx, i, 0] + # Given the squared distance, work backwards and compute QT + QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( + m * M_T[j] * μ_Q[i] + ) + QT_j_prime = QT_j + # Update Top-k of BOTH subsequences at i+g and j+g (i.e. right neighbor + # of i, j), by using the distance between `S_(i+g)` and `S_(j+g)` + for g in range(1, min(s, l - max(i, j))): + QT_j = ( + QT_j + - T_B[i + g - 1] * T_A[j + g - 1] + + T_B[i + g + m - 1] * T_A[j + g + m - 1] + ) + D_squared = core._calculate_squared_distance( + m, + QT_j, + M_T[i + g], + Σ_T[i + g], + μ_Q[j + g], + σ_Q[j + g], + ) + if D_squared < P_squared[thread_idx, i + g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, i + g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, i + g], pos, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) + + if D_squared < P_squared[thread_idx, j + g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, j + g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, j + g], pos, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) + + QT_j = QT_j_prime + # Update Top-k of BOTH subsequences at i-g and j-g (i.e. 
left neighbor + # of i, j), by using the distance between `S_(i-g)` and `S_(j-g)` + for g in range(1, min(s, i + 1, j + 1)): + QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] + D_squared = core._calculate_squared_distance( + m, + QT_j, + M_T[i - g], + Σ_T[i - g], + μ_Q[j - g], + σ_Q[j - g], + ) + if D_squared < P_squared[thread_idx, i - g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, i - g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, i - g], pos, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) + + if D_squared < P_squared[thread_idx, j - g, -1]: + pos = np.searchsorted( + P_squared[thread_idx, j - g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, j - g], pos, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) + + # self-join only if excl_zone is not None: # Note that the squared distance, `squared_distance_profile[j]`, # between subsequences `S_i = T[i : i + m]` and `S_j = T[j : j + m]` @@ -134,87 +215,6 @@ def _compute_PI( ) core._shift_insert_at_index(I[thread_idx, j], pos, i) - # find EXACT (not approx.) value of `P_squared[thread_idx, i, 0]` - nn_of_i = np.argmin(squared_distance_profile) - core._shift_insert_at_index( - P_squared[thread_idx, i], 0, squared_distance_profile[nn_of_i] - ) - core._shift_insert_at_index(I[thread_idx, i], 0, nn_of_i) - - if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover - I[thread_idx, i, 0] = -1 - else: - j = I[thread_idx, i, 0] - # Given the squared distance, work backwards and compute QT - QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( - m * M_T[j] * μ_Q[i] - ) - QT_j_prime = QT_j - # Update Top-k of BOTH subsequences at i+g and j+g (i.e. right neighbor - # of i, j), by using the distance between `S_(i+g)` and `S_(j+g)` - for g in range(1, min(s, l - max(i, j))): - QT_j = ( - QT_j - - T_B[i + g - 1] * T_A[j + g - 1] - + T_B[i + g + m - 1] * T_A[j + g + m - 1] - ) - D_squared = core._calculate_squared_distance( - m, - QT_j, - M_T[i + g], - Σ_T[i + g], - μ_Q[j + g], - σ_Q[j + g], - ) - if D_squared < P_squared[thread_idx, i + g, -1]: - pos = np.searchsorted( - P_squared[thread_idx, i + g], D_squared, side="right" - ) - core._shift_insert_at_index( - P_squared[thread_idx, i + g], pos, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) - - if D_squared < P_squared[thread_idx, j + g, -1]: - pos = np.searchsorted( - P_squared[thread_idx, j + g], D_squared, side="right" - ) - core._shift_insert_at_index( - P_squared[thread_idx, j + g], pos, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) - - QT_j = QT_j_prime - # Update Top-k of BOTH subsequences at i-g and j-g (i.e. 
left neighbor - # of i, j), by using the distance between `S_(i-g)` and `S_(j-g)` - for g in range(1, min(s, i + 1, j + 1)): - QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] - D_squared = core._calculate_squared_distance( - m, - QT_j, - M_T[i - g], - Σ_T[i - g], - μ_Q[j - g], - σ_Q[j - g], - ) - if D_squared < P_squared[thread_idx, i - g, -1]: - pos = np.searchsorted( - P_squared[thread_idx, i - g], D_squared, side="right" - ) - core._shift_insert_at_index( - P_squared[thread_idx, i - g], pos, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) - - if D_squared < P_squared[thread_idx, j - g, -1]: - pos = np.searchsorted( - P_squared[thread_idx, j - g], D_squared, side="right" - ) - core._shift_insert_at_index( - P_squared[thread_idx, j - g], pos, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) - @njit( # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], i8, i8, f8[:], f8[:]," From 33a96c6c8392b867afb11fefa88f13154b9ddc01 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 17 Jun 2022 03:06:28 -0600 Subject: [PATCH 213/416] Modified stimp after changing output shape in scrump --- stumpy/stimp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 1c285f116..19b955dbe 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -218,7 +218,7 @@ def update(self): approx.update() self._PAN[ self._bfs_indices[self._n_processed], : approx.P_.shape[0] - ] = approx.P_ + ] = approx.P_.ravel() else: out = self._mp_func( self._T, From 41007f65581ca0321ab924dd9b5b8b6c6f1e1d3c Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 17 Jun 2022 03:07:21 -0600 Subject: [PATCH 214/416] Add pragma no cover --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index b4b102e83..fc504c329 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2647,5 +2647,5 @@ def _shift_insert_at_index(a, idx, v, shift=1): # elements were shifted to left, and thus the insertion becomes `idx-1` a[idx - 1] = v - else: + else: # pragma: no cover pass From 68efe209c3041e596c3775a8b6f20fb05ced850b Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 17 Jun 2022 10:01:38 -0600 Subject: [PATCH 215/416] Revised Docstrings --- stumpy/core.py | 16 +++++++++------- stumpy/gpu_stump.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index fc504c329..a2faf2cde 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2571,18 +2571,18 @@ def _merge_topk_PI(PA, PB, IA, IB): Parameters ---------- PA : numpy.ndarray - a (top-k) matrix profile, with ndim of 2, where values in each row are + A (top-k) matrix profile, with ndim of 2, where values in each row are sorted in ascending order. Also, it needs to be the same shape as PB. PB : numpy.ndarray - a (top-k) matrix profile, with ndim of 2, where values in each row are + A (top-k) matrix profile, with ndim of 2, where values in each row are sorted in ascending order. Also, it needs to be the same shape as PA. 
IA : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PA + A (top-k) matrix profile indices, corresponding to PA IB : numpy.ndarray - a (top-k) matrix profile indices, corresponding to PB + A (top-k) matrix profile indices, corresponding to PB Returns ------- @@ -2607,8 +2607,9 @@ def _merge_topk_PI(PA, PB, IA, IB): def _shift_insert_at_index(a, idx, v, shift=1): """ Insert value `v` into array `a` at index `idx` (in place) and discard - the last element (i.e. without changing the length of `a`) when `shift=1` (default). - When `shift=-1`, the first element will be discarded instead. + the last element when `shift=1` (default). When `shift=-1`, the first element + will be discarded instead. In both cases, the length of `a` remain unchanged + at the end of function. Note ---- @@ -2630,7 +2631,8 @@ def _shift_insert_at_index(a, idx, v, shift=1): shift: int, default 1 The value 1 (default) indicates discarding the last element after inserting value `v` at index `idx`. The other value, -1, indicates discarding the first - element after inserting value `v` at index `idx` + element after inserting value `v` at index `idx`. Any value other than 1 + or -1 results in no change in the input array `a`. Returns ------- diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 63366a183..ecd8434b9 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -26,20 +26,20 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel): 1-dim array sorted in ascending order. v : float - value to insert into array `a` + Value to insert into array `a` bfs : numpy.ndarray The breadth-first-search indices where the missing leaves of its corresponding binary search tree are filled with -1. nlevel : int - the number of levels in the binary search tree from which the array + The number of levels in the binary search tree from which the array `bfs` is obtained. Returns ------- idx : int - the index of the insertion point + The index of the insertion point """ n = a.shape[0] idx = 0 @@ -71,20 +71,20 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel): 1-dim array sorted in ascending order. v : float - value to insert into array `a` + Value to insert into array `a` bfs : numpy.ndarray The breadth-first-search indices where the missing leaves of its corresponding binary search tree are filled with -1. nlevel : int - the number of levels in the binary search tree from which the array + The number of levels in the binary search tree from which the array `bfs` is obtained. Returns ------- idx : int - the index of the insertion point + The index of the insertion point """ n = a.shape[0] idx = 0 @@ -142,7 +142,7 @@ def _compute_and_update_PI_kernel( Parameters ---------- i : int - sliding window `i` + Sliding window `i` T_A : numpy.ndarray The time series or sequence for which to compute the dot product @@ -214,7 +214,7 @@ def _compute_and_update_PI_kernel( binary search tree are filled with -1. nlevel : int - the number of levels in the binary search tree from which the array + The number of levels in the binary search tree from which the array `bfs` is obtained. 
k : int From 7cbeae945cbe4ca811c10ee802aaa2e4b786fc23 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 17 Jun 2022 10:04:12 -0600 Subject: [PATCH 216/416] Fixed docstring --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index a2faf2cde..e4a689807 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2623,7 +2623,7 @@ def _shift_insert_at_index(a, idx, v, shift=1): idx: int The index at which the value `v` should be inserted. This can be any - integer number from `0` to `len(a) - 1`. + integer number from `0` to `len(a)`. v: float The value that should be inserted into array `a` at index `idx` @@ -2631,7 +2631,7 @@ def _shift_insert_at_index(a, idx, v, shift=1): shift: int, default 1 The value 1 (default) indicates discarding the last element after inserting value `v` at index `idx`. The other value, -1, indicates discarding the first - element after inserting value `v` at index `idx`. Any value other than 1 + element after inserting value `v` at index `idx`. Any value other than 1 or -1 results in no change in the input array `a`. Returns From 2a38dbbe24f86056f03cb7b83d398063307d95e6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 10:29:30 -0600 Subject: [PATCH 217/416] Revised docstring --- stumpy/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e4a689807..89ef5edbd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2606,10 +2606,10 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit def _shift_insert_at_index(a, idx, v, shift=1): """ - Insert value `v` into array `a` at index `idx` (in place) and discard - the last element when `shift=1` (default). When `shift=-1`, the first element - will be discarded instead. In both cases, the length of `a` remain unchanged - at the end of function. + If `shift=1`, all elements in `a[idx:]` are shifted to the right by one element + and the last element is discarded. If `shift=-1`, all elements in `a[:idx]` + are shifted to the left by one element and the first element is discarded. In + both cases, the length of `a` remains unchanged. Note ---- From 616332efd7bd9a4491a179a7553cdf174b883727 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 10:31:27 -0600 Subject: [PATCH 218/416] Removed unnecessary dangling else --- stumpy/core.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 89ef5edbd..4321bce4a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2638,16 +2638,13 @@ def _shift_insert_at_index(a, idx, v, shift=1): ------- None """ - if shift == 1: + if shift >= 0: if 0 <= idx < len(a): a[idx + 1 :] = a[idx:-1] a[idx] = v - elif shift == -1: + else: if 0 < idx <= len(a): a[: idx - 1] = a[1:idx] # elements were shifted to left, and thus the insertion becomes `idx-1` a[idx - 1] = v - - else: # pragma: no cover - pass From 97a17cef740f4d9c7efc7bfcebde178b7a3217c3 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 10:32:27 -0600 Subject: [PATCH 219/416] Removed unnecessary comment --- stumpy/scrump.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index d3c938e61..3c3c2916e 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -114,7 +114,6 @@ def _compute_PI( if excl_zone is not None: core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) - # find EXACT (not approx.) 
value of `P_squared[thread_idx, i, 0]` nn = np.argmin(squared_distance_profile) core._shift_insert_at_index( P_squared[thread_idx, i], 0, squared_distance_profile[nn] From e55ee07596df203de0bf85871082d12db090594b Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 10:38:47 -0600 Subject: [PATCH 220/416] Revised structure of test function so, it follows the structure of the performant version --- tests/naive.py | 61 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index cf5c2fa31..5f9dcd57c 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1417,41 +1417,44 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): if exclusion_zone is not None: apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) - # only for self-join - for idx in np.flatnonzero(distance_profile < P[:, -1]): - pos = np.searchsorted(P[idx], distance_profile[idx], side="right") - P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] - I[idx] = np.insert(I[idx], pos, i)[:-1] - I[i, 1:] = I[i, :-1] I[i, 0] = np.argmin(distance_profile) P[i, 1:] = P[i, :-1] P[i, 0] = distance_profile[I[i, 0]] + if P[i, 0] == np.inf: I[i, 0] = -1 - else: - j = I[i, 0] # index of 1st NN - for g in range(1, min(s, l - max(i, j))): - d = dist_matrix[i + g, j + g] - if d < P[i + g, -1]: - pos = np.searchsorted(P[i + g], d, side="right") - P[i + g] = np.insert(P[i + g], pos, d)[:-1] - I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] - if d < P[j + g]: - pos = np.searchsorted(P[j + g], d, side="right") - P[j + g] = np.insert(P[j + g], pos, d)[:-1] - I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] - - for g in range(1, min(s, i + 1, j + 1)): - d = dist_matrix[i - g, j - g] - if d < P[i - g, -1]: - pos = np.searchsorted(P[i - g], d, side="right") - P[i - g] = np.insert(P[i - g], pos, d)[:-1] - I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] - if d < P[j - g]: - pos = np.searchsorted(P[j - g], d, side="right") - P[j - g] = np.insert(P[j - g], pos, d)[:-1] - I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + continue + + j = I[i, 0] # index of 1st NN + for g in range(1, min(s, l - max(i, j))): + d = dist_matrix[i + g, j + g] + if d < P[i + g, -1]: + pos = np.searchsorted(P[i + g], d, side="right") + P[i + g] = np.insert(P[i + g], pos, d)[:-1] + I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] + if d < P[j + g]: + pos = np.searchsorted(P[j + g], d, side="right") + P[j + g] = np.insert(P[j + g], pos, d)[:-1] + I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] + + for g in range(1, min(s, i + 1, j + 1)): + d = dist_matrix[i - g, j - g] + if d < P[i - g, -1]: + pos = np.searchsorted(P[i - g], d, side="right") + P[i - g] = np.insert(P[i - g], pos, d)[:-1] + I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] + if d < P[j - g]: + pos = np.searchsorted(P[j - g], d, side="right") + P[j - g] = np.insert(P[j - g], pos, d)[:-1] + I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + + # self-join only + if exclusion_zone is not None: + for idx in np.flatnonzero(distance_profile < P[:, -1]): + pos = np.searchsorted(P[idx], distance_profile[idx], side="right") + P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] + I[idx] = np.insert(I[idx], pos, i)[:-1] return P, I From b17713669ed1b335ff50a8f2b692ba65d5e3f7ae Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 10:43:42 -0600 Subject: [PATCH 221/416] Replaced ravel with flatten to get copy of array --- stumpy/stimp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/stumpy/stimp.py b/stumpy/stimp.py index 19b955dbe..c67c005a6 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -218,7 +218,7 @@ def update(self): approx.update() self._PAN[ self._bfs_indices[self._n_processed], : approx.P_.shape[0] - ] = approx.P_.ravel() + ] = approx.P_.flatten() else: out = self._mp_func( self._T, From fc7c2106d22fc952ff18e47634d3d52fe6024bef Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 11:02:33 -0600 Subject: [PATCH 222/416] Changed the type of input parameter and revised docstring --- stumpy/core.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4321bce4a..c96038522 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,12 +2604,12 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit -def _shift_insert_at_index(a, idx, v, shift=1): +def _shift_insert_at_index(a, idx, v, shift='right'): """ - If `shift=1`, all elements in `a[idx:]` are shifted to the right by one element - and the last element is discarded. If `shift=-1`, all elements in `a[:idx]` - are shifted to the left by one element and the first element is discarded. In - both cases, the length of `a` remains unchanged. + If `shift=right`, all elements in `a[idx:]` are shifted to the right by one element + and the last element is discarded. If `shift=left` or any other string value, + all elements in `a[:idx]` are shifted to the left by one element and the first + element is discarded. In both cases, the length of `a` remains unchanged. Note ---- @@ -2623,22 +2623,24 @@ def _shift_insert_at_index(a, idx, v, shift=1): idx: int The index at which the value `v` should be inserted. This can be any - integer number from `0` to `len(a)`. + integer number from `0` to `len(a)`. When `idx=0` and `shift` is set to + "right", or when `idx=len(a)` and `shift` is set to any other string value, + then no change will occur on the input array `a`. v: float The value that should be inserted into array `a` at index `idx` - shift: int, default 1 - The value 1 (default) indicates discarding the last element after inserting - value `v` at index `idx`. The other value, -1, indicates discarding the first - element after inserting value `v` at index `idx`. Any value other than 1 - or -1 results in no change in the input array `a`. + shift: str, default "right" + The value that indicates whether the shifting of elements should be to the + right or to the left. If "right" (default), all elements in `a[idx:]` are + shifted to right by one element. For any other string value, all elements + in `a[:idx]` are shifted to the left by one element. 
Returns ------- None """ - if shift >= 0: + if shift == 'right': if 0 <= idx < len(a): a[idx + 1 :] = a[idx:-1] a[idx] = v From 7a4b46e3286efb9b88014a842c56b7fe19bf84fe Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 11:24:12 -0600 Subject: [PATCH 223/416] Update the value of parameter to match its type --- stumpy/stump.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index a1570f8df..52bf70e08 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -219,18 +219,18 @@ def _compute_diagonal( if pearson > ρ[thread_idx, i, 0]: pos = np.searchsorted(ρ[thread_idx, i], pearson) core._shift_insert_at_index( - ρ[thread_idx, i], pos, pearson, shift=-1 + ρ[thread_idx, i], pos, pearson, shift="left" ) - core._shift_insert_at_index(I[thread_idx, i], pos, i + g, shift=-1) + core._shift_insert_at_index(I[thread_idx, i], pos, i + g, shift="left") if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, 0]: pos = np.searchsorted(ρ[thread_idx, i + g], pearson) core._shift_insert_at_index( - ρ[thread_idx, i + g], pos, pearson, shift=-1 + ρ[thread_idx, i + g], pos, pearson, shift="left" ) core._shift_insert_at_index( - I[thread_idx, i + g], pos, i, shift=-1 + I[thread_idx, i + g], pos, i, shift="left" ) if i < i + g: @@ -477,10 +477,10 @@ def _stump( if ρ[0, i, 0] < ρ[thread_idx, i, j]: pos = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) core._shift_insert_at_index( - ρ[0, i], pos, ρ[thread_idx, i, j], shift=-1 + ρ[0, i], pos, ρ[thread_idx, i, j], shift="left" ) core._shift_insert_at_index( - I[0, i], pos, I[thread_idx, i, j], shift=-1 + I[0, i], pos, I[thread_idx, i, j], shift="left" ) if ρL[0, i] < ρL[thread_idx, i]: From 2622d13c16e691ffa13c82ac7ce25cb66c21da5e Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 11:26:35 -0600 Subject: [PATCH 224/416] Update the value of parameter to match its type --- tests/test_core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index e3854b889..5136175b1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1098,22 +1098,22 @@ def test_shift_insert_at_index(): indices = np.arange(k + 1) values = np.random.rand(k + 1) - # test shift = 1 + # test shift = "right" for (idx, v) in zip(indices, values): ref[:] = a comp[:] = a ref = np.insert(ref, idx, v)[:-1] - core._shift_insert_at_index(comp, idx, v, shift=1) # update comp in place + core._shift_insert_at_index(comp, idx, v, shift="right") # update comp in place npt.assert_array_equal(ref, comp) - # test shift = -1 + # test shift = "left" for (idx, v) in zip(indices, values): ref[:] = a comp[:] = a ref = np.insert(ref, idx, v)[1:] - core._shift_insert_at_index(comp, idx, v, shift=-1) # update comp in place + core._shift_insert_at_index(comp, idx, v, shift="left") # update comp in place npt.assert_array_equal(ref, comp) From fc3be799b744caee61b8b50242ee80195456174f Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 11:27:55 -0600 Subject: [PATCH 225/416] Correct format --- stumpy/core.py | 4 ++-- stumpy/stump.py | 4 +++- tests/test_core.py | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c96038522..0d3e2ffdd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2604,7 +2604,7 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit -def _shift_insert_at_index(a, idx, v, shift='right'): +def _shift_insert_at_index(a, idx, v, shift="right"): """ If `shift=right`, all elements in 
`a[idx:]` are shifted to the right by one element and the last element is discarded. If `shift=left` or any other string value, @@ -2640,7 +2640,7 @@ def _shift_insert_at_index(a, idx, v, shift='right'): ------- None """ - if shift == 'right': + if shift == "right": if 0 <= idx < len(a): a[idx + 1 :] = a[idx:-1] a[idx] = v diff --git a/stumpy/stump.py b/stumpy/stump.py index 52bf70e08..b00a4b22d 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -221,7 +221,9 @@ def _compute_diagonal( core._shift_insert_at_index( ρ[thread_idx, i], pos, pearson, shift="left" ) - core._shift_insert_at_index(I[thread_idx, i], pos, i + g, shift="left") + core._shift_insert_at_index( + I[thread_idx, i], pos, i + g, shift="left" + ) if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, 0]: diff --git a/tests/test_core.py b/tests/test_core.py index 5136175b1..96854093a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1104,7 +1104,9 @@ def test_shift_insert_at_index(): comp[:] = a ref = np.insert(ref, idx, v)[:-1] - core._shift_insert_at_index(comp, idx, v, shift="right") # update comp in place + core._shift_insert_at_index( + comp, idx, v, shift="right" + ) # update comp in place npt.assert_array_equal(ref, comp) @@ -1114,6 +1116,8 @@ def test_shift_insert_at_index(): comp[:] = a ref = np.insert(ref, idx, v)[1:] - core._shift_insert_at_index(comp, idx, v, shift="left") # update comp in place + core._shift_insert_at_index( + comp, idx, v, shift="left" + ) # update comp in place npt.assert_array_equal(ref, comp) From 6411b7a05dedb4aff8a6ffff4a24dd0b65ec12d4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 12:12:01 -0600 Subject: [PATCH 226/416] Changed output structure of naive.scrump --- tests/naive.py | 40 +++++++++---------- tests/test_scrump.py | 91 +++++++++----------------------------------- 2 files changed, 38 insertions(+), 93 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 5f9dcd57c..67d5d95a9 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1481,8 +1481,13 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): diags_ranges_start = diags_ranges[0, 0] diags_ranges_stop = diags_ranges[0, 1] - P = np.full((l, k + 2), np.inf, dtype=np.float64) # Topk + left/ right - I = np.full((l, k + 2), -1, dtype=np.int64) # Topk + left/ right + P = np.full((l, k), np.inf, dtype=np.float64) # Topk + PL = np.full(l, np.inf, dtype=np.float64) + PR = np.full(l, np.inf, dtype=np.float64) + + I = np.full((l, k), -1, dtype=np.int64) + IL = np.full(l, -1, dtype=np.int64) + IR = np.full(l, -1, dtype=np.int64) for diag_idx in range(diags_ranges_start, diags_ranges_stop): g = diags[diag_idx] @@ -1491,32 +1496,27 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): for j in range(n_B - m + 1): if j - i == g: d = dist_matrix[i, j] - if d < P[i, k - 1]: - # update TopK of P[i] + if d < P[i, - 1]: # update TopK of P[i] idx = searchsorted_right(P[i], d) - P[i, :k] = np.insert(P[i, :k], idx, d)[:-1] - I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] + P[i] = np.insert(P[i], idx, d)[:-1] + I[i] = np.insert(I[i], idx, i + g)[:-1] - if exclusion_zone is not None and d < P[i + g, k - 1]: + if exclusion_zone is not None and d < P[i + g, -1]: idx = searchsorted_right(P[i + g], d) - P[i + g, :k] = np.insert(P[i + g, :k], idx, d)[:-1] - I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] + P[i + g] = np.insert(P[i + g], idx, d)[:-1] + I[i + g] = np.insert(I[i + g], idx, i)[:-1] # left matrix profile and left matrix profile indices - if 
exclusion_zone is not None and i < i + g and d < P[i + g, k]: - P[i + g, k] = d - I[i + g, k] = i + if exclusion_zone is not None and i < i + g and d < PL[i + g]: + PL[i + g] = d + IL[i + g] = i # right matrix profile and right matrix profile indices - if exclusion_zone is not None and i + g > i and d < P[i, k + 1]: - P[i, k + 1] = d - I[i, k + 1] = i + g + if exclusion_zone is not None and i + g > i and d < PR[i]: + PR[i] = d + IR[i] = i + g - out = np.empty((l, 2 * k + 2), dtype=object) - out[:, :k] = P[:, :k] - out[:, k:] = I - - return out + return P, I, IL, IR def prescraamp(T_A, m, T_B, s, exclusion_zone=None, p=2.0): diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 0c9d54672..3f0e96c61 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -107,11 +107,7 @@ def test_scrump_self_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None) np.random.seed(seed) approx = scrump( @@ -140,11 +136,7 @@ def test_scrump_A_B_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_A, m, T_B, percentage, None, False, None) np.random.seed(seed) approx = scrump( @@ -174,11 +166,7 @@ def test_scrump_A_B_join_swap(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_A, percentage, None, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - # ref_I = ref_mp[:, 1].reshape(-1, 1) - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, _, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_A, percentage, None, False, None) np.random.seed(seed) approx = scrump( @@ -210,11 +198,7 @@ def test_scrump_self_join_larger_window(T_A, T_B, m, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None) np.random.seed(seed) approx = scrump( @@ -378,15 +362,8 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): np.random.seed(seed) ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, True, s) - for i in range(ref_mp.shape[0]): - if ref_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = ref_P[i] - ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - # ref_left_I = ref_mp[:, 2] - # ref_right_I = ref_mp[:, 3] + ref_P_aux, ref_I_aux, _, _ = naive.scrump(T_B, m, T_B, percentage, zone, True, s) + 
naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) approx = scrump( @@ -395,16 +372,12 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): approx.update() comp_P = approx.P_ comp_I = approx.I_ - # comp_left_I = approx.left_I_ - # comp_right_I = approx.right_I_ naive.replace_inf(ref_P) naive.replace_inf(comp_I) npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) - # npt.assert_almost_equal(ref_left_I, comp_left_I) - # npt.assert_almost_equal(ref_right_I, comp_right_I) @pytest.mark.parametrize("T_A, T_B", test_data) @@ -419,15 +392,11 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): np.random.seed(seed) ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s) - ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None) - for i in range(ref_mp.shape[0]): - if ref_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = ref_P[i] - ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + + ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump(T_A, m, T_B, percentage, None, False, None) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + ref_left_I = ref_left_I_aux + ref_right_I = ref_right_I_aux approx = scrump( T_A, @@ -551,11 +520,7 @@ def test_scrump_constant_subsequence_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T, m, T, percentage, zone, False, None) np.random.seed(seed) approx = scrump( @@ -589,11 +554,7 @@ def test_scrump_identical_subsequence_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - # ref_I = ref_mp[:, 1].reshape(-1, 1) - # ref_left_I = ref_mp[:, 2] - # ref_right_I = ref_mp[:, 3] + ref_P, _, _, _ = naive.scrump(T, m, T, percentage, zone, False, None) np.random.seed(seed) approx = scrump( @@ -635,11 +596,7 @@ def test_scrump_nan_inf_self_join( seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B_sub, m, T_B_sub, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B_sub, m, T_B_sub, percentage, zone, False, None) np.random.seed(seed) approx = scrump(T_B_sub, m, percentage=percentage, pre_scrump=False) @@ -669,11 +626,7 @@ def test_scrump_nan_zero_mean_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T, m, T, percentage, zone, False, None) np.random.seed(seed) approx = scrump(T, m, percentage=percentage, 
pre_scrump=False) @@ -739,11 +692,7 @@ def test_scrump_self_join_KNN(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None, k=k) - ref_P = ref_mp[:, :k] - ref_I = ref_mp[:, k : 2 * k] - ref_left_I = ref_mp[:, 2 * k] - ref_right_I = ref_mp[:, 2 * k + 1] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None, k=k) np.random.seed(seed) approx = scrump( @@ -777,12 +726,8 @@ def test_scrump_A_B_join_KNN(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None, k=k) - ref_P = ref_mp[:, :k] - ref_I = ref_mp[:, k : 2 * k] - ref_left_I = ref_mp[:, 2 * k] - ref_right_I = ref_mp[:, 2 * k + 1] - + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_A, m, T_B, percentage, None, False, None, k=k) + np.random.seed(seed) approx = scrump( T_A, From 71d68c8747a8abbb7a6d748455f69768054649f4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 12:12:48 -0600 Subject: [PATCH 227/416] Correct format --- tests/naive.py | 2 +- tests/test_scrump.py | 46 ++++++++++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 67d5d95a9..e3df5c236 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1496,7 +1496,7 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): for j in range(n_B - m + 1): if j - i == g: d = dist_matrix[i, j] - if d < P[i, - 1]: # update TopK of P[i] + if d < P[i, -1]: # update TopK of P[i] idx = searchsorted_right(P[i], d) P[i] = np.insert(P[i], idx, d)[:-1] I[i] = np.insert(I[i], idx, i + g)[:-1] diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 3f0e96c61..97d5164f4 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -107,7 +107,9 @@ def test_scrump_self_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_B, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -136,7 +138,9 @@ def test_scrump_A_B_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_A, m, T_B, percentage, None, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_A, m, T_B, percentage, None, False, None + ) np.random.seed(seed) approx = scrump( @@ -166,7 +170,9 @@ def test_scrump_A_B_join_swap(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, _, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_A, percentage, None, False, None) + ref_P, _, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_A, percentage, None, False, None + ) np.random.seed(seed) approx = scrump( @@ -198,7 +204,9 @@ def test_scrump_self_join_larger_window(T_A, T_B, m, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_B, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -362,7 +370,9 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): np.random.seed(seed) ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) - ref_P_aux, 
ref_I_aux, _, _ = naive.scrump(T_B, m, T_B, percentage, zone, True, s) + ref_P_aux, ref_I_aux, _, _ = naive.scrump( + T_B, m, T_B, percentage, zone, True, s + ) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) @@ -393,7 +403,9 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): np.random.seed(seed) ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s) - ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump(T_A, m, T_B, percentage, None, False, None) + ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump( + T_A, m, T_B, percentage, None, False, None + ) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) ref_left_I = ref_left_I_aux ref_right_I = ref_right_I_aux @@ -520,7 +532,9 @@ def test_scrump_constant_subsequence_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T, m, T, percentage, zone, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T, m, T, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -596,7 +610,9 @@ def test_scrump_nan_inf_self_join( seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B_sub, m, T_B_sub, percentage, zone, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B_sub, m, T_B_sub, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump(T_B_sub, m, percentage=percentage, pre_scrump=False) @@ -626,7 +642,9 @@ def test_scrump_nan_zero_mean_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T, m, T, percentage, zone, False, None) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T, m, T, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump(T, m, percentage=percentage, pre_scrump=False) @@ -692,7 +710,9 @@ def test_scrump_self_join_KNN(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_B, m, T_B, percentage, zone, False, None, k=k) + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_B, percentage, zone, False, None, k=k + ) np.random.seed(seed) approx = scrump( @@ -726,8 +746,10 @@ def test_scrump_A_B_join_KNN(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(T_A, m, T_B, percentage, None, False, None, k=k) - + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_A, m, T_B, percentage, None, False, None, k=k + ) + np.random.seed(seed) approx = scrump( T_A, From 3e4234329400624a2543f5d921fd89448e74b1a2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 23 Jun 2022 12:28:51 -0600 Subject: [PATCH 228/416] Add test function for scrump_plus_plus for TopK --- tests/test_scrump.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 97d5164f4..3bd43b423 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -773,3 +773,44 @@ def test_scrump_A_B_join_KNN(T_A, T_B, percentages): npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) npt.assert_almost_equal(ref_right_I, comp_right_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +@pytest.mark.parametrize("percentages", percentages) +def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): + m = 3 + zone = 
int(np.ceil(m / 4)) + + for k in range(2, 4): + for s in range(1, zone + 1): + for percentage in percentages: + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump( + T_B, m, T_B, s=s, exclusion_zone=zone, k=k + ) + ref_P_aux, ref_I_aux, _, _ = naive.scrump( + T_B, m, T_B, percentage, zone, True, s, k=k + ) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + + np.random.seed(seed) + approx = scrump( + T_B, + m, + ignore_trivial=True, + percentage=percentage, + pre_scrump=True, + s=s, + k=k, + ) + approx.update() + comp_P = approx.P_ + comp_I = approx.I_ + + naive.replace_inf(ref_P) + naive.replace_inf(comp_I) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) From e512a63bc941387bae8daf69c5a245a015f5aba8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 12:43:48 -0600 Subject: [PATCH 229/416] Add naive version to merge peason profiles --- tests/naive.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/naive.py b/tests/naive.py index e3df5c236..cd8077ffb 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1790,3 +1790,29 @@ def merge_topk_PI(PA, PB, IA, IB): PA[:, :] = profile[:, : PA.shape[1]] IA[:, :] = indices[:, : PA.shape[1]] + + +def merge_topk_ρI(ρA, ρB, IA, IB): + # this is to merge two pearson profiles, each is a 2D array where each row + # contains an ascendingly-sorted values. + # Note that we are interested in keeping the top-k largest values. + # In the merged array (from right to left): the priority is with ρA (from right + # to left), and then with ρB(from right to left) + + # Example: + # ρA = [0(I), 0(II), 1], and ρB = [0', 1'(I), 1'(II)]. + # the prime symbol is to indicate that the values are from ρB + # and the greek numbers are to differntiate two same values in one array + + # so, the outcome of merging process should be: + # [0', 0(I), 0(II), 1'(I), 1'(II), 1] + + profile = np.column_stack((ρB, ρA)) + indices = np.column_stack((IB, IA)) + + idx = np.argsort(profile, axis=1) + profile = np.take_along_axis(profile, idx, axis=1) + indices = np.take_along_axis(indices, idx, axis=1) + + ρA[:, :] = profile[:, ρA.shape[1]: ] + IA[:, :] = indices[:, ρA.shape[1]: ] From 382bda27da190c0da9129de854ea72f3f924aabc Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 12:44:21 -0600 Subject: [PATCH 230/416] Add test function for merging pearson profiles --- tests/test_core.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 96854093a..69f4a886f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1089,6 +1089,34 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_I, comp_I) +def test_merge_topk_ρI(): + n = 50 + for k in range(1, 6): + ρA = np.random.rand(n * k).reshape(n, k) + ρA[:, :] = np.sort(ρA, axis=1) # sorting each row separately + + ρB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) + for i in range(n): # creating ties between values of PA and PB + ρB[i, col_idx[i]] = np.random.choice(ρA[i], size=1, replace=False) + ρB[:, :] = np.sort(ρB, axis=1) # sorting each row separately + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_ρI(ref_ρ, ρB, ref_I, IB) + core._merge_topk_ρI(comp_ρ, ρB, comp_I, IB) + + npt.assert_array_equal(ref_ρ, comp_ρ) + npt.assert_array_equal(ref_I, comp_I) + + def 
test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) From b0a56f768031f77d7eb3332acf2d87a16b4e0bbe Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 12:45:37 -0600 Subject: [PATCH 231/416] Corret format --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index cd8077ffb..9858005bc 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1814,5 +1814,5 @@ def merge_topk_ρI(ρA, ρB, IA, IB): profile = np.take_along_axis(profile, idx, axis=1) indices = np.take_along_axis(indices, idx, axis=1) - ρA[:, :] = profile[:, ρA.shape[1]: ] - IA[:, :] = indices[:, ρA.shape[1]: ] + ρA[:, :] = profile[:, ρA.shape[1] :] + IA[:, :] = indices[:, ρA.shape[1] :] From b5a4e1553847cbe16e94d9661a061b3b2419c343 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:00:15 -0600 Subject: [PATCH 232/416] Add performant function to merge pearson profiles --- stumpy/core.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 0d3e2ffdd..b93e074d7 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2603,6 +2603,51 @@ def _merge_topk_PI(PA, PB, IA, IB): stop += 1 # because of shifting elements to the right by one +@njit(parallel=True) +def _merge_topk_ρI(ρA, ρB, IA, IB): + """ + Merge two top-k pearson profiles ρA and ρB, and update ρA (in place). In the + merged array (from right to left): the priority is with ρA (from right to left), + and then with ρB(from right to left) Also, update IA accordingly. + + Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function + keeps `top-k` largest values. + + Parameters + ---------- + ρA : numpy.ndarray + A (top-k) pearson profile, with ndim of 2, where values in each row are + sorted in ascending order. Also, it needs to be the same shape as ρB. + + ρB : numpy.ndarray + A (top-k) pearson profile, with ndim of 2, where values in each row are + sorted in ascending order. Also, it needs to be the same shape as ρA. 
+ + IA : numpy.ndarray + A (top-k) matrix profile indices, corresponding to ρA + + IB : numpy.ndarray + A (top-k) matrix profile indices, corresponding to ρB + + Returns + ------- + None + """ + for i in range(ρB.shape[0]): + # start = 0 + # stop = np.searchsorted(PA[i], PB[i, -1], side="right") + + for j in range(ρB.shape[1] - 1, -1, -1): + if ρB[i, j] > ρA[i, 0]: + idx = np.searchsorted(ρA[i], ρB[i, j], side="left") # + start + + _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") + _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") + + # start = idx + # stop += 1 # because of shifting elements to the right by one + + @njit def _shift_insert_at_index(a, idx, v, shift="right"): """ From d4d28feafe5ed2dad2f92e764846093324301f05 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:04:26 -0600 Subject: [PATCH 233/416] Optimize function --- stumpy/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index b93e074d7..a25432b54 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2634,18 +2634,19 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): None """ for i in range(ρB.shape[0]): - # start = 0 - # stop = np.searchsorted(PA[i], PB[i, -1], side="right") + start = np.searchsorted(ρA[i], ρB[i, 0], side="left") + stop = ρB.shape[1] for j in range(ρB.shape[1] - 1, -1, -1): if ρB[i, j] > ρA[i, 0]: - idx = np.searchsorted(ρA[i], ρB[i, j], side="left") # + start + idx = np.searchsorted(ρA[i, start:stop], ρB[i, j], side="left") + start _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") - # start = idx - # stop += 1 # because of shifting elements to the right by one + stop = idx # because of shifting elements to the left by one + if start > 0: + start -= 1 @njit From 99f2a570623b714be46b288a322c4b660b763194 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:05:31 -0600 Subject: [PATCH 234/416] Avoid creating new memory --- tests/naive.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 9858005bc..03de99bf5 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1785,8 +1785,8 @@ def merge_topk_PI(PA, PB, IA, IB): indices = np.column_stack((IA, IB)) idx = np.argsort(profile, axis=1) - profile = np.take_along_axis(profile, idx, axis=1) - indices = np.take_along_axis(indices, idx, axis=1) + profile[:, :] = np.take_along_axis(profile, idx, axis=1) + indices[:, :] = np.take_along_axis(indices, idx, axis=1) PA[:, :] = profile[:, : PA.shape[1]] IA[:, :] = indices[:, : PA.shape[1]] @@ -1811,8 +1811,8 @@ def merge_topk_ρI(ρA, ρB, IA, IB): indices = np.column_stack((IB, IA)) idx = np.argsort(profile, axis=1) - profile = np.take_along_axis(profile, idx, axis=1) - indices = np.take_along_axis(indices, idx, axis=1) + profile[:, :] = np.take_along_axis(profile, idx, axis=1) + indices[:, :] = np.take_along_axis(indices, idx, axis=1) ρA[:, :] = profile[:, ρA.shape[1] :] IA[:, :] = indices[:, ρA.shape[1] :] From 855c429daf1ea3547a4c57fc737f6286517a086f Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:07:22 -0600 Subject: [PATCH 235/416] Improve docstring --- stumpy/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index a25432b54..379fe8c60 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2568,6 +2568,9 @@ def _merge_topk_PI(PA, PB, IA, IB): always choosing values of PA over values of PB in case of ties. Also, update IA accordingly. 
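A plain-NumPy way to picture this row-wise merge (keep the k smallest values per row, preferring entries from `PA` on ties) is to stack, stably sort, and truncate, in the spirit of the naive reference in `tests/naive.py`; the values below are illustrative:

    import numpy as np

    k = 3
    PA = np.array([[0.2, 0.5, 0.9]])   # each row sorted ascending
    PB = np.array([[0.1, 0.5, 0.6]])
    IA = np.array([[10, 11, 12]])
    IB = np.array([[20, 21, 22]])

    # Stacking PA before PB lets a stable sort keep PA's entry first on ties
    profile = np.column_stack((PA, PB))
    indices = np.column_stack((IA, IB))
    order = np.argsort(profile, axis=1, kind="stable")

    merged_P = np.take_along_axis(profile, order, axis=1)[:, :k]
    merged_I = np.take_along_axis(indices, order, axis=1)[:, :k]
    print(merged_P)   # [[0.1 0.2 0.5]]
    print(merged_I)   # [[20 10 11]]  -> the tied 0.5 is taken from PA (index 11)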
+ Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function + keeps `top-k` smallest values. + Parameters ---------- PA : numpy.ndarray From 9e02bac4d5bd84e1131b80a869182de87541d004 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:15:36 -0600 Subject: [PATCH 236/416] Refactored --- stumpy/stump.py | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index b00a4b22d..80923f33c 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -471,27 +471,18 @@ def _stump( # Reduction of results from all threads for thread_idx in range(1, n_threads): - for i in prange(l): - # top-k - for j in range( - k - 1, -1, -1 - ): # reverse iteration to preserve order in ties - if ρ[0, i, 0] < ρ[thread_idx, i, j]: - pos = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j]) - core._shift_insert_at_index( - ρ[0, i], pos, ρ[thread_idx, i, j], shift="left" - ) - core._shift_insert_at_index( - I[0, i], pos, I[thread_idx, i, j], shift="left" - ) - - if ρL[0, i] < ρL[thread_idx, i]: - ρL[0, i] = ρL[thread_idx, i] - IL[0, i] = IL[thread_idx, i] - - if ρR[0, i] < ρR[thread_idx, i]: - ρR[0, i] = ρR[thread_idx, i] - IR[0, i] = IR[thread_idx, i] + # update top-k arrays + core._merge_topk_ρI(ρ[0], ρ[thread_idx], I[0], I[thread_idx]) + + # update left matrix profile and matrix profile indices + cond = ρL[0] < ρL[thread_idx] + ρL[0] = np.where(cond, ρL[thread_idx], ρL[0]) + IL[0] = np.where(cond, IL[thread_idx], IL[0]) + + # update right matrix profile and matrix profile indices + cond = ρR[0] < ρR[thread_idx] + ρR[0] = np.where(cond, ρR[thread_idx], ρR[0]) + IR[0] = np.where(cond, IR[thread_idx], IR[0]) # Reverse top-k rho (and its associated I) to be in descending order and # then convert from Pearson correlations to Euclidean distances (ascending order) From be44ab06c5b10c2607c95e7de0277c18da762c39 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 13:21:56 -0600 Subject: [PATCH 237/416] Avoid creating new memory in for-loop --- stumpy/stumped.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index dc2978318..1246bbb2c 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -274,13 +274,13 @@ def stumped( # Update top-1 left matrix profile and matrix profile index cond = PL < profile_L - profile_L = np.where(cond, PL, profile_L) - indices_L = np.where(cond, IL, indices_L) + profile_L[:] = np.where(cond, PL, profile_L) + indices_L[:] = np.where(cond, IL, indices_L) # Update top-1 right matrix profile and matrix profile index cond = PR < profile_R - profile_R = np.where(cond, PR, profile_R) - indices_R = np.where(cond, IR, indices_R) + profile_R[:] = np.where(cond, PR, profile_R) + indices_R[:] = np.where(cond, IR, indices_R) out = np.empty((l, 2 * k + 2), dtype=object) out[:, :k] = profile From e0ad42af65f80df0565c56055861ee8d031b3ed0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 16:11:43 -0600 Subject: [PATCH 238/416] Update test function --- tests/test_stimp.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/test_stimp.py b/tests/test_stimp.py index 089c1f1f9..f30514193 100644 --- a/tests/test_stimp.py +++ b/tests/test_stimp.py @@ -50,12 +50,9 @@ def test_stimp_1_percent(T): zone = int(np.ceil(m / 4)) s = zone tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone) - ref_mp = naive.scrump(T, m, T, percentage, zone, True, s) - for i in range(ref_mp.shape[0]): - if 
tmp_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = tmp_P[i] - ref_mp[i, 1] = tmp_I[i] - ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0] + ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s) + naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I) + ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P.flatten() # Compare raw pan cmp_PAN = pan._PAN @@ -108,12 +105,9 @@ def test_stimp_max_m(T): zone = int(np.ceil(m / 4)) s = zone tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone) - ref_mp = naive.scrump(T, m, T, percentage, zone, True, s) - for i in range(ref_mp.shape[0]): - if tmp_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = tmp_P[i] - ref_mp[i, 1] = tmp_I[i] - ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0] + ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s) + naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I) + ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P.flatten() # Compare raw pan cmp_PAN = pan._PAN From 33c215147831daac885868129a676af6587ff67c Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 17:09:00 -0600 Subject: [PATCH 239/416] Revise function to make it parallelizable --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 379fe8c60..90abbcb51 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2636,7 +2636,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - for i in range(ρB.shape[0]): + for i in prange(ρB.shape[0]): start = np.searchsorted(ρA[i], ρB[i, 0], side="left") stop = ρB.shape[1] From 4a995d14a3ff56ebf1964f6c9a56cf5609fb9914 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 18:30:04 -0600 Subject: [PATCH 240/416] Full test and coverage in 1hr From 2c55c885e5fd7212d34b90afcf7ebb5ad9094caa Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 18:56:03 -0600 Subject: [PATCH 241/416] Revise docstrings --- stumpy/core.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 90abbcb51..86517ecac 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2575,17 +2575,17 @@ def _merge_topk_PI(PA, PB, IA, IB): ---------- PA : numpy.ndarray A (top-k) matrix profile, with ndim of 2, where values in each row are - sorted in ascending order. Also, it needs to be the same shape as PB. + sorted in ascending order. PB : numpy.ndarray A (top-k) matrix profile, with ndim of 2, where values in each row are - sorted in ascending order. Also, it needs to be the same shape as PA. + sorted in ascending order. `PB` must have the same shape as `PA`. IA : numpy.ndarray - A (top-k) matrix profile indices, corresponding to PA + A (top-k) matrix profile indices corresponding to PA IB : numpy.ndarray - A (top-k) matrix profile indices, corresponding to PB + A (top-k) matrix profile indices corresponding to PB Returns ------- @@ -2620,17 +2620,18 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ---------- ρA : numpy.ndarray A (top-k) pearson profile, with ndim of 2, where values in each row are - sorted in ascending order. Also, it needs to be the same shape as ρB. + sorted in ascending order. ρB : numpy.ndarray A (top-k) pearson profile, with ndim of 2, where values in each row are - sorted in ascending order. Also, it needs to be the same shape as ρA. + sorted in ascending order. Also, it needs to be the same shape as ρA. `ρB` + must have the same shape as `ρA`. 
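For intuition, the row-wise merge described here (keep the k largest values per row, priority to ρA and then ρB, each from right to left) can be reproduced with the stack-sort-truncate approach of the naive reference; the values below mirror the worked example added later in this series (ρA = [0, 0', 1], ρB = [0, 1, 1'] → [1_B, 1'_B, 1_A]), with ASCII names standing in for ρA/ρB:

    import numpy as np

    k = 3
    rhoA = np.array([[0.0, 0.0, 1.0]])   # 0, 0', 1
    rhoB = np.array([[0.0, 1.0, 1.0]])   # 0, 1, 1'
    IA = np.array([[10, 11, 12]])        # tags for rhoA's entries
    IB = np.array([[20, 21, 22]])        # tags for rhoB's entries

    # Stack rhoB before rhoA, stable-sort ascending, keep the last (largest) k columns
    stacked_rho = np.column_stack((rhoB, rhoA))
    stacked_I = np.column_stack((IB, IA))
    order = np.argsort(stacked_rho, axis=1, kind="stable")

    merged_rho = np.take_along_axis(stacked_rho, order, axis=1)[:, -k:]
    merged_I = np.take_along_axis(stacked_I, order, axis=1)[:, -k:]
    print(merged_rho)   # [[1. 1. 1.]]
    print(merged_I)     # [[21 22 12]]  i.e. 1_B, 1'_B, 1_A, as in the example above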
IA : numpy.ndarray - A (top-k) matrix profile indices, corresponding to ρA + A (top-k) matrix profile indices corresponding to ρA IB : numpy.ndarray - A (top-k) matrix profile indices, corresponding to ρB + A (top-k) matrix profile indices corresponding to ρB Returns ------- From 07b83ab1c8df656d70d3a6ceb83cbcf7964f618b Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 18:58:09 -0600 Subject: [PATCH 242/416] Revise docstrings --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 86517ecac..951b13721 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2624,8 +2624,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ρB : numpy.ndarray A (top-k) pearson profile, with ndim of 2, where values in each row are - sorted in ascending order. Also, it needs to be the same shape as ρA. `ρB` - must have the same shape as `ρA`. + sorted in ascending order. `ρB` must have the same shape as `ρA`. IA : numpy.ndarray A (top-k) matrix profile indices corresponding to ρA From 5be2cf662406901569357b038215581cf19bcbd8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 19:15:29 -0600 Subject: [PATCH 243/416] Optimize function --- stumpy/core.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 951b13721..c95e869f7 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2595,15 +2595,28 @@ def _merge_topk_PI(PA, PB, IA, IB): start = 0 stop = np.searchsorted(PA[i], PB[i, -1], side="right") + if stop == 0: + # means PB[i, -1] < PA[i, 0], i.e. the maximum value in PB[i] is less + # than smallest value in PA[i]. So, we should replace PA[i] with PB[i]. + PA[i] = PB[i] + IA[i] = IB[i] + continue + for j in range(PB.shape[1]): - if PB[i, j] < PA[i, -1]: - idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start + if PB[i, j] >= PA[i, -1]: + # PB[i] is sorted ascaendingly. + # Hence: PB[i, j+1] >= PB[i, j] >= PA[i, -1] + break + + # PB[i, j] is less than PA[i, -1], the maximum value in PA[i]. so, + # we MUST update PA[i]. + idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start - _shift_insert_at_index(PA[i], idx, PB[i, j]) - _shift_insert_at_index(IA[i], idx, IB[i, j]) + _shift_insert_at_index(PA[i], idx, PB[i, j]) + _shift_insert_at_index(IA[i], idx, IB[i, j]) - start = idx - stop += 1 # because of shifting elements to the right by one + start = idx + stop += 1 # because of shifting elements to the right by one @njit(parallel=True) From 4896fe8dcdc4a6cb77226d000b7c87b0ce96e1c7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 19:32:18 -0600 Subject: [PATCH 244/416] Optimize function --- stumpy/core.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c95e869f7..c643f668c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2653,16 +2653,29 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): start = np.searchsorted(ρA[i], ρB[i, 0], side="left") stop = ρB.shape[1] + if start == ρB.shape[1]: + # means ρB[i, 0] > ρA[i, -1], i.e. the minimum value in ρB[i] is greater + # than greatest value in ρA[i]. So, we should replace ρA[i] with ρB[i]. + ρA[i] = ρB[i] + IA[i] = IB[i] + continue + for j in range(ρB.shape[1] - 1, -1, -1): - if ρB[i, j] > ρA[i, 0]: - idx = np.searchsorted(ρA[i, start:stop], ρB[i, j], side="left") + start + if ρB[i, j] <= ρA[i, 0]: + # ρB[i] is sorted ascaendingly. 
+ # Hence, next iteration: ρB[i, j-1] <= ρB[i, j] <= ρA[i, 0] + break + + # ρB[i, j] is greater than ρA[i, 0], the minimum value in ρA[i]. so, + # we MUST update ρA[i] to make sure we are keeping top-k largest values. + idx = np.searchsorted(ρA[i, start:stop], ρB[i, j], side="left") + start - _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") - _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") + _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") + _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") - stop = idx # because of shifting elements to the left by one - if start > 0: - start -= 1 + stop = idx # because of shifting elements to the left by one + if start > 0: + start -= 1 @njit From d9a0a20c9d65d223f3b95de15c54703133146f8b Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 19:34:53 -0600 Subject: [PATCH 245/416] Rename variable to improve readability --- stumpy/scrump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 3c3c2916e..1532e8056 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -114,11 +114,11 @@ def _compute_PI( if excl_zone is not None: core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) - nn = np.argmin(squared_distance_profile) + nn_idx = np.argmin(squared_distance_profile) core._shift_insert_at_index( - P_squared[thread_idx, i], 0, squared_distance_profile[nn] + P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] ) - core._shift_insert_at_index(I[thread_idx, i], 0, nn) + core._shift_insert_at_index(I[thread_idx, i], 0, nn_idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 From eabe0fbff4819b324dba25da53268a53f1e1ea71 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 24 Jun 2022 19:41:02 -0600 Subject: [PATCH 246/416] Revise comments --- stumpy/scrump.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 1532e8056..9d1f69d3e 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -130,8 +130,9 @@ def _compute_PI( m * M_T[j] * μ_Q[i] ) QT_j_prime = QT_j - # Update Top-k of BOTH subsequences at i+g and j+g (i.e. right neighbor - # of i, j), by using the distance between `S_(i+g)` and `S_(j+g)` + # Update top-k for both subsequences `S[i+g] = T[i+g:i+g+m]`` and + # `S[j+g] = T[j+g:j+g+m]` (i.e., the right neighbors of `T[i : i+m]` and + # `T[j:j+m]`) by using the distance between `S[i+g]` and `S[j+g]` for g in range(1, min(s, l - max(i, j))): QT_j = ( QT_j @@ -165,8 +166,9 @@ def _compute_PI( core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) QT_j = QT_j_prime - # Update Top-k of BOTH subsequences at i-g and j-g (i.e. 
left neighbor - # of i, j), by using the distance between `S_(i-g)` and `S_(j-g)` + # Update top-k for both subsequences `S[i-g] = T[i-g:i-g+m]` and + # `S[j-g] = T[j-g:j-g+m]` (i.e., the left neighbors of `T[i : i+m]` and + # `T[j:j+m]`) by using the distance between `S[i-g]` and `S[j-g]` for g in range(1, min(s, i + 1, j + 1)): QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] D_squared = core._calculate_squared_distance( @@ -195,7 +197,8 @@ def _compute_PI( ) core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) - # self-join only + # In the case of a self-join, the calculated distances can also be used + # to refine the top-k for all non-trivial subsequences if excl_zone is not None: # Note that the squared distance, `squared_distance_profile[j]`, # between subsequences `S_i = T[i : i + m]` and `S_j = T[j : j + m]` From e298fd36670438726f401c06fbb92f0b89b7ebaa Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 09:39:35 -0600 Subject: [PATCH 247/416] Improve comments and docstrings --- stumpy/core.py | 64 ++++++++++++++++++++++++++------------------------ tests/naive.py | 31 ++++++++++++++---------- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c643f668c..92d4cbde3 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2596,24 +2596,25 @@ def _merge_topk_PI(PA, PB, IA, IB): stop = np.searchsorted(PA[i], PB[i, -1], side="right") if stop == 0: - # means PB[i, -1] < PA[i, 0], i.e. the maximum value in PB[i] is less - # than smallest value in PA[i]. So, we should replace PA[i] with PB[i]. + # means `PB[i, -1] < PA[i, 0]`, i.e. the maximum value in `PB[i]` is + # less than smallest value in `PA[i]`. So, we should replace `PA[i]` + # with `PB[i]` so that we have the top-k smallest. PA[i] = PB[i] IA[i] = IB[i] continue for j in range(PB.shape[1]): if PB[i, j] >= PA[i, -1]: - # PB[i] is sorted ascaendingly. - # Hence: PB[i, j+1] >= PB[i, j] >= PA[i, -1] + # `PB[i]` is sorted ascaendingly. + # Hence, in next iteration: `PB[i, j+1] >= PB[i, j] >= PA[i, -1]` break - # PB[i, j] is less than PA[i, -1], the maximum value in PA[i]. so, - # we MUST update PA[i]. + # `PB[i, j]` is less than `PA[i, -1]`, the maximum value in `PA[i]`. + # so, we must update `PA[i]` to have the top-k smallest values. idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start - _shift_insert_at_index(PA[i], idx, PB[i, j]) - _shift_insert_at_index(IA[i], idx, IB[i, j]) + _shift_insert_at_index(PA[i], idx, PB[i, j], shift="right") + _shift_insert_at_index(IA[i], idx, IB[i, j], shift="right") start = idx stop += 1 # because of shifting elements to the right by one @@ -2622,9 +2623,13 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit(parallel=True) def _merge_topk_ρI(ρA, ρB, IA, IB): """ - Merge two top-k pearson profiles ρA and ρB, and update ρA (in place). In the - merged array (from right to left): the priority is with ρA (from right to left), - and then with ρB(from right to left) Also, update IA accordingly. + Merge two top-k pearson profiles ρA and ρB, and update ρA (in place). The priority + is with ρA (from right to left) and then ρB (from right to left). + + Example: + note: the prime symbol below is to distinguish two elements with same value + ρA = [0, 0', 1], and ρB = [0, 1, 1']. + merging outcome: [1_B, 1'_B, 1_A] Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. 
@@ -2654,42 +2659,39 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): stop = ρB.shape[1] if start == ρB.shape[1]: - # means ρB[i, 0] > ρA[i, -1], i.e. the minimum value in ρB[i] is greater - # than greatest value in ρA[i]. So, we should replace ρA[i] with ρB[i]. + # means `ρB[i, 0] > ρA[i, -1]`, i.e. the minimum value in `ρB[i]` is + # greater than greatest value in `ρA[i]`. So, we should replace `ρA[i]` + # with `ρB[i]` so that we have top-k largest values ρA[i] = ρB[i] IA[i] = IB[i] continue for j in range(ρB.shape[1] - 1, -1, -1): if ρB[i, j] <= ρA[i, 0]: - # ρB[i] is sorted ascaendingly. - # Hence, next iteration: ρB[i, j-1] <= ρB[i, j] <= ρA[i, 0] + # `ρB[i]` is sorted ascaendingly. + # Hence, in the next iteration: `ρB[i, j-1] <= ρB[i, j] <= ρA[i, 0]` break - # ρB[i, j] is greater than ρA[i, 0], the minimum value in ρA[i]. so, - # we MUST update ρA[i] to make sure we are keeping top-k largest values. + # `ρB[i, j]` is greater than `ρA[i, 0]`, the minimum value in `ρA[i]`. + # so, we must update `ρA[i]` to make sure we have top-k largest values. idx = np.searchsorted(ρA[i, start:stop], ρB[i, j], side="left") + start _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") - stop = idx # because of shifting elements to the left by one + stop = idx if start > 0: - start -= 1 + start -= 1 # because of shifting elements to the left by one @njit def _shift_insert_at_index(a, idx, v, shift="right"): """ If `shift=right`, all elements in `a[idx:]` are shifted to the right by one element - and the last element is discarded. If `shift=left` or any other string value, - all elements in `a[:idx]` are shifted to the left by one element and the first - element is discarded. In both cases, the length of `a` remains unchanged. - - Note - ---- - No check is performed to ensure the value of parameter `shift` is 1 or -1. - It is user's responsibility to provide a valid value for this parameter. + and the last element is discarded. If `shift=left` (or any string value other + than "right") all elements in `a[:idx]` are shifted to the left by one element + and the first element is discarded. In both cases, the length of `a` remains + unchanged. Parameters ---------- @@ -2698,9 +2700,9 @@ def _shift_insert_at_index(a, idx, v, shift="right"): idx: int The index at which the value `v` should be inserted. This can be any - integer number from `0` to `len(a)`. When `idx=0` and `shift` is set to - "right", or when `idx=len(a)` and `shift` is set to any other string value, - then no change will occur on the input array `a`. + integer number from `0` to `len(a)`. When `idx=0` and `shift="right"`, + OR when `idx=len(a)` and `shift != "right"`, then no change will occur on + the input array `a`. v: float The value that should be inserted into array `a` at index `idx` @@ -2723,5 +2725,5 @@ def _shift_insert_at_index(a, idx, v, shift="right"): else: if 0 < idx <= len(a): a[: idx - 1] = a[1:idx] - # elements were shifted to left, and thus the insertion becomes `idx-1` + # elements were shifted to left, thus the insertion index becomes `idx-1` a[idx - 1] = v diff --git a/tests/naive.py b/tests/naive.py index 03de99bf5..47fe32c21 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1793,20 +1793,26 @@ def merge_topk_PI(PA, PB, IA, IB): def merge_topk_ρI(ρA, ρB, IA, IB): - # this is to merge two pearson profiles, each is a 2D array where each row - # contains an ascendingly-sorted values. - # Note that we are interested in keeping the top-k largest values. 
- # In the merged array (from right to left): the priority is with ρA (from right - # to left), and then with ρB(from right to left) + # this is to merge two pearson profiles `ρA` and `ρB`, where each is a 2D array + # and each row is sorted ascendingly. Smaller distance corresponds to larger + # pearson values. Therefore, we want to keep top-k largest values in merging + # row `ρA[i]` and `ρB[i]`. The priority is with `ρA (from right to left)` and + # then `ρB (from right to left)`. # Example: - # ρA = [0(I), 0(II), 1], and ρB = [0', 1'(I), 1'(II)]. - # the prime symbol is to indicate that the values are from ρB - # and the greek numbers are to differntiate two same values in one array - - # so, the outcome of merging process should be: - # [0', 0(I), 0(II), 1'(I), 1'(II), 1] - + # note: the prime symbol below is to distinguish two elements with same value + # ρA = [0, 0', 1], and ρB = [0, 1, 1']. + # merging outcome: [1_B, 1'_B, 1_A] + + # Naive Implementation: + # keeping top-k largest with the aforementioned priority rule is the same as + # sorting ascendingly while prioritizing `ρB` (from left to right) over `ρA` + # (from left to right), and then keep the second half of merged array. + + # In our example, it would be like this: + # merging `ρB` and `ρA` (prioritizing smaller values in `ρB`): + # [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second half + # of this array (and discard the first half) profile = np.column_stack((ρB, ρA)) indices = np.column_stack((IB, IA)) @@ -1814,5 +1820,6 @@ def merge_topk_ρI(ρA, ρB, IA, IB): profile[:, :] = np.take_along_axis(profile, idx, axis=1) indices[:, :] = np.take_along_axis(indices, idx, axis=1) + # keep the last k elements (top-k largest values) ρA[:, :] = profile[:, ρA.shape[1] :] IA[:, :] = indices[:, ρA.shape[1] :] From 9afba6c3d1460fea3b906623870678ee5364dc70 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 10:15:58 -0600 Subject: [PATCH 248/416] Correct naive implementation --- tests/naive.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 47fe32c21..802d95a73 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -720,7 +720,7 @@ def __init__(self, T, m, excl_zone=None, p=2.0): self._T_isfinite = np.isfinite(self._T) self._m = m self._p = p - if excl_zone is None: + if excl_zone is None: # see stumpi, and make similar changes here self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 @@ -792,11 +792,13 @@ def __init__(self, T, m, excl_zone=None): self._T = self._T.copy() self._T_isfinite = np.isfinite(self._T) self._m = m - if excl_zone is None: + + self._excl_zone = excl_zone + if self._excl_zone is None: self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = stump(T, m) + mp = stump(T, m, exclusion_zone=self._excl_zone) self.P_ = mp[:, 0] self.I_ = mp[:, 1].astype(np.int64) self.left_P_ = np.full(self.P_.shape, np.inf) From 05945a026de823314840bd86cd1f87da5a65e312 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 11:20:51 -0600 Subject: [PATCH 249/416] Enhance naive function to support top matrix profile --- tests/naive.py | 54 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 802d95a73..ab8d251cb 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -787,25 +787,28 @@ def update(self, t): class stumpi_egress(object): - 
def __init__(self, T, m, excl_zone=None): + def __init__(self, T, m, excl_zone=None, k=1): self._T = np.asarray(T) self._T = self._T.copy() self._T_isfinite = np.isfinite(self._T) self._m = m + self._k = k self._excl_zone = excl_zone if self._excl_zone is None: self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = stump(T, m, exclusion_zone=self._excl_zone) - self.P_ = mp[:, 0] - self.I_ = mp[:, 1].astype(np.int64) - self.left_P_ = np.full(self.P_.shape, np.inf) - self.left_I_ = mp[:, 2].astype(np.int64) - for i, j in enumerate(self.left_I_): - if j >= 0: - D = core.mass(self._T[i : i + self._m], self._T[j : j + self._m]) + mp = stump(T, m, exclusion_zone=self._excl_zone, k=k) + self.P_ = mp[:, :k].astype(np.float64) + self.I_ = mp[:, k : 2 * k].astype(np.int64) + + self.left_I_ = mp[:, 2 * k].astype(np.int64) + self.left_P_ = np.full_like(self.left_I_, np.inf, dtype=np.float64) + + for i, nn_i in enumerate(self.left_I_): + if nn_i >= 0: + D = core.mass(self._T[i : i + self._m], self._T[nn_i : nn_i + self._m]) self.left_P_[i] = D[0] self._n_appended = 0 @@ -821,8 +824,8 @@ def update(self, t): self._T[-1] = 0 self._n_appended += 1 - self.P_[:] = np.roll(self.P_, -1) - self.I_[:] = np.roll(self.I_, -1) + self.P_[:, :] = np.roll(self.P_, -1, axis=0) + self.I_[:, :] = np.roll(self.I_, -1, axis=0) self.left_P_[:] = np.roll(self.left_P_, -1) self.left_I_[:] = np.roll(self.left_I_, -1) @@ -835,22 +838,25 @@ def update(self, t): D[:] = np.inf apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) + # update top-k matrix profile using newly calculated distance profile `D` for j in range(D.shape[0]): - if D[j] < self.P_[j]: - self.I_[j] = D.shape[0] - 1 + self._n_appended - self.P_[j] = D[j] - - I_last = np.argmin(D) + if D[j] < self.P_[j, -1]: + pos = np.searchsorted(self.P_[j], D[j], side="right") + self.P_[j] = np.insert(self.P_[j], pos, D[j])[:-1] + self.I_[j] = np.insert( + self.I_[j], pos, D.shape[0] - 1 + self._n_appended + ) - if np.isinf(D[I_last]): - self.I_[-1] = -1 - self.P_[-1] = np.inf - else: - self.I_[-1] = I_last + self._n_appended - self.P_[-1] = D[I_last] + # update top-k for the last, newly-updated index + I_last_topk = np.argsort(D)[: self._k] + self.P_[-1] = D[I_last_topk] + self.I_[-1] = I_last_topk + self._n_appended + self.I_[-1][self.P_[-1] == np.inf] = -1 - self.left_I_[-1] = I_last + self._n_appended - self.left_P_[-1] = D[I_last] + # for last indx, the left matrix profile value is self.P_[-1, 0] + # and the same goes for left matrix profile index + self.left_P_[-1] = self.P_[-1, 0] + self.left_I_[-1] = self.I_[-1, 0] def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m): From a72aeb76a5670a218955c472693b0b7373fc6251 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:19:51 -0600 Subject: [PATCH 250/416] Enhace performant function to support topk matrix profile --- stumpy/stumpi.py | 144 +++++++++++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 56 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 1a8bbf72b..63c6ff42e 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -36,19 +36,30 @@ class stumpi: The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. 
+ Attributes ---------- P_ : numpy.ndarray - The updated matrix profile for `T` + The updated (top-k) matrix profile for `T`. When `k=1` (default), the first + (and only) column in this 2D array consists of the matrix profile. When + `k > 1`, the output has exactly `k` columns consist of the top-k matrix + profile. I_ : numpy.ndarray - The updated matrix profile indices for `T` + The updated (top-k) matrix profile indices for `T`. When `k=1` (default), + the first (and only) column in this 2D array consists of the matrix profile + indices. When `k > 1`, the output has exactly `k` columns consist of the + top-k matrix profile indices. left_P_ : numpy.ndarray - The updated left matrix profile for `T` + The updated left (top-1) matrix profile for `T` left_I_ : numpy.ndarray - The updated left matrix profile indices for `T` + The updated left (top-1) matrix profile indices for `T` T_ : numpy.ndarray The updated time series or sequence for which the matrix profile and matrix @@ -81,7 +92,7 @@ class stumpi: array([-1, 0, 1, 2]) """ - def __init__(self, T, m, egress=True, normalize=True, p=2.0): + def __init__(self, T, m, egress=True, normalize=True, p=2.0, k=1): """ Initialize the `stumpi` object @@ -106,27 +117,34 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. """ self._T = core._preprocess(T) core.check_window_size(m, max_size=self._T.shape[-1]) self._m = m + self._k = k + self._n = self._T.shape[0] self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._T_isfinite = np.isfinite(self._T) self._egress = egress - mp = stump(self._T, self._m) - self._P = mp[:, 0].astype(np.float64) - self._I = mp[:, 1].astype(np.int64) - self._left_I = mp[:, 2].astype(np.int64) - self._left_P = np.empty(self._P.shape, dtype=np.float64) - self._left_P[:] = np.inf + mp = stump(self._T, self._m, k=self._k) + self._P = mp[:, :k].astype(np.float64) + self._I = mp[:, k : 2 * k].astype(np.int64) + + self._left_I = mp[:, 2 * k].astype(np.int64) + self._left_P = np.full_like(self._left_I, np.inf, dtype=np.float64) self._T, self._M_T, self._Σ_T = core.preprocess(self._T, self._m) # Retrieve the left matrix profile values - for i, j in enumerate(self._left_I): - if j >= 0: - D = core.mass(self._T[i : i + self._m], self._T[j : j + self._m]) + for i, nn_i in enumerate(self._left_I): + if nn_i >= 0: + D = core.mass(self._T[i : i + self._m], self._T[nn_i : nn_i + self._m]) self._left_P[i] = D[0] Q = self._T[-m:] @@ -138,7 +156,7 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0): def update(self, t): """ Append a single new data point, `t`, to the existing time series `T` and update - the matrix profile and matrix profile indices. + the (top-k) matrix profile and matrix profile indices. 
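A short usage sketch of the streaming interface with the new `k` parameter (this assumes the `k` keyword and the k-column `P_`/`I_` attributes introduced by this patch; with `k=1` the previous top-1 behavior is preserved):

    import numpy as np
    import stumpy

    np.random.seed(0)
    T = np.random.rand(64)

    stream = stumpy.stumpi(T, m=8, k=3)   # maintain the top-3 matrix profile
    stream.update(np.random.rand())       # ingress a single new data point

    print(stream.P_.shape)       # one row per subsequence, 3 columns (top-3 distances)
    print(stream.I_.shape)       # matching top-3 nearest-neighbor indices
    print(stream.left_P_.shape)  # left matrix profile remains top-1 (1-D)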
Parameters ---------- @@ -161,8 +179,8 @@ def update(self, t): def _update_egress(self, t): """ - Ingress a new data point, egress the oldest data point, and update the matrix - profile and matrix profile indices + Ingress a new data point, egress the oldest data point, and update the (top-k) + matrix profile and matrix profile indices """ self._n = self._T.shape[0] l = self._n - self._m + 1 - 1 # Subtract 1 due to egress @@ -174,8 +192,8 @@ def _update_egress(self, t): t_drop = self._T[l - 1] self._T_isfinite[:-1] = self._T_isfinite[1:] - self._I[:-1] = self._I[1:] - self._P[:-1] = self._P[1:] + self._I[:-1, :] = self._I[1:, :] + self._P[:-1, :] = self._P[1:, :] self._left_I[:-1] = self._left_I[1:] self._left_P[:-1] = self._left_P[1:] @@ -211,28 +229,34 @@ def _update_egress(self, t): core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - update_idx = np.argwhere(D < self._P).flatten() - self._I[update_idx] = D.shape[0] + self._n_appended - 1 # D.shape[0] is base-1 - self._P[update_idx] = D[update_idx] - - I_last = np.argmin(D) - - if np.isinf(D[I_last]): - self._I[-1] = -1 - self._P[-1] = np.inf - else: - self._I[-1] = I_last + self._n_appended - self._P[-1] = D[I_last] - - self._left_I[-1] = I_last + self._n_appended - self._left_P[-1] = D[I_last] + update_idx = np.argwhere(D < self._P[:, -1]).flatten() + for i in update_idx: + pos = np.searchsorted(self._P[i], D[i], side="right") + core._shift_insert_at_index(self._P[i], pos, D[i]) + core._shift_insert_at_index( + self._I[i], pos, D.shape[0] + self._n_appended - 1 + ) + # D.shape[0] is base-1 + + # O(Nlog(K)) time complexity + self._P[-1] = np.inf + self._I[-1] = -1 + for i, d in enumerate(D): + if d < self._P[-1, -1]: # mean last index, maximum value (k-th value) + pos = np.searchsorted(self._P[-1], d, side="right") + core._shift_insert_at_index(self._P[-1], pos, d) + core._shift_insert_at_index(self._I[-1], pos, i + self._n_appended) + + # for last index, the left matrix profile is basically `self._P[-1, 0]` + self._left_P[-1] = self._P[-1, 0] + self._left_I[-1] = self._I[-1, 0] self._QT[:] = self._QT_new def _update(self, t): """ - Ingress a new data point and update the matrix profile and matrix profile - indices without egressing the oldest data point + Ingress a new data point and update the (top-k) matrix profile and matrix + profile indices without egressing the oldest data point """ n = self._T.shape[0] l = n - self._m + 1 @@ -269,25 +293,33 @@ def _update(self, t): core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - update_idx = np.argwhere(D[:l] < self._P[:l]).flatten() - self._I[update_idx] = l - self._P[update_idx] = D[update_idx] + update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten() + for i in update_idx: + pos = np.searchsorted(self._P[i], D[i], side="right") + core._shift_insert_at_index(self._P[i], pos, D[i]) + core._shift_insert_at_index(self._I[i], pos, l) - I_last = np.argmin(D) - if np.isinf(D[I_last]): - I_new = np.append(self._I, -1) - P_new = np.append(self._P, np.inf) - else: - I_new = np.append(self._I, I_last) - P_new = np.append(self._P, D[I_last]) - left_I_new = np.append(self._left_I, I_last) - left_P_new = np.append(self._left_P, D[I_last]) + # Calculating top-k and left matrix profile for new subsequence whose + # distance profie is D + + # O(Nlog(K)) time complexity for obtaining top-k + P_new = np.full((1, self._k), np.inf, dtype=np.float64) + I_new = np.full((1, self._k), -1, dtype=np.int64) + + for i, d in enumerate(D): + if d < P_new[-1]: # maximum value 
in sorted array P_new + pos = np.searchsorted(P_new, d, side="right") + core._shift_insert_at_index(P_new, pos, d) + core._shift_insert_at_index(I_new, pos, i) + + left_I_new = P_new[0, 0] + left_P_new = I_new[0, 0] self._T = T_new - self._P = P_new - self._I = I_new - self._left_I = left_I_new - self._left_P = left_P_new + self._P = np.append(self._P, P_new, axis=0) + self._I = np.append(self._I, I_new, axis=0) + self._left_P = np.append(self._left_P, left_P_new) + self._left_I = np.append(self._left_I, left_I_new) self._QT = QT_new self._M_T = M_T_new self._Σ_T = Σ_T_new @@ -295,28 +327,28 @@ def _update(self, t): @property def P_(self): """ - Get the matrix profile + Get the (top-k) matrix profile """ return self._P.astype(np.float64) @property def I_(self): """ - Get the matrix profile indices + Get the (top-k) matrix profile indices """ return self._I.astype(np.int64) @property def left_P_(self): """ - Get the left matrix profile + Get the (top-1) left matrix profile """ return self._left_P.astype(np.float64) @property def left_I_(self): """ - Get the left matrix profile indices + Get the (top-1) sleft matrix profile indices """ return self._left_I.astype(np.int64) From c7ffdac6b94a5615c8c4bd2a5c9818af2d6af6c1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:21:13 -0600 Subject: [PATCH 251/416] Update existing test functions --- tests/test_stumpi.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index 0fa2c3066..e67446d0a 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -33,8 +33,8 @@ def test_stumpi_self_join(): comp_left_I = stream.left_I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_I = ref_mp[:, 1].reshape(-1, 1) ref_left_P = np.empty(ref_P.shape) ref_left_P[:] = np.inf ref_left_I = ref_mp[:, 2] @@ -211,8 +211,8 @@ def test_stumpi_init_nan_inf_self_join(substitute, substitution_locations): stream.T_[substitution_location] = substitute ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_I = ref_mp[:, 1].reshape(-1, 1) naive.replace_inf(ref_P) naive.replace_inf(comp_P) @@ -386,8 +386,8 @@ def test_stumpi_stream_nan_inf_self_join(substitute, substitution_locations): stream.T_[30:][substitution_location] = substitute ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] + ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_I = ref_mp[:, 1].reshape(-1, 1) naive.replace_inf(ref_P) naive.replace_inf(comp_P) @@ -547,7 +547,7 @@ def test_stumpi_constant_subsequence_self_join(): # comp_I = stream.I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] + ref_P = ref_mp[:, 0].reshape(-1, 1) # ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) @@ -702,7 +702,7 @@ def test_stumpi_identical_subsequence_self_join(): # comp_I = stream.I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] + ref_P = ref_mp[:, 0].reshape(-1, 1) # ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) From 34941c2e6eb602d1f1e02a87056870abcd36e8ca Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:23:23 -0600 Subject: [PATCH 252/416] Correct format --- stumpy/stumpi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/stumpy/stumpi.py b/stumpy/stumpi.py index 63c6ff42e..c147f7a1c 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -119,9 +119,9 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0, k=1): ignored when `normalize == True`. k : int, default 1 - The number of top `k` smallest distances used to construct the matrix profile. - Note that this will increase the total computational time and memory usage - when k > 1. + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when `k > 1`. """ self._T = core._preprocess(T) core.check_window_size(m, max_size=self._T.shape[-1]) From 8cbe3081fea873cce2caea20bba62662e4183630 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:32:54 -0600 Subject: [PATCH 253/416] Fix shape of array --- stumpy/stumpi.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index c147f7a1c..4482c23e4 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -303,8 +303,8 @@ def _update(self, t): # distance profie is D # O(Nlog(K)) time complexity for obtaining top-k - P_new = np.full((1, self._k), np.inf, dtype=np.float64) - I_new = np.full((1, self._k), -1, dtype=np.int64) + P_new = np.full(self._k, np.inf, dtype=np.float64) + I_new = np.full(self._k, -1, dtype=np.int64) for i, d in enumerate(D): if d < P_new[-1]: # maximum value in sorted array P_new @@ -312,12 +312,12 @@ def _update(self, t): core._shift_insert_at_index(P_new, pos, d) core._shift_insert_at_index(I_new, pos, i) - left_I_new = P_new[0, 0] - left_P_new = I_new[0, 0] + left_I_new = P_new[0] + left_P_new = I_new[0] self._T = T_new - self._P = np.append(self._P, P_new, axis=0) - self._I = np.append(self._I, I_new, axis=0) + self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) + self._I = np.append(self._I, I_new.reshape(1, -1), axis=0) self._left_P = np.append(self._left_P, left_P_new) self._left_I = np.append(self._left_I, left_I_new) self._QT = QT_new From 2b5038dcc6fc3099db6ba1d4f07b1029ad4e3967 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:35:43 -0600 Subject: [PATCH 254/416] Fix shape of array --- tests/test_stumpi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index e67446d0a..b43a6e8d1 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -35,9 +35,8 @@ def test_stumpi_self_join(): ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) ref_P = ref_mp[:, 0].reshape(-1, 1) ref_I = ref_mp[:, 1].reshape(-1, 1) - ref_left_P = np.empty(ref_P.shape) - ref_left_P[:] = np.inf ref_left_I = ref_mp[:, 2] + ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64) for i, j in enumerate(ref_left_I): if j >= 0: D = core.mass(stream.T_[i : i + m], stream.T_[j : j + m]) From 9cc800a90e8057a1d8e7978a8ffdbad5ff09cd29 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 12:42:35 -0600 Subject: [PATCH 255/416] Add kind keyword for sorting --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index ab8d251cb..721e89aa2 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -848,7 +848,7 @@ def update(self, t): ) # update top-k for the last, newly-updated index - I_last_topk = np.argsort(D)[: self._k] + I_last_topk = np.argsort(D, kind="mergesort")[: self._k] self.P_[-1] = D[I_last_topk] self.I_[-1] = I_last_topk + self._n_appended 
self.I_[-1][self.P_[-1] == np.inf] = -1 From 5b0b3fefbbde49ad6c417819c29706cfb647fadf Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 14:29:23 -0600 Subject: [PATCH 256/416] Fix bugs --- stumpy/stumpi.py | 4 ++-- tests/naive.py | 2 +- tests/test_stumpi.py | 23 +++++++++++++---------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 4482c23e4..727c9c64b 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -312,8 +312,8 @@ def _update(self, t): core._shift_insert_at_index(P_new, pos, d) core._shift_insert_at_index(I_new, pos, i) - left_I_new = P_new[0] - left_P_new = I_new[0] + left_I_new = I_new[0] + left_P_new = P_new[0] self._T = T_new self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) diff --git a/tests/naive.py b/tests/naive.py index 721e89aa2..5b404f50c 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -845,7 +845,7 @@ def update(self, t): self.P_[j] = np.insert(self.P_[j], pos, D[j])[:-1] self.I_[j] = np.insert( self.I_[j], pos, D.shape[0] - 1 + self._n_appended - ) + )[:-1] # update top-k for the last, newly-updated index I_last_topk = np.argsort(D, kind="mergesort")[: self._k] diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index b43a6e8d1..262d88495 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -35,7 +35,7 @@ def test_stumpi_self_join(): ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) ref_P = ref_mp[:, 0].reshape(-1, 1) ref_I = ref_mp[:, 1].reshape(-1, 1) - ref_left_I = ref_mp[:, 2] + ref_left_I = ref_mp[:, 2].astype(np.int64) ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64) for i, j in enumerate(ref_left_I): if j >= 0: @@ -860,21 +860,24 @@ def test_stumpi_profile_index_match(): T_stream = T_full[:warm_start].copy() stream = stumpi(T_stream, m, egress=True) - P = np.full(stream.P_.shape, np.inf) - left_P = np.full(stream.left_P_.shape, np.inf) + P = np.full_like(stream.P_, np.inf, dtype=np.float64) + left_P = np.full_like(stream.left_P_, np.inf, dtype=np.float64) n = 0 for i in range(len(T_stream), len(T_full)): t = T_full[i] stream.update(t) - P[:] = np.inf - idx = np.argwhere(stream.I_ >= 0).flatten() - P[idx] = naive.distance( - naive.z_norm(T_full_subseq[idx + n + 1], axis=1), - naive.z_norm(T_full_subseq[stream.I_[idx]], axis=1), - axis=1, - ) + P[:, :] = np.inf + mask = stream.I_ >= 0 + + for j in range(P.shape[1]): # `j` as j-th nearest neighbor + IDX = np.flatnonzero(mask[:, j]) + P[IDX, j] = naive.distance( + naive.z_norm(T_full_subseq[IDX + n + 1], axis=1), + naive.z_norm(T_full_subseq[stream.I_[IDX, j]], axis=1), + axis=1, + ) left_P[:] = np.inf idx = np.argwhere(stream.left_I_ >= 0).flatten() From 0335294ae9ee6d195ec3017a82cee6a66d59f699 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 21:49:48 -0600 Subject: [PATCH 257/416] Remove ineffective inner prange --- stumpy/stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 80923f33c..59be1e61d 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -493,7 +493,7 @@ def _stump( p_norm_R = np.abs(2 * m * (1 - ρR[0, :])) for i in prange(p_norm.shape[0]): - for j in prange(p_norm.shape[1]): + for j in range(p_norm.shape[1]): if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD: p_norm[i, j] = 0.0 From 6c05e30794cddc1a7f6e3c3c4579071c152fed20 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 22:28:50 -0600 Subject: [PATCH 258/416] Temporarily added parameter k to avoid decorator failure --- 
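(A minimal standalone sketch of the sorted top-k insertion pattern that the naive `update` above relies on, assuming plain NumPy and an ascending `P_row` padded with `np.inf`; this is an illustration, not the library code itself.)

    import numpy as np

    def insert_topk(P_row, I_row, d, idx):
        # Insert distance `d` (whose neighbor index is `idx`) into the ascending
        # top-k buffers, then drop the now-evicted worst (last) entry
        if d < P_row[-1]:
            pos = np.searchsorted(P_row, d, side="right")
            P_row[:] = np.insert(P_row, pos, d)[:-1]
            I_row[:] = np.insert(I_row, pos, idx)[:-1]

    P_row = np.array([1.0, 2.5, np.inf])
    I_row = np.array([7, 3, -1])
    insert_topk(P_row, I_row, 2.0, 11)
    print(P_row)  # [1.  2.  2.5]
    print(I_row)  # [ 7 11  3]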
stumpy/aampi.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/stumpy/aampi.py b/stumpy/aampi.py index de7c24126..938b5b488 100644 --- a/stumpy/aampi.py +++ b/stumpy/aampi.py @@ -8,6 +8,7 @@ class aampi: + # needs to be enhanced to support top-k matrix profile """ Compute an incremental non-normalized (i.e., without z-normalization) matrix profile for streaming data @@ -28,6 +29,11 @@ class aampi: p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray @@ -62,7 +68,7 @@ class aampi: Note that we have extended this algorithm for AB-joins as well. """ - def __init__(self, T, m, egress=True, p=2.0): + def __init__(self, T, m, egress=True, p=2.0, k=1): """ Initialize the `stumpi` object @@ -81,6 +87,12 @@ def __init__(self, T, m, egress=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when k > 1. """ self._T = core._preprocess(T) core.check_window_size(m, max_size=self._T.shape[-1]) From 23a54ba827af4913fbad9b91660c189c3f983ca2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 22:31:29 -0600 Subject: [PATCH 259/416] Improve comments --- stumpy/scrump.py | 4 ++-- tests/naive.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 9d1f69d3e..0dd4b25dd 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -197,8 +197,8 @@ def _compute_PI( ) core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) - # In the case of a self-join, the calculated distances can also be used - # to refine the top-k for all non-trivial subsequences + # In the case of a self-join, the calculated distance profile can also be + # used to refine the top-k for all non-trivial subsequences if excl_zone is not None: # Note that the squared distance, `squared_distance_profile[j]`, # between subsequences `S_i = T[i : i + m]` and `S_j = T[j : j + m]` diff --git a/tests/naive.py b/tests/naive.py index 5b404f50c..f4c39a172 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1457,7 +1457,8 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): P[j - g] = np.insert(P[j - g], pos, d)[:-1] I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] - # self-join only + # In the case of a self-join, the calculated distance profile can also be + # used to refine the top-k for all non-trivial subsequences if exclusion_zone is not None: for idx in np.flatnonzero(distance_profile < P[:, -1]): pos = np.searchsorted(P[idx], distance_profile[idx], side="right") From 5af6ec00b320cec630eb9a61341c851db93bb610 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 22:40:19 -0600 Subject: [PATCH 260/416] Improve comments --- tests/naive.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index f4c39a172..f257dc4a0 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1803,25 +1803,24 @@ def merge_topk_PI(PA, PB, IA, IB): def merge_topk_ρI(ρA, ρB, IA, IB): # this is to merge two pearson profiles `ρA` and `ρB`, where each is a 2D array - # and each row is sorted ascendingly. 
Smaller distance corresponds to larger - # pearson values. Therefore, we want to keep top-k largest values in merging - # row `ρA[i]` and `ρB[i]`. The priority is with `ρA (from right to left)` and - # then `ρB (from right to left)`. + # and each row is sorted ascendingly. we want to keep top-k largest values in + # merging row `ρA[i]` and `ρB[i]`. - # Example: - # note: the prime symbol below is to distinguish two elements with same value + # In case of ties between `ρA` and `ρB`, the priority is with `ρA`. In case + # of ties within `ρA, the priority is with an element with greater index. + # Example + # note: the prime symbol is to distinguish two elements with same value # ρA = [0, 0', 1], and ρB = [0, 1, 1']. # merging outcome: [1_B, 1'_B, 1_A] # Naive Implementation: - # keeping top-k largest with the aforementioned priority rule is the same as - # sorting ascendingly while prioritizing `ρB` (from left to right) over `ρA` - # (from left to right), and then keep the second half of merged array. - - # In our example, it would be like this: - # merging `ρB` and `ρA` (prioritizing smaller values in `ρB`): - # [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second half - # of this array (and discard the first half) + # keeping top-k largest with the aforementioned priority rules is the same as + # `merge_topk_PI` but with swapping `ρA` and `ρB` + + # For the same example: + # merging `ρB` and `ρA` ascendingly while choosing `ρB` over `ρA` in case of + # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second + # half of this array, and discard the first half. profile = np.column_stack((ρB, ρA)) indices = np.column_stack((IB, IA)) From 26cec6e1a2ba8d11d562483e16421584cdc2bf5e Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 22:48:51 -0600 Subject: [PATCH 261/416] Improve docstring --- stumpy/core.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 92d4cbde3..74df988b2 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2623,11 +2623,16 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit(parallel=True) def _merge_topk_ρI(ρA, ρB, IA, IB): """ - Merge two top-k pearson profiles ρA and ρB, and update ρA (in place). The priority - is with ρA (from right to left) and then ρB (from right to left). + Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) by + keeping the top-k largest values in merging two `top-k` rows `ρA[i]` and `ρB[i]`, + each sorted ascendingly. + + from right to left of the merged array: In case of ties between `ρA[i]` and + `ρB[i]`, the priority is with `ρA[i]`, and in case of ties within `ρA[i]`, + the priority is with element with greater index. Example: - note: the prime symbol below is to distinguish two elements with same value + note: the prime symbol is to distinguish two elements with same value ρA = [0, 0', 1], and ρB = [0, 1, 1']. 
merging outcome: [1_B, 1'_B, 1_A] From b177c84eccd3d01ed72a7b1c80f1988725342afa Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 22:59:16 -0600 Subject: [PATCH 262/416] Add KNN test function for stumpi --- tests/test_stumpi.py | 172 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index 262d88495..2d00ab525 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -891,3 +891,175 @@ def test_stumpi_profile_index_match(): npt.assert_almost_equal(stream.left_P_, left_P) n += 1 + + +def test_stumpi_self_join_KNN(): + m = 3 + zone = int(np.ceil(m / 4)) + + for k in range(2, 4): + seed = np.random.randint(100000) + np.random.seed(seed) + + T = np.random.rand(30) + stream = stumpi(T, m, egress=False, k=k) + for i in range(34): + t = np.random.rand() + stream.update(t) + + comp_P = stream.P_ + comp_I = stream.I_ + comp_left_P = stream.left_P_ + comp_left_I = stream.left_I_ + + ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True, k=k) + ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_I = ref_mp[:, 1].reshape(-1, 1) + ref_left_I = ref_mp[:, 2].astype(np.int64) + ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64) + for i, j in enumerate(ref_left_I): + if j >= 0: + D = core.mass(stream.T_[i : i + m], stream.T_[j : j + m]) + ref_left_P[i] = D[0] + + naive.replace_inf(ref_P) + naive.replace_inf(ref_left_P) + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) + + np.random.seed(seed) + T = np.random.rand(30) + T = pd.Series(T) + stream = stumpi(T, m, egress=False, k=k) + for i in range(34): + t = np.random.rand() + stream.update(t) + + comp_P = stream.P_ + comp_I = stream.I_ + comp_left_P = stream.left_P_ + comp_left_I = stream.left_I_ + + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) + + +def test_stumpi_self_join_egress_KNN(): + m = 3 + + for k in range(2, 4): + seed = np.random.randint(100000) + np.random.seed(seed) + n = 30 + T = np.random.rand(n) + + ref_mp = naive.stumpi_egress(T, m, k=k) + ref_P = ref_mp.P_.copy() + ref_I = ref_mp.I_ + ref_left_P = ref_mp.left_P_.copy() + ref_left_I = ref_mp.left_I_ + + stream = stumpi(T, m, egress=True, k=k) + + comp_P = stream.P_.copy() + comp_I = stream.I_ + comp_left_P = stream.left_P_.copy() + comp_left_I = stream.left_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(ref_left_P) + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) + + for i in range(34): + t = np.random.rand() + ref_mp.update(t) + stream.update(t) + + comp_P = stream.P_.copy() + comp_I = stream.I_ + comp_left_P = stream.left_P_.copy() + comp_left_I = stream.left_I_ + + ref_P = ref_mp.P_.copy() + ref_I = ref_mp.I_ + ref_left_P = ref_mp.left_P_.copy() + ref_left_I = ref_mp.left_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(ref_left_P) + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + 
npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) + + np.random.seed(seed) + T = np.random.rand(n) + T = pd.Series(T) + + ref_mp = naive.stumpi_egress(T, m, k=k) + ref_P = ref_mp.P_.copy() + ref_I = ref_mp.I_ + ref_left_P = ref_mp.left_P_.copy() + ref_left_I = ref_mp.left_I_ + + stream = stumpi(T, m, egress=True, k=k) + + comp_P = stream.P_.copy() + comp_I = stream.I_ + comp_left_P = stream.left_P_.copy() + comp_left_I = stream.left_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(ref_left_P) + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) + + for i in range(34): + t = np.random.rand() + t = np.random.rand() + ref_mp.update(t) + stream.update(t) + + comp_P = stream.P_.copy() + comp_I = stream.I_ + comp_left_P = stream.left_P_.copy() + comp_left_I = stream.left_I_ + + ref_P = ref_mp.P_.copy() + ref_I = ref_mp.I_ + ref_left_P = ref_mp.left_P_.copy() + ref_left_I = ref_mp.left_I_ + + naive.replace_inf(ref_P) + naive.replace_inf(ref_left_P) + naive.replace_inf(comp_P) + naive.replace_inf(comp_left_P) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_left_P, comp_left_P) + npt.assert_almost_equal(ref_left_I, comp_left_I) From c5d23452b08e76227a050dfc0cdf5632472706b8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 25 Jun 2022 23:06:20 -0600 Subject: [PATCH 263/416] Fix shape of output for KNN test --- tests/test_stumpi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index 2d00ab525..13c4a6ee4 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -913,9 +913,9 @@ def test_stumpi_self_join_KNN(): comp_left_I = stream.left_I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True, k=k) - ref_P = ref_mp[:, 0].reshape(-1, 1) - ref_I = ref_mp[:, 1].reshape(-1, 1) - ref_left_I = ref_mp[:, 2].astype(np.int64) + ref_P = ref_mp[:, :k] + ref_I = ref_mp[:, k : 2 * k] + ref_left_I = ref_mp[:, 2 * k].astype(np.int64) ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64) for i, j in enumerate(ref_left_I): if j >= 0: From cdc11c8a194b290766ed0cb9e8a0a7c266e0c8d6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 26 Jun 2022 07:09:48 -0600 Subject: [PATCH 264/416] Full test and coverage 1 hr From fa7fa4a620727201652565cc8b366cc696e5d071 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 26 Jun 2022 23:30:55 -0600 Subject: [PATCH 265/416] Avoid using searchsort when k is 1 --- stumpy/gpu_stump.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index ecd8434b9..8a13686d8 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -284,13 +284,17 @@ def _compute_and_update_PI_kernel( indices_R[j] = i if p_norm < profile[j, -1]: - idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) - for g in range(k - 1, idx, -1): - profile[j, g] = profile[j, g - 1] - indices[j, g] = indices[j, g - 1] + if k == 1: + profile[j, 0] = p_norm + indices[j, 0] = i + else: + idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) + for g in range(k - 1, idx, -1): + profile[j, g] = profile[j, g - 1] + indices[j, g] = indices[j, g - 1] - profile[j, idx] = p_norm - indices[j, idx] = i + profile[j, idx] = p_norm + indices[j, idx] 
= i def _gpu_stump( From d41a2e96dd062daedadee82c78b28f5d14ebcb86 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 28 Jun 2022 22:27:02 -0600 Subject: [PATCH 266/416] Revise code according to top k matrix profile structure --- stumpy/stumpi.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 0dc4a039c..619ae8f72 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -143,15 +143,15 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0, k=1): self._T, self._M_T, self._Σ_T = core.preprocess(self._T, self._m) # Retrieve the left matrix profile values - # Since each matrix profile value is the minimum between the left and right - # matrix profile values, we can save time by re-computing only the left matrix - # profile value when the matrix profile index is equal to the right matrix - # profile index. - mask = self._left_I == self._I - self._left_P[mask] = self._P[mask] + # Since each (top-1) matrix profile value is the minimum between the left + # and right matrix profile values, we can save time by re-computing only + # the left matrix profile value when the (top-1) matrix profile index is + # equal to the right matrix profile index. + mask = self._left_I == self._I[:, 0] + self._left_P[mask] = self._P[mask, 0] # Only re-compute the `i`-th left matrix profile value, `self._left_P[i]`, - # when `self._I[i] != self._left_I[i]` + # when `self._I[i, 0] != self._left_I[i]` for i in np.flatnonzero(self._left_I >= 0 & ~mask): j = self._left_I[i] QT = np.dot(self._T[i : i + self._m], self._T[j : j + self._m]) From 38f4c1def82fa62a5004d82192be1ea6425ad328 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 30 Jun 2022 00:15:54 -0600 Subject: [PATCH 267/416] Remove if condition --- stumpy/gpu_stump.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 8a13686d8..ecd8434b9 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -284,17 +284,13 @@ def _compute_and_update_PI_kernel( indices_R[j] = i if p_norm < profile[j, -1]: - if k == 1: - profile[j, 0] = p_norm - indices[j, 0] = i - else: - idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) - for g in range(k - 1, idx, -1): - profile[j, g] = profile[j, g - 1] - indices[j, g] = indices[j, g - 1] + idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) + for g in range(k - 1, idx, -1): + profile[j, g] = profile[j, g - 1] + indices[j, g] = indices[j, g - 1] - profile[j, idx] = p_norm - indices[j, idx] = i + profile[j, idx] = p_norm + indices[j, idx] = i def _gpu_stump( From e4fd875e8560b8099cb6c5d227b0e63f21844570 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 30 Jun 2022 23:05:50 -0600 Subject: [PATCH 268/416] Improve dosctrings --- stumpy/core.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 74df988b2..26949a52a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2571,6 +2571,12 @@ def _merge_topk_PI(PA, PB, IA, IB): Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. + `PA` and `PB` are 2D arrays, with each row sorted ascendingly. To update `PA[i]`, + the array `PB[i]` is traversed forward from index `0` to its last index, and + will update `PA[i]` if its element is smaller than `PA[i, -1]`, i.e. the greatest + value in `PA[i]`. 
In case of tied value `v`, it will be inserted to the right side + of the greatest index in `PA[i]` whose value is `v`. + Parameters ---------- PA : numpy.ndarray @@ -2627,18 +2633,15 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): keeping the top-k largest values in merging two `top-k` rows `ρA[i]` and `ρB[i]`, each sorted ascendingly. - from right to left of the merged array: In case of ties between `ρA[i]` and - `ρB[i]`, the priority is with `ρA[i]`, and in case of ties within `ρA[i]`, - the priority is with element with greater index. - - Example: - note: the prime symbol is to distinguish two elements with same value - ρA = [0, 0', 1], and ρB = [0, 1, 1']. - merging outcome: [1_B, 1'_B, 1_A] - Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. + `ρA` and `ρB` are 2D arrays, with each row sorted ascendingly. To update `ρA[i]`, + the array `ρB[i]` is traversed backward from its last index to index 0, and will + update `ρA[i]` if its element is greater than `ρA[i, 0]`, i.e. the smallest value + in `ρA[i]`. In case of tied value `v`, it will be inserted to the left side of the + lowest index in `ρA[i]` whose value is `v`. + Parameters ---------- ρA : numpy.ndarray @@ -2705,8 +2708,8 @@ def _shift_insert_at_index(a, idx, v, shift="right"): idx: int The index at which the value `v` should be inserted. This can be any - integer number from `0` to `len(a)`. When `idx=0` and `shift="right"`, - OR when `idx=len(a)` and `shift != "right"`, then no change will occur on + integer number from `0` to `len(a)`. When `idx=len(a)` and `shift="right"`, + OR when `idx=0` and `shift != "right"`, then no change will occur on the input array `a`. v: float From 0afb3ec2903e3616683897bb7e7b3f7b2091a507 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 30 Jun 2022 23:07:04 -0600 Subject: [PATCH 269/416] Avoid allocating new memory --- stumpy/scrump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 0dd4b25dd..a28a99f34 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -743,13 +743,13 @@ def update(self): # update left matrix profile and indices cond = PL < self._PL - self._PL = np.where(cond, PL, self._PL) - self._IL = np.where(cond, IL, self._IL) + self._PL[:] = np.where(cond, PL, self._PL) + self._IL[:] = np.where(cond, IL, self._IL) # update right matrix profile and indices cond = PR < self._PR - self._PR = np.where(cond, PR, self._PR) - self._IR = np.where(cond, IR, self._IR) + self._PR[:] = np.where(cond, PR, self._PR) + self._IR[:] = np.where(cond, IR, self._IR) self._chunk_idx += 1 From 13da458fd22e060c778417ba41ea9fd03a492389 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 30 Jun 2022 23:19:44 -0600 Subject: [PATCH 270/416] Avoid allocating new memory --- stumpy/gpu_stump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index ecd8434b9..f3e03348c 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -528,9 +528,9 @@ def _gpu_stump( indices_L = device_indices_L.copy_to_host() indices_R = device_indices_R.copy_to_host() - profile = np.sqrt(profile) - profile_L = np.sqrt(profile_L) - profile_R = np.sqrt(profile_R) + profile[:, :] = np.sqrt(profile) + profile_L[:] = np.sqrt(profile_L) + profile_R[:] = np.sqrt(profile_R) profile_fname = core.array_to_temp_file(profile) profile_L_fname = core.array_to_temp_file(profile_L) From 93b5708ed05cceeef962ff6ebc19f31dde4070dc Mon Sep 17 00:00:00 2001 From: ninimama 
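(A naive NumPy-only reference for the row-wise top-k merge described above: concatenate the two ascending rows, stable-sort, and keep the k smallest so that ties favor the first argument; this is only an illustrative sketch, not the njit `_merge_topk_PI` routine itself.)

    import numpy as np

    def merge_topk_rows(PA, PB, IA, IB, k):
        # Row-wise: keep the k smallest values (and their indices) from the two
        # ascending rows; the stable sort lets PA win ties because it comes first
        P = np.column_stack((PA, PB))
        I = np.column_stack((IA, IB))
        order = np.argsort(P, axis=1, kind="stable")[:, :k]
        rows = np.arange(P.shape[0])[:, None]
        return P[rows, order], I[rows, order]

    PA = np.array([[0.0, 1.0, 2.0]])
    IA = np.array([[10, 11, 12]])
    PB = np.array([[0.5, 1.0, 3.0]])
    IB = np.array([[20, 21, 22]])
    P, I = merge_topk_rows(PA, PB, IA, IB, k=3)
    print(P)  # [[0.  0.5 1. ]]
    print(I)  # [[10 20 11]]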
Date: Wed, 6 Jul 2022 01:31:19 -0600 Subject: [PATCH 271/416] Improve comments --- stumpy/arimp_stump.py | 98 +++++++++++++++++++++++++++++++++++++++++++ stumpy/gpu_stump.py | 2 +- stumpy/scrump.py | 4 +- stumpy/stump.py | 8 ++-- stumpy/stumpi.py | 13 +++--- 5 files changed, 111 insertions(+), 14 deletions(-) create mode 100644 stumpy/arimp_stump.py diff --git a/stumpy/arimp_stump.py b/stumpy/arimp_stump.py new file mode 100644 index 000000000..6951bd5bd --- /dev/null +++ b/stumpy/arimp_stump.py @@ -0,0 +1,98 @@ +# naive +def arimp_naive(T_A, m, exclusion_zone=None, row_wise=False): + """ + Traverse distance matrix diagonally and update the matrix profile and + matrix profile indices if the parameter `row_wise` is set to `False`. + If the parameter `row_wise` is set to `True`, it is a row-wise traversal. + """ + + distance_matrix = np.array( + [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] + ) + T_B = T_A.copy() + + distance_matrix[np.isnan(distance_matrix)] = np.inf + + n_A = T_A.shape[0] + n_B = T_B.shape[0] + l = n_A - m + 1 + if exclusion_zone is None: + exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + SL = list([np.inf] for _ in range(l)) + SLI = list([-1] for _ in range(l)) + + SR = list([np.inf] for _ in range(l)) + ISR = list([-1] for _ in range(l)) + + RL = list([np.inf] for _ in range(l)) + RLI = list([-1] for _ in range(l)) + + LR = list([np.inf] for _ in range(l)) + LRI = list([-1] for _ in range(l)) + + if row_wise: + for i in range(l): + apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) + + for i, D in enumerate(distance_matrix): + # self-join / AB-join: matrix proifle and indices + idx = np.argmin(D) + P[i, 0] = D[idx] + if P[i, 0] == np.inf: + idx = -1 + I[i, 0] = idx + + # self-join: left matrix profile + if ignore_trivial and i > 0: + IL = np.argmin(D[:i]) + if D[IL] == np.inf: + IL = -1 + I[i, 1] = IL + + # self-join: right matrix profile + if ignore_trivial and i < D.shape[0]: + IR = i + np.argmin(D[i:]) # shift argmin by `i` to get true index + if D[IR] == np.inf: + IR = -1 + I[i, 2] = IR + + else: # diagonal traversal + if ignore_trivial: + diags = np.arange(exclusion_zone + 1, n_A - m + 1) + else: + diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) + + for k in diags: + if k >= 0: + iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k)) + else: + iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k)) + + for i in iter_range: + D = distance_matrix[i, i + k] + if D < P[i, 0]: + P[i, 0] = D + I[i, 0] = i + k + + if ignore_trivial: # Self-joins only + if D < P[i + k, 0]: + P[i + k, 0] = D + I[i + k, 0] = i + + if i < i + k: + # Left matrix profile and left matrix profile index + if D < P[i + k, 1]: + P[i + k, 1] = D + I[i + k, 1] = i + + if D < P[i, 2]: + # right matrix profile and right matrix profile index + P[i, 2] = D + I[i, 2] = i + k + + result = np.empty((l, 4), dtype=object) + result[:, 0] = P[:, 0] + result[:, 1:4] = I[:, :] + + return result diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index f3e03348c..e236d3b05 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -432,7 +432,7 @@ def _gpu_stump( device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) nlevel = np.floor(np.log2(k) + 1).astype(np.int64) - # number of levels in binary seearch tree from which `bfs` is constructed. + # number of levels in binary search tree from which `bfs` is constructed. 
with cuda.gpus[device_id]: device_T_A = cuda.to_device(T_A) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index a28a99f34..fced2d043 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -758,7 +758,7 @@ def P_(self): """ Get the updated (top-k) matrix profile. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile. When k > 1, the output - has exactly k columns consist of the top-k matrix profile. + has exactly k columns consisting of the top-k matrix profile. """ return self._P.astype(np.float64) @@ -767,7 +767,7 @@ def I_(self): """ Get the updated (top-k) matrix profile indices. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile - indices. When k > 1, the output has exactly k columns consist of the top-k + indices. When k > 1, the output has exactly k columns consisting of the top-k matrix profile indices. """ return self._I.astype(np.int64) diff --git a/stumpy/stump.py b/stumpy/stump.py index 59be1e61d..ed726bdd6 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -212,10 +212,10 @@ def _compute_diagonal( if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]: pearson = 1.0 - # ρ[thread_idx, i, :] is sorted ascendingly. It MUST be updated - # when the newly-calculated pearson value becomes greater than the - # first (i.e. smallest) element of this array. (Reminder: higher - # pearson value means lower distance, which is of our interest) + # `ρ[thread_idx, i, :]` is sorted ascendingly and MUST be updated + # when the newly-calculated `pearson` value becomes greater than the + # first (i.e. smallest) element in this array. Note that a higher + # pearson value corresponds to a lower distance. if pearson > ρ[thread_idx, i, 0]: pos = np.searchsorted(ρ[thread_idx, i], pearson) core._shift_insert_at_index( diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 619ae8f72..8c0f0335a 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -256,16 +256,18 @@ def _update_egress(self, t): ) # D.shape[0] is base-1 - # O(Nlog(K)) time complexity + # Calculate the (top-k) matrix profile values/indices for the last subsequence + # by using its corresponding distance profile `D` self._P[-1] = np.inf self._I[-1] = -1 for i, d in enumerate(D): - if d < self._P[-1, -1]: # mean last index, maximum value (k-th value) + if d < self._P[-1, -1]: pos = np.searchsorted(self._P[-1], d, side="right") core._shift_insert_at_index(self._P[-1], pos, d) core._shift_insert_at_index(self._I[-1], pos, i + self._n_appended) - # for last index, the left matrix profile is basically `self._P[-1, 0]` + # All neighbors of the last subsequence are on its left. So, its matrix profile + # value/index and its left matrix profile value/index must be equal.
self._left_P[-1] = self._P[-1, 0] self._left_I[-1] = self._I[-1, 0] @@ -318,12 +320,9 @@ def _update(self, t): core._shift_insert_at_index(self._I[i], pos, l) # Calculating top-k and left matrix profile for new subsequence whose - # distance profie is D - - # O(Nlog(K)) time complexity for obtaining top-k + # distance profie is `D` P_new = np.full(self._k, np.inf, dtype=np.float64) I_new = np.full(self._k, -1, dtype=np.int64) - for i, d in enumerate(D): if d < P_new[-1]: # maximum value in sorted array P_new pos = np.searchsorted(P_new, d, side="right") From 114c0cca27db0dd0db1aa51c5377363e4e54bd56 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 02:09:18 -0600 Subject: [PATCH 272/416] Remove numpy.where to avoid copying unchanged values --- stumpy/gpu_stump.py | 12 ++++++------ stumpy/scrump.py | 12 ++++++------ stumpy/stump.py | 12 ++++++------ stumpy/stumped.py | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index e236d3b05..99343db66 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -854,14 +854,14 @@ def gpu_stump( core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i]) # Update (top-1) left matrix profile and matrix profile indices - cond = profile_L[0] < profile_L[i] - profile_L[0] = np.where(cond, profile_L[0], profile_L[i]) - indices_L[0] = np.where(cond, indices_L[0], indices_L[i]) + mask = profile_L[0] < profile_L[i] + profile_L[0][mask] = profile_L[i][mask] + indices_L[0][mask] = indices_L[i][mask] # Update (top-1) right matrix profile and matrix profile indices - cond = profile_R[0] < profile_R[i] - profile_R[0] = np.where(cond, profile_R[0], profile_R[i]) - indices_R[0] = np.where(cond, indices_R[0], indices_R[i]) + mask = profile_R[0] < profile_R[i] + profile_R[0][mask] = profile_R[i][mask] + indices_R[0][mask] = indices_R[i][mask] out = np.empty((w, 2 * k + 2), dtype=object) # last two columns are to store # (top-1) left/right matrix profile indices diff --git a/stumpy/scrump.py b/stumpy/scrump.py index fced2d043..3f535b11e 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -742,14 +742,14 @@ def update(self): core._merge_topk_PI(self._P, P, self._I, I) # update left matrix profile and indices - cond = PL < self._PL - self._PL[:] = np.where(cond, PL, self._PL) - self._IL[:] = np.where(cond, IL, self._IL) + mask = PL < self._PL + self._PL[mask] = PL[mask] + self._IL[mask] = IL[mask] # update right matrix profile and indices - cond = PR < self._PR - self._PR[:] = np.where(cond, PR, self._PR) - self._IR[:] = np.where(cond, IR, self._IR) + mask = PR < self._PR + self._PR[mask] = PR[mask] + self._IR[mask] = IR[mask] self._chunk_idx += 1 diff --git a/stumpy/stump.py b/stumpy/stump.py index ed726bdd6..8f035c268 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -475,14 +475,14 @@ def _stump( core._merge_topk_ρI(ρ[0], ρ[thread_idx], I[0], I[thread_idx]) # update left matrix profile and matrix profile indices - cond = ρL[0] < ρL[thread_idx] - ρL[0] = np.where(cond, ρL[thread_idx], ρL[0]) - IL[0] = np.where(cond, IL[thread_idx], IL[0]) + mask = ρL[0] < ρL[thread_idx] + ρL[0, mask] = ρL[thread_idx, mask] + IL[0, mask] = IL[thread_idx, mask] # update right matrix profile and matrix profile indices - cond = ρR[0] < ρR[thread_idx] - ρR[0] = np.where(cond, ρR[thread_idx], ρR[0]) - IR[0] = np.where(cond, IR[thread_idx], IR[0]) + mask = ρR[0] < ρR[thread_idx] + ρR[0, mask] = ρR[thread_idx, mask] + IR[0, mask] = IR[thread_idx, mask] # Reverse top-k rho (and its associated 
I) to be in descending order and # then convert from Pearson correlations to Euclidean distances (ascending order) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 1246bbb2c..d991c1304 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -273,14 +273,14 @@ def stumped( core._merge_topk_PI(profile, P, indices, I) # Update top-1 left matrix profile and matrix profile index - cond = PL < profile_L - profile_L[:] = np.where(cond, PL, profile_L) - indices_L[:] = np.where(cond, IL, indices_L) + mask = PL < profile_L + profile_L[mask] = PL[mask] + indices_L[mask] = IL[mask] # Update top-1 right matrix profile and matrix profile index - cond = PR < profile_R - profile_R[:] = np.where(cond, PR, profile_R) - indices_R[:] = np.where(cond, IR, indices_R) + mask = PR < profile_R + profile_R[mask] = PR[mask] + indices_R[mask] = IR[mask] out = np.empty((l, 2 * k + 2), dtype=object) out[:, :k] = profile From 719aefd2ed15c4cdb6d8f97aae87ebaeaa401a31 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 02:13:23 -0600 Subject: [PATCH 273/416] Remove unnecessary trailing colon --- stumpy/stumpi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 8c0f0335a..3a24cad92 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -210,8 +210,8 @@ def _update_egress(self, t): t_drop = self._T[l - 1] self._T_isfinite[:-1] = self._T_isfinite[1:] - self._I[:-1, :] = self._I[1:, :] - self._P[:-1, :] = self._P[1:, :] + self._I[:-1] = self._I[1:] + self._P[:-1] = self._P[1:] self._left_I[:-1] = self._left_I[1:] self._left_P[:-1] = self._left_P[1:] From 528bf12f0f322e44688cf7c90b984e056a097774 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 02:15:15 -0600 Subject: [PATCH 274/416] Replace negative np.inf with np.NINF --- stumpy/stump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 8f035c268..4c025db25 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -407,13 +407,13 @@ def _stump( l = n_A - m + 1 n_threads = numba.config.NUMBA_NUM_THREADS - ρ = np.full((n_threads, l, k), -np.inf, dtype=np.float64) + ρ = np.full((n_threads, l, k), np.NINF, dtype=np.float64) I = np.full((n_threads, l, k), -1, dtype=np.int64) - ρL = np.full((n_threads, l), -np.inf, dtype=np.float64) + ρL = np.full((n_threads, l), np.NINF, dtype=np.float64) IL = np.full((n_threads, l), -1, dtype=np.int64) - ρR = np.full((n_threads, l), -np.inf, dtype=np.float64) + ρR = np.full((n_threads, l), np.NINF, dtype=np.float64) IR = np.full((n_threads, l), -1, dtype=np.int64) ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B) From ce58a5908778a46996b3a3547bb16aa99fef2d62 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 02:39:49 -0600 Subject: [PATCH 275/416] delete a wrong file --- stumpy/arimp_stump.py | 98 ------------------------------------------- 1 file changed, 98 deletions(-) delete mode 100644 stumpy/arimp_stump.py diff --git a/stumpy/arimp_stump.py b/stumpy/arimp_stump.py deleted file mode 100644 index 6951bd5bd..000000000 --- a/stumpy/arimp_stump.py +++ /dev/null @@ -1,98 +0,0 @@ -# naive -def arimp_naive(T_A, m, exclusion_zone=None, row_wise=False): - """ - Traverse distance matrix diagonally and update the matrix profile and - matrix profile indices if the parameter `row_wise` is set to `False`. - If the parameter `row_wise` is set to `True`, it is a row-wise traversal. 
- """ - - distance_matrix = np.array( - [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] - ) - T_B = T_A.copy() - - distance_matrix[np.isnan(distance_matrix)] = np.inf - - n_A = T_A.shape[0] - n_B = T_B.shape[0] - l = n_A - m + 1 - if exclusion_zone is None: - exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - - SL = list([np.inf] for _ in range(l)) - SLI = list([-1] for _ in range(l)) - - SR = list([np.inf] for _ in range(l)) - ISR = list([-1] for _ in range(l)) - - RL = list([np.inf] for _ in range(l)) - RLI = list([-1] for _ in range(l)) - - LR = list([np.inf] for _ in range(l)) - LRI = list([-1] for _ in range(l)) - - if row_wise: - for i in range(l): - apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf) - - for i, D in enumerate(distance_matrix): - # self-join / AB-join: matrix proifle and indices - idx = np.argmin(D) - P[i, 0] = D[idx] - if P[i, 0] == np.inf: - idx = -1 - I[i, 0] = idx - - # self-join: left matrix profile - if ignore_trivial and i > 0: - IL = np.argmin(D[:i]) - if D[IL] == np.inf: - IL = -1 - I[i, 1] = IL - - # self-join: right matrix profile - if ignore_trivial and i < D.shape[0]: - IR = i + np.argmin(D[i:]) # shift argmin by `i` to get true index - if D[IR] == np.inf: - IR = -1 - I[i, 2] = IR - - else: # diagonal traversal - if ignore_trivial: - diags = np.arange(exclusion_zone + 1, n_A - m + 1) - else: - diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1) - - for k in diags: - if k >= 0: - iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k)) - else: - iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k)) - - for i in iter_range: - D = distance_matrix[i, i + k] - if D < P[i, 0]: - P[i, 0] = D - I[i, 0] = i + k - - if ignore_trivial: # Self-joins only - if D < P[i + k, 0]: - P[i + k, 0] = D - I[i + k, 0] = i - - if i < i + k: - # Left matrix profile and left matrix profile index - if D < P[i + k, 1]: - P[i + k, 1] = D - I[i + k, 1] = i - - if D < P[i, 2]: - # right matrix profile and right matrix profile index - P[i, 2] = D - I[i, 2] = i + k - - result = np.empty((l, 4), dtype=object) - result[:, 0] = P[:, 0] - result[:, 1:4] = I[:, :] - - return result From ba4986b6dd8c2b715773a508ae3ff5a7e5e5fd88 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 02:54:56 -0600 Subject: [PATCH 276/416] Avoid advance indexing by using chain slicing so it can be run by njit --- stumpy/stump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 4c025db25..d64fd3532 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -476,13 +476,13 @@ def _stump( # update left matrix profile and matrix profile indices mask = ρL[0] < ρL[thread_idx] - ρL[0, mask] = ρL[thread_idx, mask] - IL[0, mask] = IL[thread_idx, mask] + ρL[0][mask] = ρL[thread_idx][mask] + IL[0][mask] = IL[thread_idx][mask] # update right matrix profile and matrix profile indices mask = ρR[0] < ρR[thread_idx] - ρR[0, mask] = ρR[thread_idx, mask] - IR[0, mask] = IR[thread_idx, mask] + ρR[0][mask] = ρR[thread_idx][mask] + IR[0][mask] = IR[thread_idx][mask] # Reverse top-k rho (and its associated I) to be in descending order and # then convert from Pearson correlations to Euclidean distances (ascending order) From 2f0f53c013b63d9d67b36a3e6f7a8a8c9df984b0 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 6 Jul 2022 03:44:33 -0600 Subject: [PATCH 277/416] Improve docstring --- stumpy/stump.py | 3 ++- stumpy/stumped.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/stumpy/stump.py 
b/stumpy/stump.py index d64fd3532..a21454aaa 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -548,7 +548,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. Note that this will increase the total computational time and memory usage - when k > 1. + when k > 1. If you have access to a GPU device, then you may be able to + leverage `gpu_stump` for better performance and scalability. Returns ------- diff --git a/stumpy/stumped.py b/stumpy/stumped.py index d991c1304..299f5c8a2 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -59,7 +59,8 @@ def stumped( k : int, default 1 The number of top `k` smallest distances used to construct the matrix profile. Note that this will increase the total computational time and memory usage - when k > 1. + when k > 1. If you have access to a GPU device, then you may be able to + leverage `gpu_stump` for better performance and scalability. Returns ------- From 6b49de867ae146c501bb9926c4cc79c8ebaf76d3 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Wed, 6 Jul 2022 12:51:27 -0400 Subject: [PATCH 278/416] Added gpu_searchsorted checks when GPUs unavailable --- stumpy/core.py | 14 ++++++++++++++ test.sh | 4 +++- tests/test_gpu_stump.py | 11 ++++++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 26949a52a..fe231cd06 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -209,6 +209,20 @@ def _gpu_aamp_stimp_driver_not_found(*args, **kwargs): # pragma: no cover driver_not_found() +def _gpu_searchsorted_left_driver_not_found(*args, **kwargs): # pragma: no cover + """ + Dummy function to raise CudaSupportError driver not found error. + """ + driver_not_found() + + +def _gpu_searchsorted_right_driver_not_found(*args, **kwargs): # pragma: no cover + """ + Dummy function to raise CudaSupportError driver not found error. + """ + driver_not_found() + + def get_pkg_name(): # pragma: no cover """ Return package name. diff --git a/test.sh b/test.sh index 2db67e061..5fd98468c 100755 --- a/test.sh +++ b/test.sh @@ -32,7 +32,7 @@ done check_errs() { # Function. Parameter 1 is the return code - if [[ $1 -ne "0" ]]; then + if [[ $1 -ne "0" && $1 -ne "5" ]]; then echo "Error: pytest encountered exit code $1" # as a bonus, make our script exit with the right error code. exit $1 @@ -119,6 +119,7 @@ test_unit() { echo "Testing Numba JIT Compiled Functions" pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_stump.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_core.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_config.py @@ -148,6 +149,7 @@ test_unit() check_errs $? # aamp pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp.py tests/test_maamp.py tests/test_scraamp.py tests/test_aampi.py check_errs $? 
pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_scraamp.py diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 4d3093c99..3f86ab03f 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -3,10 +3,19 @@ import numpy.testing as npt import pandas as pd from stumpy import core, gpu_stump -from stumpy.gpu_stump import _gpu_searchsorted_left, _gpu_searchsorted_right from stumpy import config from numba import cuda +if cuda.is_available(): + from stumpy.gpu_stump import _gpu_searchsorted_left, _gpu_searchsorted_right +else: # pragma: no cover + from stumpy.core import ( + _gpu_searchsorted_left_driver_not_found as _gpu_searchsorted_left, + ) + from stumpy.core import ( + _gpu_searchsorted_right_driver_not_found as _gpu_searchsorted_right, + ) + try: from numba.errors import NumbaPerformanceWarning except ModuleNotFoundError: From 0d1e482e6748dbe1fc23f07911a5ab3e19495a3d Mon Sep 17 00:00:00 2001 From: Sean Law Date: Wed, 6 Jul 2022 13:40:35 -0400 Subject: [PATCH 279/416] Added error checks and pytest ignore warning --- test.sh | 6 ++++++ tests/test_non_normalized_decorator.py | 1 + 2 files changed, 7 insertions(+) diff --git a/test.sh b/test.sh index 5fd98468c..373e03f17 100755 --- a/test.sh +++ b/test.sh @@ -135,6 +135,7 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_ostinato.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_ostinato.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mpdist.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_motifs.py @@ -142,9 +143,11 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mmotifs.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_mpdist.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_snippets.py check_errs $? pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_stimp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stimp.py check_errs $? # aamp @@ -161,6 +164,7 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_ostinato.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp_ostinato.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aampdist.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_motifs.py @@ -168,9 +172,11 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_mmotifs.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aampdist.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aampdist_snippets.py check_errs $? pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp_stimp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_stimp.py check_errs $? 
pytest -x -W ignore::DeprecationWarning tests/test_non_normalized_decorator.py diff --git a/tests/test_non_normalized_decorator.py b/tests/test_non_normalized_decorator.py index 2c6447d09..8da9354c8 100644 --- a/tests/test_non_normalized_decorator.py +++ b/tests/test_non_normalized_decorator.py @@ -340,6 +340,7 @@ def test_mmotifs(T, m): npt.assert_almost_equal(ref_distances, cmp_distances) +@pytest.mark.filterwarnings("ignore:All-NaN slice encountered") def test_snippets(): T = np.random.rand(64) m = 10 From f72ca7a6f71b0c41aa87893bc66faac6cf4af3ab Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:20:21 -0600 Subject: [PATCH 280/416] Improve docstrings --- stumpy/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index fe231cd06..4e87ffe68 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2587,9 +2587,9 @@ def _merge_topk_PI(PA, PB, IA, IB): `PA` and `PB` are 2D arrays, with each row sorted ascendingly. To update `PA[i]`, the array `PB[i]` is traversed forward from index `0` to its last index, and - will update `PA[i]` if its element is smaller than `PA[i, -1]`, i.e. the greatest - value in `PA[i]`. In case of tied value `v`, it will be inserted to the right side - of the greatest index in `PA[i]` whose value is `v`. + if its element is smaller than `PA[i, -1]`, i.e. the greatest value in `PA[i]`, + then `PA[i]` will be updatd. In case of tied value `v`, it will be inserted to + the right side of the greatest index in `PA[i]` whose value is `v`. Parameters ---------- @@ -2651,10 +2651,10 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): keeps `top-k` largest values. `ρA` and `ρB` are 2D arrays, with each row sorted ascendingly. To update `ρA[i]`, - the array `ρB[i]` is traversed backward from its last index to index 0, and will - update `ρA[i]` if its element is greater than `ρA[i, 0]`, i.e. the smallest value - in `ρA[i]`. In case of tied value `v`, it will be inserted to the left side of the - lowest index in `ρA[i]` whose value is `v`. + the array `ρB[i]` is traversed backward from its last index to index 0, and if + its element is greater than `ρA[i, 0]`, i.e. the smallest value in `ρA[i]`, then + `ρA[i]` will be updated. In case of tied value `v`, it will be inserted to the + left side of the lowest index in `ρA[i]` whose value is `v`. Parameters ---------- From af40906f0f761b18337e38797c89c68ec656cd7d Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:34:30 -0600 Subject: [PATCH 281/416] minor changes in if-block and dosctring --- stumpy/core.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4e87ffe68..2ca026ee9 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2709,11 +2709,14 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): @njit def _shift_insert_at_index(a, idx, v, shift="right"): """ - If `shift=right`, all elements in `a[idx:]` are shifted to the right by one element - and the last element is discarded. If `shift=left` (or any string value other - than "right") all elements in `a[:idx]` are shifted to the left by one element - and the first element is discarded. In both cases, the length of `a` remains - unchanged. + If `shift=right`(default), all elements in `a[idx:]` are shifted to the right by + one element and the last element is discarded. If `shift=left`, all elements in + `a[:idx]` are shifted to the left by one element and the first element is discarded. 
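As an illustrative aside, a minimal sketch of the shifting behavior described in this docstring, assuming the `core._shift_insert_at_index` helper added on this branch can be imported via `from stumpy import core`:

import numpy as np
from stumpy import core  # assumption: the top-k branch in these patches

a = np.array([1.0, 3.0, 5.0, 7.0])
core._shift_insert_at_index(a, 2, 4.0, shift="right")
# a is now [1.0, 3.0, 4.0, 5.0]: a[2:] shifted right, 7.0 (the last element) discarded

b = np.array([1.0, 3.0, 5.0, 7.0])
core._shift_insert_at_index(b, 2, 4.0, shift="left")
# b is now [3.0, 4.0, 5.0, 7.0]: b[:2] shifted left, 1.0 (the first element) discarded,
# and the new value lands at index idx - 1 == 1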
+ In both cases, the length of `a` remains unchanged. + + Note that for any other string value for parameter `shift`, the parameter will be + reset to `shift="right"`. + Parameters ---------- @@ -2723,7 +2726,7 @@ def _shift_insert_at_index(a, idx, v, shift="right"): idx: int The index at which the value `v` should be inserted. This can be any integer number from `0` to `len(a)`. When `idx=len(a)` and `shift="right"`, - OR when `idx=0` and `shift != "right"`, then no change will occur on + OR when `idx=0` and `shift="left"`, then no change will occur on the input array `a`. v: float @@ -2731,21 +2734,21 @@ def _shift_insert_at_index(a, idx, v, shift="right"): shift: str, default "right" The value that indicates whether the shifting of elements should be to the - right or to the left. If "right" (default), all elements in `a[idx:]` are - shifted to right by one element. For any other string value, all elements + right or to the left. If `shift="right"` (default), all elements in `a[idx:]` + are shifted to the right by one element. If `shift="left"`, all elements in `a[:idx]` are shifted to the left by one element. Returns ------- None """ - if shift == "right": - if 0 <= idx < len(a): - a[idx + 1 :] = a[idx:-1] - a[idx] = v - - else: + if shift == "left": if 0 < idx <= len(a): a[: idx - 1] = a[1:idx] - # elements were shifted to left, thus the insertion index becomes `idx-1` + # elements were shifted to the left, thus the insertion index becomes `idx-1` a[idx - 1] = v + + else: + if 0 <= idx < len(a): + a[idx + 1 :] = a[idx:-1] + a[idx] = v From 42ec617c0b7d71946c0624662027bdc3b0862525 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:42:58 -0600 Subject: [PATCH 282/416] Improve docstrings --- stumpy/scrump.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 3f535b11e..6394599ea 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -295,12 +295,12 @@ def _prescrump( out1 : numpy.ndarray The (top-k) Matrix profile. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile. When k > 1, the output - has exactly k columns consist of the top-k matrix profile. + has exactly `k` columns consisting of the top-k matrix profile. out2 : numpy.ndarray The (top-k) Matrix profile indices. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly k columns consist of the top-k matrix profile. + the output has exactly `k` columns consisting of the top-k matrix profile indices. Notes ----- @@ -381,14 +381,14 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): Returns ------- P : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile. When k > 1, the output has exactly k columns - consist of the top-k matrix profile. + The (top-k) Matrix profile. When k = 1 (default), the first (and only) column + in this 2D array consists of the matrix profile. When k > 1, the output has + exactly `k` columns consisting of the top-k matrix profile. I : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), the first and only column - consists of the matrix profile indices. When k > 1, the output has exactly - k columns consist of the top-k matrix profile indices. + The (top-k) Matrix profile indices. 
When k = 1 (default), the first (and only) + column in this 2D array consists of the matrix profile indices. When k > 1, + the output has exactly `k` columns consisting of the top-k matrix profile indices. Notes ----- @@ -489,10 +489,14 @@ class scrump: Attributes ---------- P_ : numpy.ndarray - The updated (top-k) matrix profile + The updated (top-k) matrix profile. When k=1 (default), the first (and only) + column in this 2D array consists of the matrix profile. When k > 1, the output + has exactly k columns consisting of the top-k matrix profile. I_ : numpy.ndarray - The updated (top-k) matrix profile indices + The updated (top-k) matrix profile indices. When k=1 (default), the first (and only) + column in this 2D array consists of the matrix profile indices. When k > 1, + the output has exactly k columns consisting of the top-k matrix profile indices. left_I_ : numpy.ndarray The updated left (top-1) matrix profile indices @@ -758,7 +762,7 @@ def P_(self): """ Get the updated (top-k) matrix profile. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile. When k > 1, the output - has exactly k columns consisting of the top-k matrix profile. + has exactly `k` columns consisting of the top-k matrix profile. """ return self._P.astype(np.float64) @@ -767,7 +771,7 @@ def I_(self): """ Get the updated (top-k) matrix profile indices. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile - indices. When k > 1, the output has exactly k columns consisting of the top-k + indices. When k > 1, the output has exactly `k` columns consisting of the top-k matrix profile indices. """ return self._I.astype(np.int64) From 0d8f5de06f3c429b55486d3e1dfa3fe19df0036f Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:45:50 -0600 Subject: [PATCH 283/416] Improve comments --- stumpy/stumpi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 3a24cad92..e059676c3 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -151,7 +151,7 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0, k=1): self._left_P[mask] = self._P[mask, 0] # Only re-compute the `i`-th left matrix profile value, `self._left_P[i]`, - # when `self._I[i, 0] != self._left_I[i]` + # when `self._left_I[i] != self._I[i, 0]` for i in np.flatnonzero(self._left_I >= 0 & ~mask): j = self._left_I[i] QT = np.dot(self._T[i : i + self._m], self._T[j : j + self._m]) @@ -319,8 +319,8 @@ def _update(self, t): core._shift_insert_at_index(self._P[i], pos, D[i]) core._shift_insert_at_index(self._I[i], pos, l) - # Calculating top-k and left matrix profile for new subsequence whose - # distance profie is `D` + # Calculating top-k matrix profile and (top-1) left matrix profile (and thier + # corresponding indices) for new subsequence whose distance profie is `D` P_new = np.full(self._k, np.inf, dtype=np.float64) I_new = np.full(self._k, -1, dtype=np.int64) for i, d in enumerate(D): From fe9c4dba2cddac5e38cef5e6421733c9d7c1798b Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:51:54 -0600 Subject: [PATCH 284/416] minor changes --- tests/naive.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index f257dc4a0..8a2ae3a0e 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -169,10 +169,9 @@ def searchsorted_right(a, v): def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): """ - Traverse distance matrix 
diagonally and update the top-k nearest neighbor - matrix profile and matrix profile indices if the parameter `row_wise` is - set to `False`. If the parameter `row_wise` is set to `True`, - it is a row-wise traversal. + Traverse distance matrix diagonally and update the top-k matrix profile and + matrix profile indices if the parameter `row_wise` is set to `False`. If the + parameter `row_wise` is set to `True`, it is a row-wise traversal. """ if T_B is None: # self-join: ignore_trivial = True @@ -194,7 +193,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): if exclusion_zone is None: exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - P = np.full((l, k + 2), np.inf) + P = np.full((l, k + 2), np.inf, dtype=np.float64) I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store # ... left and right top-1 matrix profile indices @@ -720,8 +719,10 @@ def __init__(self, T, m, excl_zone=None, p=2.0): self._T_isfinite = np.isfinite(self._T) self._m = m self._p = p - if excl_zone is None: # see stumpi, and make similar changes here - self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) + + if excl_zone is None: # apply similar changes in naive `class stumpi_egress` + excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) + self._excl_zone = excl_zone self._l = self._T.shape[0] - m + 1 mp = aamp(T, m, p=p) From 9aba6d2ce7889eb91184d237f9042194168f1be8 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 13:57:50 -0600 Subject: [PATCH 285/416] Correct format --- stumpy/core.py | 3 ++- stumpy/scrump.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 2ca026ee9..879c3ebdf 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2745,7 +2745,8 @@ def _shift_insert_at_index(a, idx, v, shift="right"): if shift == "left": if 0 < idx <= len(a): a[: idx - 1] = a[1:idx] - # elements were shifted to the left, thus the insertion index becomes `idx-1` + # elements were shifted to the left, thus the insertion index becomes + # `idx-1` a[idx - 1] = v else: diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 6394599ea..8d265dc11 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -300,7 +300,8 @@ def _prescrump( out2 : numpy.ndarray The (top-k) Matrix profile indices. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly `k` columns consisting of the top-k matrix profile indices. + the output has exactly `k` columns consisting of the top-k matrix profile + indices. Notes ----- @@ -388,7 +389,8 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): I : numpy.ndarray The (top-k) Matrix profile indices. When k = 1 (default), the first (and only) column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly `k` columns consisting of the top-k matrix profile indices. + the output has exactly `k` columns consisting of the top-k matrix profile + indices. Notes ----- @@ -494,9 +496,10 @@ class scrump: has exactly k columns consisting of the top-k matrix profile. I_ : numpy.ndarray - The updated (top-k) matrix profile indices. When k=1 (default), the first (and only) - column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly k columns consisting of the top-k matrix profile indices. + The updated (top-k) matrix profile indices. 
When k=1 (default), the first + (and only) column in this 2D array consists of the matrix profile indices. + When k > 1, the output has exactly k columns consisting of the top-k matrix + profile indices. left_I_ : numpy.ndarray The updated left (top-1) matrix profile indices From 2565c91eb09b3d63db21c48bacf4a3f3b41b63e3 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 7 Jul 2022 14:07:12 -0600 Subject: [PATCH 286/416] Improve docstrings --- stumpy/scrump.py | 4 ++-- stumpy/stumpi.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 8d265dc11..1c1286e4e 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -513,9 +513,9 @@ class scrump: update() Update the matrix profile and the matrix profile indices by computing additional new distances (limited by `percentage`) that make up the full - distance matrix. The outputs are (top-k) matrix profile, (top-1) left + distance matrix. It updates the (top-k) matrix profile, (top-1) left matrix profile, (top-1) right matrix profile, (top-k) matrix profile indices, - (top-1) left matrix profile indices, (top-1) right matrix profile indices. + (top-1) left matrix profile indices, and (top-1) right matrix profile indices. See Also -------- diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index e059676c3..465c39db5 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -46,13 +46,13 @@ class stumpi: P_ : numpy.ndarray The updated (top-k) matrix profile for `T`. When `k=1` (default), the first (and only) column in this 2D array consists of the matrix profile. When - `k > 1`, the output has exactly `k` columns consist of the top-k matrix + `k > 1`, the output has exactly `k` columns consisting of the top-k matrix profile. I_ : numpy.ndarray The updated (top-k) matrix profile indices for `T`. When `k=1` (default), the first (and only) column in this 2D array consists of the matrix profile - indices. When `k > 1`, the output has exactly `k` columns consist of the + indices. When `k > 1`, the output has exactly `k` columns consisting of the top-k matrix profile indices. left_P_ : numpy.ndarray @@ -344,14 +344,19 @@ def _update(self, t): @property def P_(self): """ - Get the (top-k) matrix profile + Get the (top-k) matrix profile. When `k=1` (default), the first (and only) + column in this 2D array consists of the matrix profile. When `k > 1`, the + output has exactly `k` columns consisting of the top-k matrix profile. """ return self._P.astype(np.float64) @property def I_(self): """ - Get the (top-k) matrix profile indices + Get the (top-k) matrix profile indices. When `k=1` (default), the first + (and only) column in this 2D array consists of the matrix profile indices. + When `k > 1`, the output has exactly `k` columns consisting of the top-k + matrix profile indices. 
""" return self._I.astype(np.int64) From a89e21434a72b0f5ce841836530b613f17a78736 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 10 Jul 2022 15:25:23 -0600 Subject: [PATCH 287/416] optimize functions --- stumpy/core.py | 94 ++++++++++++++++++++------------------------------ 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 05745ba97..1c0e48311 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2599,7 +2599,7 @@ def _check_P(P, threshold=1e-6): logger.warning("For a self-join, try setting `ignore_trivial=True`.") -@njit(parallel=True) +@njit def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles PA and PB, and update PA (in place) while @@ -2635,36 +2635,27 @@ def _merge_topk_PI(PA, PB, IA, IB): ------- None """ - for i in prange(PB.shape[0]): - start = 0 - stop = np.searchsorted(PA[i], PB[i, -1], side="right") - - if stop == 0: - # means `PB[i, -1] < PA[i, 0]`, i.e. the maximum value in `PB[i]` is - # less than smallest value in `PA[i]`. So, we should replace `PA[i]` - # with `PB[i]` so that we have the top-k smallest. - PA[i] = PB[i] - IA[i] = IB[i] - continue - - for j in range(PB.shape[1]): - if PB[i, j] >= PA[i, -1]: - # `PB[i]` is sorted ascaendingly. - # Hence, in next iteration: `PB[i, j+1] >= PB[i, j] >= PA[i, -1]` - break - - # `PB[i, j]` is less than `PA[i, -1]`, the maximum value in `PA[i]`. - # so, we must update `PA[i]` to have the top-k smallest values. - idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start - - _shift_insert_at_index(PA[i], idx, PB[i, j], shift="right") - _shift_insert_at_index(IA[i], idx, IB[i, j], shift="right") + tmp_P = np.empty(PA.shape[1], dtype=np.float64) + tmp_I = np.empty(PA.shape[1], dtype=np.int64) + for i in range(len(PA)): + tmp_P[:] = np.empty(PA.shape[1], dtype=np.float64) + tmp_I[:] = np.empty(PA.shape[1], dtype=np.int64) + aj, bj = 0, 0 + for k in range(len(tmp_P)): + if PB[i, bj] < PA[i, aj]: + tmp_P[k] = PB[i, bj] + tmp_I[k] = IB[i, bj] + bj += 1 + else: + tmp_P[k] = PA[i, aj] + tmp_I[k] = IA[i, aj] + aj += 1 - start = idx - stop += 1 # because of shifting elements to the right by one + PA[i] = tmp_P + IA[i] = tmp_I -@njit(parallel=True) +@njit def _merge_topk_ρI(ρA, ρB, IA, IB): """ Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) by @@ -2700,34 +2691,25 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - for i in prange(ρB.shape[0]): - start = np.searchsorted(ρA[i], ρB[i, 0], side="left") - stop = ρB.shape[1] - - if start == ρB.shape[1]: - # means `ρB[i, 0] > ρA[i, -1]`, i.e. the minimum value in `ρB[i]` is - # greater than greatest value in `ρA[i]`. So, we should replace `ρA[i]` - # with `ρB[i]` so that we have top-k largest values - ρA[i] = ρB[i] - IA[i] = IB[i] - continue - - for j in range(ρB.shape[1] - 1, -1, -1): - if ρB[i, j] <= ρA[i, 0]: - # `ρB[i]` is sorted ascaendingly. - # Hence, in the next iteration: `ρB[i, j-1] <= ρB[i, j] <= ρA[i, 0]` - break - - # `ρB[i, j]` is greater than `ρA[i, 0]`, the minimum value in `ρA[i]`. - # so, we must update `ρA[i]` to make sure we have top-k largest values. 
- idx = np.searchsorted(ρA[i, start:stop], ρB[i, j], side="left") + start - - _shift_insert_at_index(ρA[i], idx, ρB[i, j], shift="left") - _shift_insert_at_index(IA[i], idx, IB[i, j], shift="left") - - stop = idx - if start > 0: - start -= 1 # because of shifting elements to the left by one + tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) + tmp_I = np.empty(ρA.shape[1], dtype=np.int64) + last_idx = len(tmp_ρ) - 1 + for i in range(len(ρA)): + tmp_ρ[:] = np.empty(ρA.shape[1], dtype=np.float64) + tmp_I[:] = np.empty(ρA.shape[1], dtype=np.int64) + aj, bj = last_idx, last_idx + for k in range(last_idx, -1, -1): + if ρB[i, bj] > ρA[i, aj]: + tmp_ρ[k] = ρB[i, bj] + tmp_I[k] = IB[i, bj] + bj -= 1 + else: + tmp_ρ[k] = ρA[i, aj] + tmp_I[k] = IA[i, aj] + aj -= 1 + + ρA[i] = tmp_ρ + IA[i] = tmp_I @njit From 9b845b1e6e77ff2e3bb86243c943029ff688e8dd Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 10 Jul 2022 15:30:35 -0600 Subject: [PATCH 288/416] Remove redundant import --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 1c0e48311..f2eafe5d6 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -7,7 +7,7 @@ import inspect import numpy as np -from numba import njit, prange +from numba import njit from scipy.signal import convolve from scipy.ndimage import maximum_filter1d, minimum_filter1d from scipy import linalg From 85f1226ed70722f11b601bbac47ba82fed722a31 Mon Sep 17 00:00:00 2001 From: ninimama Date: Sun, 10 Jul 2022 18:51:19 -0600 Subject: [PATCH 289/416] minor change --- stumpy/core.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f2eafe5d6..eb3d33c92 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2635,11 +2635,9 @@ def _merge_topk_PI(PA, PB, IA, IB): ------- None """ - tmp_P = np.empty(PA.shape[1], dtype=np.float64) - tmp_I = np.empty(PA.shape[1], dtype=np.int64) for i in range(len(PA)): - tmp_P[:] = np.empty(PA.shape[1], dtype=np.float64) - tmp_I[:] = np.empty(PA.shape[1], dtype=np.int64) + tmp_P = np.empty(PA.shape[1], dtype=np.float64) + tmp_I = np.empty(PA.shape[1], dtype=np.int64) aj, bj = 0, 0 for k in range(len(tmp_P)): if PB[i, bj] < PA[i, aj]: @@ -2691,12 +2689,10 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) - tmp_I = np.empty(ρA.shape[1], dtype=np.int64) - last_idx = len(tmp_ρ) - 1 + last_idx = ρA.shape[1] - 1 for i in range(len(ρA)): - tmp_ρ[:] = np.empty(ρA.shape[1], dtype=np.float64) - tmp_I[:] = np.empty(ρA.shape[1], dtype=np.int64) + tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) + tmp_I = np.empty(ρA.shape[1], dtype=np.int64) aj, bj = last_idx, last_idx for k in range(last_idx, -1, -1): if ρB[i, bj] > ρA[i, aj]: From fb6ed07cc7f829db0ef0fb9b8ada64838f11ab1a Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 11 Jul 2022 00:07:44 -0600 Subject: [PATCH 290/416] Revise docstrings --- stumpy/core.py | 50 ++++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index eb3d33c92..394842076 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2602,34 +2602,29 @@ def _check_P(P, threshold=1e-6): @njit def _merge_topk_PI(PA, PB, IA, IB): """ - Merge two top-k matrix profiles PA and PB, and update PA (in place) while - always choosing values of PA over values of PB in case of ties. Also, update - IA accordingly. 
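For clarity, here is a minimal, self-contained sketch of the single-pass merge idea behind the rewritten `_merge_topk_PI` above (plain NumPy, not the library's njit code); ties favor `PA` because the comparison is a strict less-than:

import numpy as np

# One row of two sorted top-k profiles and their indices (k = 3); values are illustrative
PA_row, IA_row = np.array([0.2, 0.7, 1.5]), np.array([3, 11, 8])
PB_row, IB_row = np.array([0.4, 0.9, 2.0]), np.array([21, 35, 40])

tmp_P = np.empty(3)
tmp_I = np.empty(3, dtype=np.int64)
aj = bj = 0
for q in range(3):
    if PB_row[bj] < PA_row[aj]:  # strict '<' keeps tied values from PA first
        tmp_P[q], tmp_I[q] = PB_row[bj], IB_row[bj]
        bj += 1
    else:
        tmp_P[q], tmp_I[q] = PA_row[aj], IA_row[aj]
        aj += 1
# tmp_P -> [0.2, 0.4, 0.7], tmp_I -> [3, 21, 11]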
+ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while + always prioritizing the values of `PA` over the values of `PB` in case of ties. + (i.e., values from `PB` are always inserted to the right of values from `PA`). + Also, update `IA` accordingly. Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. - `PA` and `PB` are 2D arrays, with each row sorted ascendingly. To update `PA[i]`, - the array `PB[i]` is traversed forward from index `0` to its last index, and - if its element is smaller than `PA[i, -1]`, i.e. the greatest value in `PA[i]`, - then `PA[i]` will be updatd. In case of tied value `v`, it will be inserted to - the right side of the greatest index in `PA[i]` whose value is `v`. - Parameters ---------- PA : numpy.ndarray - A (top-k) matrix profile, with ndim of 2, where values in each row are - sorted in ascending order. + A (top-k) matrix profile where values in each row are sorted in ascending + order. `PA` must be 2-dimensional. PB : numpy.ndarray - A (top-k) matrix profile, with ndim of 2, where values in each row are - sorted in ascending order. `PB` must have the same shape as `PA`. + A (top-k) matrix profile where values in each row are sorted in ascending + order. `PB` must have the same shape as `PA`. IA : numpy.ndarray - A (top-k) matrix profile indices corresponding to PA + A (top-k) matrix profile indices corresponding to `PA` IB : numpy.ndarray - A (top-k) matrix profile indices corresponding to PB + A (top-k) matrix profile indices corresponding to `PB` Returns ------- @@ -2656,34 +2651,29 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit def _merge_topk_ρI(ρA, ρB, IA, IB): """ - Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) by - keeping the top-k largest values in merging two `top-k` rows `ρA[i]` and `ρB[i]`, - each sorted ascendingly. + Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) while + always prioritizing the values of `ρA` over the values of `ρB` in case of ties. + (i.e., values from `ρB` are always inserted to the left of values from `ρA`). + Also, update `IA` accordingly. Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. - `ρA` and `ρB` are 2D arrays, with each row sorted ascendingly. To update `ρA[i]`, - the array `ρB[i]` is traversed backward from its last index to index 0, and if - its element is greater than `ρA[i, 0]`, i.e. the smallest value in `ρA[i]`, then - `ρA[i]` will be updated. In case of tied value `v`, it will be inserted to the - left side of the lowest index in `ρA[i]` whose value is `v`. - Parameters ---------- ρA : numpy.ndarray - A (top-k) pearson profile, with ndim of 2, where values in each row are - sorted in ascending order. + A (top-k) pearson profile where values in each row are sorted in ascending + order. `ρA` must be 2-dimensional. ρB : numpy.ndarray - A (top-k) pearson profile, with ndim of 2, where values in each row are - sorted in ascending order. `ρB` must have the same shape as `ρA`. + A (top-k) pearson profile, where values in each row are sorted in ascending + order. `ρB` must have the same shape as `ρA`. 
IA : numpy.ndarray - A (top-k) matrix profile indices corresponding to ρA + A (top-k) matrix profile indices corresponding to `ρA` IB : numpy.ndarray - A (top-k) matrix profile indices corresponding to ρB + A (top-k) matrix profile indices corresponding to `ρB` Returns ------- From 9f7b6d83f9377bb78fe540d3f6682e048d3df3c6 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Mon, 11 Jul 2022 07:42:18 -0400 Subject: [PATCH 291/416] Fixed black formatting after conflict resolution --- stumpy/core.py | 1 - tests/test_core.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 573edd1da..7c54b9bb5 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2777,4 +2777,3 @@ def _check_P(P, threshold=1e-6): if are_distances_too_small(P, threshold=threshold): # pragma: no cover logger.warning(f"A large number of values in `P` are smaller than {threshold}.") logger.warning("For a self-join, try setting `ignore_trivial=True`.") - diff --git a/tests/test_core.py b/tests/test_core.py index 76ba34c67..2ce86a4bb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1153,4 +1153,4 @@ def test_shift_insert_at_index(): def test_check_P(): with pytest.raises(ValueError): - core._check_P(np.random.rand(10).reshape(2, 5)) \ No newline at end of file + core._check_P(np.random.rand(10).reshape(2, 5)) From 54d1d1fecd58f2537046223cdeb7686e8b2914b7 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 11 Jul 2022 08:08:23 -0600 Subject: [PATCH 292/416] Correct docstring --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 8d0bcafe9..5330e6046 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2547,7 +2547,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): k : int Specify the `k`th value in the concatenated matrix profiles to return. This - parameter is ignored when `k_func` is not None. + parameter is ignored when `custom_func` is not None. custom_func : object, default None A custom user defined function for selecting the desired value from the From 67351671865ba38fcb5759274dde873b12d56d1d Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 18:43:39 -0600 Subject: [PATCH 293/416] Revise docstrings --- stumpy/core.py | 2 +- stumpy/scrump.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 5330e6046..167502fc3 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2680,7 +2680,7 @@ def _shift_insert_at_index(a, idx, v, shift="right"): If `shift=right`(default), all elements in `a[idx:]` are shifted to the right by one element and the last element is discarded. If `shift=left`, all elements in `a[:idx]` are shifted to the left by one element and the first element is discarded. - In both cases, the length of `a` remains unchanged. + In both cases, `a` is updated in palce and its length remains unchanged. Note that for any other string value for parameter `shift`, the parameter will be reset to `shift="right"`. diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 1c1286e4e..ad4c2f4df 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -293,12 +293,12 @@ def _prescrump( Returns ------- out1 : numpy.ndarray - The (top-k) Matrix profile. When k=1 (default), the first (and only) column + The (top-k) matrix profile. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile. When k > 1, the output has exactly `k` columns consisting of the top-k matrix profile. 
out2 : numpy.ndarray - The (top-k) Matrix profile indices. When k=1 (default), the first (and only) + The (top-k) matrix profile indices. When k=1 (default), the first (and only) column in this 2D array consists of the matrix profile indices. When k > 1, the output has exactly `k` columns consisting of the top-k matrix profile indices. @@ -382,12 +382,12 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): Returns ------- P : numpy.ndarray - The (top-k) Matrix profile. When k = 1 (default), the first (and only) column + The (top-k) matrix profile. When k = 1 (default), the first (and only) column in this 2D array consists of the matrix profile. When k > 1, the output has exactly `k` columns consisting of the top-k matrix profile. I : numpy.ndarray - The (top-k) Matrix profile indices. When k = 1 (default), the first (and only) + The (top-k) matrix profile indices. When k = 1 (default), the first (and only) column in this 2D array consists of the matrix profile indices. When k > 1, the output has exactly `k` columns consisting of the top-k matrix profile indices. From 0112989f76ce163c5ae2fdac6800f13fdf726226 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 18:46:46 -0600 Subject: [PATCH 294/416] minor change --- stumpy/scrump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index ad4c2f4df..5d191555a 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -684,9 +684,9 @@ def __init__( if pre_scrump: if self._ignore_trivial: - P, I = prescrump(T_A, m, s=s, k=k) + P, I = prescrump(T_A, m, s=s, k=self._k) else: - P, I = prescrump(T_A, m, T_B=T_B, s=s, k=k) + P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) core._merge_topk_PI(self._P, P, self._I, I) From ff322a0279adea92c78e2e08096f8ac74cb192dc Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 18:58:26 -0600 Subject: [PATCH 295/416] Revise comments --- stumpy/stumpi.py | 5 +++-- tests/naive.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 465c39db5..8d30f3319 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -266,8 +266,9 @@ def _update_egress(self, t): core._shift_insert_at_index(self._P[-1], pos, d) core._shift_insert_at_index(self._I[-1], pos, i + self._n_appended) - # All neighbors of the last subsequence are on its left. So, its matrix profile - # value/index and its left matrix profile value/index must be equal. + # All neighbors of the last subsequence are on its left. So, its (top-1) + # matrix profile value/index and its left matrix profile value/index must + # be equal. self._left_P[-1] = self._P[-1, 0] self._left_I[-1] = self._I[-1, 0] diff --git a/tests/naive.py b/tests/naive.py index 8a2ae3a0e..928dee9d8 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -218,7 +218,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): # self-join: right matrix profile index (top-1) if ignore_trivial and i < D.shape[0]: - IR = i + np.argmin(D[i:]) # shift arg by `i` to get true index + IR = i + np.argmin(D[i:]) # offset by `i` to get true index if D[IR] == np.inf: IR = -1 I[i, k + 1] = IR @@ -239,7 +239,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): d = distance_matrix[i, i + g] if d < P[i, k - 1]: idx = searchsorted_right(P[i], d) - # to keep the top-k, we must get rid of the last element. + # to keep the top-k, we must discard the last element. 
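The searchsorted/insert/drop-last idiom used throughout these naive reference changes can be shown in isolation (a minimal sketch with made-up values, not part of the patch):

import numpy as np

# Hypothetical row of a sorted top-k matrix profile (k = 3) and its indices
P_row = np.array([0.5, 1.2, np.inf])
I_row = np.array([7, 42, -1])
d, j = 0.9, 13  # new candidate distance and neighbor index
if d < P_row[-1]:  # only update if it beats the current k-th best
    idx = np.searchsorted(P_row, d, side="right")
    P_row = np.insert(P_row, idx, d)[:-1]  # insert, then drop the largest
    I_row = np.insert(I_row, idx, j)[:-1]
# P_row -> [0.5, 0.9, 1.2], I_row -> [7, 13, 42]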
P[i, :k] = np.insert(P[i, :k], idx, d)[:-1] I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] @@ -854,8 +854,8 @@ def update(self, t): self.I_[-1] = I_last_topk + self._n_appended self.I_[-1][self.P_[-1] == np.inf] = -1 - # for last indx, the left matrix profile value is self.P_[-1, 0] - # and the same goes for left matrix profile index + # for the last index, the left matrix profile value is self.P_[-1, 0] + # and the same goes for the left matrix profile index self.left_P_[-1] = self.P_[-1, 0] self.left_I_[-1] = self.I_[-1, 0] From 598fcf40c3a25e4aa3728e59b77670dce896268b Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 19:05:42 -0600 Subject: [PATCH 296/416] Avoid redundant allocation of memory --- stumpy/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 167502fc3..1350d6a6d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2606,9 +2606,9 @@ def _merge_topk_PI(PA, PB, IA, IB): ------- None """ + tmp_P = np.empty(PA.shape[1], dtype=np.float64) + tmp_I = np.empty(PA.shape[1], dtype=np.int64) for i in range(len(PA)): - tmp_P = np.empty(PA.shape[1], dtype=np.float64) - tmp_I = np.empty(PA.shape[1], dtype=np.int64) aj, bj = 0, 0 for k in range(len(tmp_P)): if PB[i, bj] < PA[i, aj]: @@ -2655,10 +2655,10 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - last_idx = ρA.shape[1] - 1 + tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) + tmp_I = np.empty(ρA.shape[1], dtype=np.int64) + last_idx = len(tmp_ρ) - 1 for i in range(len(ρA)): - tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) - tmp_I = np.empty(ρA.shape[1], dtype=np.int64) aj, bj = last_idx, last_idx for k in range(last_idx, -1, -1): if ρB[i, bj] > ρA[i, aj]: From 54643e21d57b91899f9a5c2dd48d807afd2a8994 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 19:12:00 -0600 Subject: [PATCH 297/416] Revise docstrings and comments --- stumpy/core.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 1350d6a6d..655a4ef86 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2677,13 +2677,13 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): @njit def _shift_insert_at_index(a, idx, v, shift="right"): """ - If `shift=right`(default), all elements in `a[idx:]` are shifted to the right by - one element and the last element is discarded. If `shift=left`, all elements in - `a[:idx]` are shifted to the left by one element and the first element is discarded. - In both cases, `a` is updated in palce and its length remains unchanged. + If `shift=right` (default), all elements in `a[idx:]` are shifted to the right by + one element, `v` in inserted at index `idx` and the last element is discarded. + If `shift=left`, all elements in `a[:idx]` are shifted to the left by one element, + `v` in inserted at index `idx-1`, and the first element is discarded. In both cases, + `a` is updated in place and its length remains unchanged. - Note that for any other string value for parameter `shift`, the parameter will be - reset to `shift="right"`. + Note that all unrecognized `shift` inputs will default to `shift=right`. Parameters @@ -2701,8 +2701,8 @@ def _shift_insert_at_index(a, idx, v, shift="right"): The value that should be inserted into array `a` at index `idx` shift: str, default "right" - The value that indicates whether the shifting of elements should be to the - right or to the left. 
If `shift="right"` (default), all elements in `a[idx:]` + The value that indicates whether the shifting of elements should be towards + the right or left. If `shift="right"` (default), all elements in `a[idx:]` are shifted to the right by one element. If `shift="left"`, all elements in `a[:idx]` are shifted to the left by one element. @@ -2716,7 +2716,6 @@ def _shift_insert_at_index(a, idx, v, shift="right"): # elements were shifted to the left, thus the insertion index becomes # `idx-1` a[idx - 1] = v - else: if 0 <= idx < len(a): a[idx + 1 :] = a[idx:-1] From 9433499e382cb6037e638b0c8d6ad6f7ae8a763e Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 19:20:29 -0600 Subject: [PATCH 298/416] rename variables --- stumpy/scrump.py | 34 +++++++++++++++++----------------- stumpy/stump.py | 12 ++++++------ stumpy/stumpi.py | 24 ++++++++++++------------ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 5d191555a..97b0799e9 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -148,22 +148,22 @@ def _compute_PI( σ_Q[j + g], ) if D_squared < P_squared[thread_idx, i + g, -1]: - pos = np.searchsorted( + idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, i + g], pos, D_squared + P_squared[thread_idx, i + g], idx, D_squared ) - core._shift_insert_at_index(I[thread_idx, i + g], pos, j + g) + core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) if D_squared < P_squared[thread_idx, j + g, -1]: - pos = np.searchsorted( + idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, j + g], pos, D_squared + P_squared[thread_idx, j + g], idx, D_squared ) - core._shift_insert_at_index(I[thread_idx, j + g], pos, i + g) + core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) QT_j = QT_j_prime # Update top-k for both subsequences `S[i-g] = T[i-g:i-g+m]` and @@ -180,22 +180,22 @@ def _compute_PI( σ_Q[j - g], ) if D_squared < P_squared[thread_idx, i - g, -1]: - pos = np.searchsorted( + idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, i - g], pos, D_squared + P_squared[thread_idx, i - g], idx, D_squared ) - core._shift_insert_at_index(I[thread_idx, i - g], pos, j - g) + core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) if D_squared < P_squared[thread_idx, j - g, -1]: - pos = np.searchsorted( + idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, j - g], pos, D_squared + P_squared[thread_idx, j - g], idx, D_squared ) - core._shift_insert_at_index(I[thread_idx, j - g], pos, i - g) + core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) # In the case of a self-join, the calculated distance profile can also be # used to refine the top-k for all non-trivial subsequences @@ -205,17 +205,17 @@ def _compute_PI( # can be used to update the top-k for BOTH subsequence `i` and # subsequence `j`. We update the latter here. 
- idx = np.flatnonzero( + indices = np.flatnonzero( squared_distance_profile < P_squared[thread_idx, :, -1] ) - for j in idx: - pos = np.searchsorted( + for j in indices: + idx = np.searchsorted( P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) core._shift_insert_at_index( - P_squared[thread_idx, j], pos, squared_distance_profile[j] + P_squared[thread_idx, j], idx, squared_distance_profile[j] ) - core._shift_insert_at_index(I[thread_idx, j], pos, i) + core._shift_insert_at_index(I[thread_idx, j], idx, i) @njit( diff --git a/stumpy/stump.py b/stumpy/stump.py index 1144015af..3b8c74bed 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -217,22 +217,22 @@ def _compute_diagonal( # first (i.e. smallest) element in this array. Note that a higher # pearson value corresponds to a lower distance. if pearson > ρ[thread_idx, i, 0]: - pos = np.searchsorted(ρ[thread_idx, i], pearson) + idx = np.searchsorted(ρ[thread_idx, i], pearson) core._shift_insert_at_index( - ρ[thread_idx, i], pos, pearson, shift="left" + ρ[thread_idx, i], idx, pearson, shift="left" ) core._shift_insert_at_index( - I[thread_idx, i], pos, i + g, shift="left" + I[thread_idx, i], idx, i + g, shift="left" ) if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, i + g, 0]: - pos = np.searchsorted(ρ[thread_idx, i + g], pearson) + idx = np.searchsorted(ρ[thread_idx, i + g], pearson) core._shift_insert_at_index( - ρ[thread_idx, i + g], pos, pearson, shift="left" + ρ[thread_idx, i + g], idx, pearson, shift="left" ) core._shift_insert_at_index( - I[thread_idx, i + g], pos, i, shift="left" + I[thread_idx, i + g], idx, i, shift="left" ) if i < i + g: diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 8d30f3319..22bc9b122 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -249,10 +249,10 @@ def _update_egress(self, t): update_idx = np.argwhere(D < self._P[:, -1]).flatten() for i in update_idx: - pos = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], pos, D[i]) + idx = np.searchsorted(self._P[i], D[i], side="right") + core._shift_insert_at_index(self._P[i], idx, D[i]) core._shift_insert_at_index( - self._I[i], pos, D.shape[0] + self._n_appended - 1 + self._I[i], idx, D.shape[0] + self._n_appended - 1 ) # D.shape[0] is base-1 @@ -262,9 +262,9 @@ def _update_egress(self, t): self._I[-1] = -1 for i, d in enumerate(D): if d < self._P[-1, -1]: - pos = np.searchsorted(self._P[-1], d, side="right") - core._shift_insert_at_index(self._P[-1], pos, d) - core._shift_insert_at_index(self._I[-1], pos, i + self._n_appended) + idx = np.searchsorted(self._P[-1], d, side="right") + core._shift_insert_at_index(self._P[-1], idx, d) + core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended) # All neighbors of the last subsequence are on its left. 
So, its (top-1) # matrix profile value/index and its left matrix profile value/index must @@ -316,9 +316,9 @@ def _update(self, t): update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten() for i in update_idx: - pos = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], pos, D[i]) - core._shift_insert_at_index(self._I[i], pos, l) + idx = np.searchsorted(self._P[i], D[i], side="right") + core._shift_insert_at_index(self._P[i], idx, D[i]) + core._shift_insert_at_index(self._I[i], idx, l) # Calculating top-k matrix profile and (top-1) left matrix profile (and thier # corresponding indices) for new subsequence whose distance profie is `D` @@ -326,9 +326,9 @@ def _update(self, t): I_new = np.full(self._k, -1, dtype=np.int64) for i, d in enumerate(D): if d < P_new[-1]: # maximum value in sorted array P_new - pos = np.searchsorted(P_new, d, side="right") - core._shift_insert_at_index(P_new, pos, d) - core._shift_insert_at_index(I_new, pos, i) + idx = np.searchsorted(P_new, d, side="right") + core._shift_insert_at_index(P_new, idx, d) + core._shift_insert_at_index(I_new, idx, i) left_I_new = I_new[0] left_P_new = P_new[0] From 902d7ab4f9aa6ed4e33720efb2070c6b5bf26194 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 15 Jul 2022 19:26:59 -0600 Subject: [PATCH 299/416] minor correction --- tests/test_scrump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 3bd43b423..b4ea27153 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -384,7 +384,7 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): comp_I = approx.I_ naive.replace_inf(ref_P) - naive.replace_inf(comp_I) + naive.replace_inf(comp_P) npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) @@ -810,7 +810,7 @@ def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): comp_I = approx.I_ naive.replace_inf(ref_P) - naive.replace_inf(comp_I) + naive.replace_inf(comp_P) npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) From 3c16d3316d08e6ee6d6b6cf5f2260d872322ed7f Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 18 Jul 2022 08:07:05 -0600 Subject: [PATCH 300/416] Fix indexing --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 928dee9d8..2690da75d 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1442,7 +1442,7 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): pos = np.searchsorted(P[i + g], d, side="right") P[i + g] = np.insert(P[i + g], pos, d)[:-1] I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] - if d < P[j + g]: + if d < P[j + g, -1]: pos = np.searchsorted(P[j + g], d, side="right") P[j + g] = np.insert(P[j + g], pos, d)[:-1] I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] @@ -1453,7 +1453,7 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): pos = np.searchsorted(P[i - g], d, side="right") P[i - g] = np.insert(P[i - g], pos, d)[:-1] I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] - if d < P[j - g]: + if d < P[j - g, -1]: pos = np.searchsorted(P[j - g], d, side="right") P[j - g] = np.insert(P[j - g], pos, d)[:-1] I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] From 9bb8b16fd0f88d6dd6347d57d06836d42126b5a3 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 18 Jul 2022 08:17:18 -0600 Subject: [PATCH 301/416] Add new test function --- tests/test_scrump.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_scrump.py 
b/tests/test_scrump.py index b4ea27153..f5797ad16 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -814,3 +814,23 @@ def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_prescrump_self_join_larger_window_m_5_k_5(T_A, T_B): + m = 5 + k = 5 + zone = int(np.ceil(m / 4)) + + if len(T_B) > m: + for s in range(1, zone + 1): + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) + + np.random.seed(seed) + comp_P, comp_I = prescrump(T_B, m, s=s) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) From 33c611231cfdfcfb15b2c29b2b6b2f2c77c2ed4d Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 18 Jul 2022 08:23:40 -0600 Subject: [PATCH 302/416] Modify test function --- tests/test_scrump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index f5797ad16..3eb298bf4 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -827,10 +827,10 @@ def test_prescrump_self_join_larger_window_m_5_k_5(T_A, T_B): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=k) np.random.seed(seed) - comp_P, comp_I = prescrump(T_B, m, s=s) + comp_P, comp_I = prescrump(T_B, m, s=s, k=k) npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) From 30f4bcff39e1a41d985d75d367038464f9b76bd6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 11:50:02 -0600 Subject: [PATCH 303/416] Avoid dumplicate in naive prescrump --- tests/naive.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 625e09320..9bc7513bb 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1426,10 +1426,15 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): if exclusion_zone is not None: apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) - I[i, 1:] = I[i, :-1] - I[i, 0] = np.argmin(distance_profile) - P[i, 1:] = P[i, :-1] - P[i, 0] = distance_profile[I[i, 0]] + idx = np.argmin(distance_profile) + if idx not in I[i]: + I[i, 1:] = I[i, :-1] + I[i, 0] = idx + P[i, 1:] = P[i, :-1] + P[i, 0] = distance_profile[I[i, 0]] + + # else: the idx, i.e. 1NN of `i`, was already obtained (it maynot be stored + # at the first index of array I[i] though!) 
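A minimal sketch of the duplicate guard introduced in this patch (illustrative values only, mirroring the `not in I[...]` checks above): a candidate neighbor is skipped when its index already appears among the better entries of the row:

import numpy as np

P_row = np.array([0.5, 1.2, np.inf])  # sorted top-k distances (k = 3), made-up values
I_row = np.array([7, 42, -1])         # corresponding neighbor indices
d, j = 0.9, 7                         # neighbor 7 is already the 1-NN of this row
idx = np.searchsorted(P_row, d, side="right")
if j not in I_row[:idx]:              # False here, so no duplicate of index 7 is added
    P_row = np.insert(P_row, idx, d)[:-1]
    I_row = np.insert(I_row, idx, j)[:-1]
# P_row and I_row are unchanged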
if P[i, 0] == np.inf: I[i, 0] = -1 @@ -1440,31 +1445,36 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): d = dist_matrix[i + g, j + g] if d < P[i + g, -1]: pos = np.searchsorted(P[i + g], d, side="right") - P[i + g] = np.insert(P[i + g], pos, d)[:-1] - I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] + if (j + g) not in I[i + g, :pos]: + P[i + g] = np.insert(P[i + g], pos, d)[:-1] + I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] if exclusion_zone is not None and d < P[j + g, -1]: pos = np.searchsorted(P[j + g], d, side="right") - P[j + g] = np.insert(P[j + g], pos, d)[:-1] - I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] + if (i + g) not in I[j + g, :pos]: + P[j + g] = np.insert(P[j + g], pos, d)[:-1] + I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] for g in range(1, min(s, i + 1, j + 1)): d = dist_matrix[i - g, j - g] if d < P[i - g, -1]: pos = np.searchsorted(P[i - g], d, side="right") - P[i - g] = np.insert(P[i - g], pos, d)[:-1] - I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] + if (j - g) not in I[i - g, :pos]: + P[i - g] = np.insert(P[i - g], pos, d)[:-1] + I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] if exclusion_zone is not None and d < P[j - g, -1]: pos = np.searchsorted(P[j - g], d, side="right") - P[j - g] = np.insert(P[j - g], pos, d)[:-1] - I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + if (i - g) not in I[j - g, :pos]: + P[j - g] = np.insert(P[j - g], pos, d)[:-1] + I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] # In the case of a self-join, the calculated distance profile can also be # used to refine the top-k for all non-trivial subsequences if exclusion_zone is not None: for idx in np.flatnonzero(distance_profile < P[:, -1]): pos = np.searchsorted(P[idx], distance_profile[idx], side="right") - P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] - I[idx] = np.insert(I[idx], pos, i)[:-1] + if i not in I[idx, :pos]: + P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] + I[idx] = np.insert(I[idx], pos, i)[:-1] return P, I From 77e56f7335937d162530a26c53898ce166730d97 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 12:09:17 -0600 Subject: [PATCH 304/416] Add parameter assume_unique to handle duplicates --- tests/naive.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 9bc7513bb..87127dd13 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1800,16 +1800,29 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w return total_ndists -def merge_topk_PI(PA, PB, IA, IB): +def merge_topk_PI(PA, PB, IA, IB, assume_unique=True): + k = PA.shape[1] profile = np.column_stack((PA, PB)) indices = np.column_stack((IA, IB)) - idx = np.argsort(profile, axis=1) - profile[:, :] = np.take_along_axis(profile, idx, axis=1) - indices[:, :] = np.take_along_axis(indices, idx, axis=1) + IDX = np.argsort(profile, axis=1) + if assume_unique: + profile[:, :] = np.take_along_axis(profile, IDX, axis=1) + indices[:, :] = np.take_along_axis(indices, IDX, axis=1) - PA[:, :] = profile[:, : PA.shape[1]] - IA[:, :] = indices[:, : PA.shape[1]] + PA[:, :] = profile[:, :k] + IA[:, :] = indices[:, :k] + else: + # avoid duplicates while merging IA[i] and IB[i] + IDX_merged = np.full_like(PA, -1, dtype=np.int64) + for i, idx in enumerate(IDX): + _, arg_unique = np.unique(indices[i, idx], return_index=True) + arg_unique = np.sort(arg_unique)[:k] # preserving order of their appearence + idx = idx[arg_unique] + IDX_merged[i, : len(idx)] = idx + + PA[:, :] = 
np.take_along_axis(profile, IDX_merged, axis=1) + IA[:, :] = np.take_along_axis(indices, IDX_merged, axis=1) def merge_topk_ρI(ρA, ρB, IA, IB): From 7a93a7c957e1844dfa44c661779086f5c2eb4720 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 12:23:33 -0600 Subject: [PATCH 305/416] Add test function to test for duplicates in topk_merge --- tests/test_core.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 2ce86a4bb..703f7e6f7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1062,6 +1062,7 @@ def test_select_P_ABBA_val_inf(): def test_merge_topk_PI(): + # `assume_unique = True` n = 50 for k in range(1, 6): PA = np.random.rand(n * k).reshape(n, k) @@ -1088,6 +1089,41 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) + # `assume_unique = False` + n = 50 + for k in range(1, 6): + PA = np.random.rand(n * k).reshape(n, k) + PB = np.random.rand(n * k).reshape(n, k) + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + col_idx_A = np.random.randint(0, k, size=n) + col_idx_B = np.random.randint(0, k, size=n) + for i in range(n): # creating random duplicates between A and B + PB[i, col_idx_B[i]] = PA[i, col_idx_A[i]] + np.random.rand(1) * 1e-8 + IB[i, col_idx_B[i]] = IA[i, col_idx_A[i]] + + IDX = np.argsort(PA, axis=1) + PA[:, :] = np.take_along_axis(PA, IDX, axis=1) + IA[:, :] = np.take_along_axis(IA, IDX, axis=1) + + IDX = np.argsort(PB, axis=1) + PB[:, :] = np.take_along_axis(PB, IDX, axis=1) + IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB, ref_I, IB, assume_unique=False) + core._merge_topk_PI(comp_P, PB, comp_I, IB) + + npt.assert_array_equal(ref_P, comp_P) + npt.assert_array_equal(ref_I, comp_I) + def test_merge_topk_ρI(): n = 50 From fefcaa9b2163c4793706668b65a2cd183cac815b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 12:38:31 -0600 Subject: [PATCH 306/416] Add parameter assume_unique to performant merge_topk --- stumpy/core.py | 51 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 655a4ef86..1dc4f47b3 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2576,7 +2576,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): @njit -def _merge_topk_PI(PA, PB, IA, IB): +def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while always prioritizing the values of `PA` over the values of `PB` in case of ties. @@ -2602,22 +2602,55 @@ def _merge_topk_PI(PA, PB, IA, IB): IB : numpy.ndarray A (top-k) matrix profile indices corresponding to `PB` + assume_unique : bool, default True + If True (default), each row of IA and its corresponding row in IB have no + duplicates. False otherwise. + Returns ------- None """ - tmp_P = np.empty(PA.shape[1], dtype=np.float64) - tmp_I = np.empty(PA.shape[1], dtype=np.int64) - for i in range(len(PA)): + k = PA.shape[1] + tmp_P = np.empty(k, dtype=np.float64) + tmp_I = np.empty(k, dtype=np.int64) + for i in range(PA.shape[0]): aj, bj = 0, 0 - for k in range(len(tmp_P)): + idx = 0 + prev_val = np.inf + for _ in range(2 * k): # 2 * k to traverse both A and B + if idx >= k: + break + if aj >= k: # PA is already fully traversed. 
+ tmp_P[idx:] = PB[i, bj : bj + k - idx] + tmp_I[idx:] = IB[i, bj : bj + k - idx] + break + if bj >= k: # PB is already fully traversed. + tmp_P[idx:] = PA[i, aj : aj + k - idx] + tmp_I[idx:] = IA[i, aj : aj + k - idx] + break + if PB[i, bj] < PA[i, aj]: - tmp_P[k] = PB[i, bj] - tmp_I[k] = IB[i, bj] + if ( + assume_unique + or abs(PB[i, bj] - prev_val) > 1e-6 + or IB[i, bj] not in tmp_I[:idx] + ): + tmp_P[idx] = PB[i, bj] + tmp_I[idx] = IB[i, bj] + prev_val = tmp_P[idx] + idx += 1 bj += 1 + else: - tmp_P[k] = PA[i, aj] - tmp_I[k] = IA[i, aj] + if ( + assume_unique + or abs(PB[i, bj] - prev_val) > 1e-6 + or IB[i, bj] not in tmp_I[:idx] + ): + tmp_P[k] = PA[i, aj] + tmp_I[k] = IA[i, aj] + prev_val = tmp_P[idx] + idx += 1 aj += 1 PA[i] = tmp_P From 5e9c5fccf6d60b5aa9123c1c7c3f2e4a02f1b20b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 12:40:10 -0600 Subject: [PATCH 307/416] fix test function --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 703f7e6f7..4b448ce37 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1119,7 +1119,7 @@ def test_merge_topk_PI(): comp_I = IA.copy() naive.merge_topk_PI(ref_P, PB, ref_I, IB, assume_unique=False) - core._merge_topk_PI(comp_P, PB, comp_I, IB) + core._merge_topk_PI(comp_P, PB, comp_I, IB, assume_unique=False) npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) From 9685e440733580dec6830e39ce85bc34f4f3e76f Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:03:56 -0600 Subject: [PATCH 308/416] Fix bug --- stumpy/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 1dc4f47b3..eebd6930e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2644,11 +2644,11 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): else: if ( assume_unique - or abs(PB[i, bj] - prev_val) > 1e-6 + or abs(PA[i, bj] - prev_val) > 1e-6 or IB[i, bj] not in tmp_I[:idx] ): - tmp_P[k] = PA[i, aj] - tmp_I[k] = IA[i, aj] + tmp_P[idx] = PA[i, aj] + tmp_I[idx] = IA[i, aj] prev_val = tmp_P[idx] idx += 1 aj += 1 From 9dd452be2d563d877d9ef3aa4d5aec9e54094229 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:09:27 -0600 Subject: [PATCH 309/416] Revise prescrump to avoid duplicates --- stumpy/scrump.py | 62 +++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 63737f597..4c4ce4dc7 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -115,10 +115,11 @@ def _compute_PI( core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) nn_idx = np.argmin(squared_distance_profile) - core._shift_insert_at_index( - P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] - ) - core._shift_insert_at_index(I[thread_idx, i], 0, nn_idx) + if nn_idx not in I[thread_idx, i]: + core._shift_insert_at_index( + P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] + ) + core._shift_insert_at_index(I[thread_idx, i], 0, nn_idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -151,19 +152,21 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - core._shift_insert_at_index( - P_squared[thread_idx, i + g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) + if (j + g) not in I[thread_idx, i + g, :idx]: + core._shift_insert_at_index( + 
P_squared[thread_idx, i + g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) if excl_zone is not None and D_squared < P_squared[thread_idx, j + g, -1]: idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - core._shift_insert_at_index( - P_squared[thread_idx, j + g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) + if (i + g) not in I[thread_idx, j + g, :idx]: + core._shift_insert_at_index( + P_squared[thread_idx, j + g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) QT_j = QT_j_prime # Update top-k for both subsequences `S[i-g] = T[i-g:i-g+m]` and @@ -183,19 +186,21 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - core._shift_insert_at_index( - P_squared[thread_idx, i - g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) + if (j - g) not in I[thread_idx, i - g, :idx]: + core._shift_insert_at_index( + P_squared[thread_idx, i - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) if excl_zone is not None and D_squared < P_squared[thread_idx, j - g, -1]: idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - core._shift_insert_at_index( - P_squared[thread_idx, j - g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) + if (i - g) not in I[thread_idx, j - g, :idx]: + core._shift_insert_at_index( + P_squared[thread_idx, j - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) # In the case of a self-join, the calculated distance profile can also be # used to refine the top-k for all non-trivial subsequences @@ -212,10 +217,11 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) - core._shift_insert_at_index( - P_squared[thread_idx, j], idx, squared_distance_profile[j] - ) - core._shift_insert_at_index(I[thread_idx, j], idx, i) + if i not in I[thread_idx, j, :idx]: + core._shift_insert_at_index( + P_squared[thread_idx, j], idx, squared_distance_profile[j] + ) + core._shift_insert_at_index(I[thread_idx, j], idx, i) @njit( @@ -337,7 +343,13 @@ def _prescrump( ) for thread_idx in range(1, n_threads): - core._merge_topk_PI(P_squared[0], P_squared[thread_idx], I[0], I[thread_idx]) + core._merge_topk_PI( + P_squared[0], + P_squared[thread_idx], + I[0], + I[thread_idx], + assume_unique=False, + ) return np.sqrt(P_squared[0]), I[0] From 3d68ae70c1c0606ea8d2c7102261c57f62a072e5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:12:08 -0600 Subject: [PATCH 310/416] Avoid duplocates in scrump --- stumpy/scrump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 4c4ce4dc7..dee399334 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -700,7 +700,7 @@ def __init__( else: P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) - core._merge_topk_PI(self._P, P, self._I, I) + core._merge_topk_PI(self._P, P, self._I, I, assume_unique=False) if self._ignore_trivial: self._diags = np.random.permutation( @@ -758,7 +758,7 @@ def update(self): ) # Update (top-k) matrix profile and indices - core._merge_topk_PI(self._P, P, self._I, I) + core._merge_topk_PI(self._P, P, self._I, I, assume_unique=False) # update left matrix profile and indices mask = PL < self._PL From 4c171198e74858ac4ef9f32b317341cc465b562b Mon Sep 17 00:00:00 2001 From: 
ninimama Date: Tue, 19 Jul 2022 13:14:23 -0600 Subject: [PATCH 311/416] Revise test function to consider new parameter --- tests/test_scrump.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 3eb298bf4..7b0ca6393 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -373,7 +373,7 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): ref_P_aux, ref_I_aux, _, _ = naive.scrump( T_B, m, T_B, percentage, zone, True, s ) - naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False) np.random.seed(seed) approx = scrump( @@ -406,7 +406,7 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump( T_A, m, T_B, percentage, None, False, None ) - naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False) ref_left_I = ref_left_I_aux ref_right_I = ref_right_I_aux @@ -793,7 +793,9 @@ def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): ref_P_aux, ref_I_aux, _, _ = naive.scrump( T_B, m, T_B, percentage, zone, True, s, k=k ) - naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + naive.merge_topk_PI( + ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False + ) np.random.seed(seed) approx = scrump( From 2c662a93e47d66caf9dac4eb6b0fe0ed16f09460 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:27:22 -0600 Subject: [PATCH 312/416] Fix bug --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index eebd6930e..20f8d2853 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2644,8 +2644,8 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): else: if ( assume_unique - or abs(PA[i, bj] - prev_val) > 1e-6 - or IB[i, bj] not in tmp_I[:idx] + or abs(PA[i, aj] - prev_val) > 1e-6 + or IA[i, aj] not in tmp_I[:idx] ): tmp_P[idx] = PA[i, aj] tmp_I[idx] = IA[i, aj] From 3a0f4dacf750474cdee8f98fb95e265d356b27e2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:30:27 -0600 Subject: [PATCH 313/416] Revise naive scrump to avoid duplicates --- tests/naive.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 87127dd13..9a6070ef3 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1518,13 +1518,15 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): d = dist_matrix[i, j] if d < P[i, -1]: # update TopK of P[i] idx = searchsorted_right(P[i], d) - P[i] = np.insert(P[i], idx, d)[:-1] - I[i] = np.insert(I[i], idx, i + g)[:-1] + if (i + g) not in I[i, :idx]: + P[i] = np.insert(P[i], idx, d)[:-1] + I[i] = np.insert(I[i], idx, i + g)[:-1] if exclusion_zone is not None and d < P[i + g, -1]: idx = searchsorted_right(P[i + g], d) - P[i + g] = np.insert(P[i + g], idx, d)[:-1] - I[i + g] = np.insert(I[i + g], idx, i)[:-1] + if i not in I[i + g, :idx]: + P[i + g] = np.insert(P[i + g], idx, d)[:-1] + I[i + g] = np.insert(I[i + g], idx, i)[:-1] # left matrix profile and left matrix profile indices if exclusion_zone is not None and i < i + g and d < PL[i + g]: From d8728c98702efd9b67d9eee4edf6ab746dd22180 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:41:18 -0600 Subject: [PATCH 314/416] Add comment --- stumpy/scrump.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stumpy/scrump.py b/stumpy/scrump.py 
index dee399334..387787f16 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -116,6 +116,8 @@ def _compute_PI( nn_idx = np.argmin(squared_distance_profile) if nn_idx not in I[thread_idx, i]: + # It is more than likely that the top-k values for the `i`-th subsequence + # will be already populated. So, we must shift-insert here core._shift_insert_at_index( P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] ) From 561b4281f73ab35102ff9bd2b64033e305113651 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:42:06 -0600 Subject: [PATCH 315/416] minor optimization --- stumpy/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 20f8d2853..30ffabd69 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2633,7 +2633,8 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): if ( assume_unique or abs(PB[i, bj] - prev_val) > 1e-6 - or IB[i, bj] not in tmp_I[:idx] + or IB[i, bj] not in tmp_I[:idx][::-1] # traverse in reverse to + # find duplicate in shorter time ): tmp_P[idx] = PB[i, bj] tmp_I[idx] = IB[i, bj] @@ -2645,7 +2646,8 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): if ( assume_unique or abs(PA[i, aj] - prev_val) > 1e-6 - or IA[i, aj] not in tmp_I[:idx] + or IA[i, aj] not in tmp_I[:idx][::-1] # traverse in reverse to + # find duplicate in shorter time ): tmp_P[idx] = PA[i, aj] tmp_I[idx] = IA[i, aj] From 44b85a826b1b4854e099e38da0a4893776ad3d73 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:43:30 -0600 Subject: [PATCH 316/416] Correct style --- stumpy/scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 387787f16..a224f2a1c 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -117,7 +117,7 @@ def _compute_PI( nn_idx = np.argmin(squared_distance_profile) if nn_idx not in I[thread_idx, i]: # It is more than likely that the top-k values for the `i`-th subsequence - # will be already populated. So, we must shift-insert here + # will be already populated. 
So, we must shift-insert here core._shift_insert_at_index( P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] ) From dbdc7c9fc39670d65e079adf37a5e02a9a9e6bea Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:48:09 -0600 Subject: [PATCH 317/416] Correct style --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 30ffabd69..bbb4524fd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2633,7 +2633,7 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): if ( assume_unique or abs(PB[i, bj] - prev_val) > 1e-6 - or IB[i, bj] not in tmp_I[:idx][::-1] # traverse in reverse to + or IB[i, bj] not in tmp_I[:idx][::-1] # traverse in reverse to # find duplicate in shorter time ): tmp_P[idx] = PB[i, bj] @@ -2646,7 +2646,7 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): if ( assume_unique or abs(PA[i, aj] - prev_val) > 1e-6 - or IA[i, aj] not in tmp_I[:idx][::-1] # traverse in reverse to + or IA[i, aj] not in tmp_I[:idx][::-1] # traverse in reverse to # find duplicate in shorter time ): tmp_P[idx] = PA[i, aj] From 19129ab7c2a0add61d8cb5a8e905b3baa528fecf Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 13:56:33 -0600 Subject: [PATCH 318/416] increase threshold --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index bbb4524fd..e1db51fd3 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2632,7 +2632,7 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): if PB[i, bj] < PA[i, aj]: if ( assume_unique - or abs(PB[i, bj] - prev_val) > 1e-6 + or abs(PB[i, bj] - prev_val) > 1e-3 or IB[i, bj] not in tmp_I[:idx][::-1] # traverse in reverse to # find duplicate in shorter time ): @@ -2645,7 +2645,7 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): else: if ( assume_unique - or abs(PA[i, aj] - prev_val) > 1e-6 + or abs(PA[i, aj] - prev_val) > 1e-3 or IA[i, aj] not in tmp_I[:idx][::-1] # traverse in reverse to # find duplicate in shorter time ): From 5d96bbdd5a194efb3ed952ece98f341ce9650c7e Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 16:35:08 -0600 Subject: [PATCH 319/416] Specifiy kind in sort --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 9a6070ef3..466ebadf6 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1807,7 +1807,7 @@ def merge_topk_PI(PA, PB, IA, IB, assume_unique=True): profile = np.column_stack((PA, PB)) indices = np.column_stack((IA, IB)) - IDX = np.argsort(profile, axis=1) + IDX = np.argsort(profile, axis=1, kind="mergesort") if assume_unique: profile[:, :] = np.take_along_axis(profile, IDX, axis=1) indices[:, :] = np.take_along_axis(indices, IDX, axis=1) From d3a9b3175e9224d7802610040da8ab9694265723 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 19 Jul 2022 17:32:44 -0600 Subject: [PATCH 320/416] minor change --- tests/test_scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 7b0ca6393..5add152cc 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -677,8 +677,8 @@ def test_prescrump_self_join_KNN(T_A, T_B): np.random.seed(seed) comp_P, comp_I = prescrump(T_B, m, s=s, k=k) - npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_P, comp_P) @pytest.mark.parametrize("T_A, T_B", test_data) From 970efc7a0b881adf4b076c9b30e412424c9f0c42 Mon Sep 17 00:00:00 
2001 From: ninimama Date: Wed, 20 Jul 2022 12:40:30 -0600 Subject: [PATCH 321/416] specify kind in sort --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 466ebadf6..d602c9bf5 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1850,7 +1850,7 @@ def merge_topk_ρI(ρA, ρB, IA, IB): profile = np.column_stack((ρB, ρA)) indices = np.column_stack((IB, IA)) - idx = np.argsort(profile, axis=1) + idx = np.argsort(profile, axis=1, kind="mergesort") profile[:, :] = np.take_along_axis(profile, idx, axis=1) indices[:, :] = np.take_along_axis(indices, idx, axis=1) From cd7fe1a4d792d8028db3a2192e57c93ebef6bc46 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 12:44:22 -0600 Subject: [PATCH 322/416] minor changes --- tests/naive.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index d602c9bf5..c23e36a3e 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1426,10 +1426,10 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): if exclusion_zone is not None: apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) - idx = np.argmin(distance_profile) - if idx not in I[i]: + nn_idx = np.argmin(distance_profile) + if nn_idx not in I[i]: I[i, 1:] = I[i, :-1] - I[i, 0] = idx + I[i, 0] = nn_idx P[i, 1:] = P[i, :-1] P[i, 0] = distance_profile[I[i, 0]] @@ -1440,17 +1440,17 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): I[i, 0] = -1 continue - j = I[i, 0] # index of 1st NN + j = nn_idx for g in range(1, min(s, l - max(i, j))): d = dist_matrix[i + g, j + g] if d < P[i + g, -1]: pos = np.searchsorted(P[i + g], d, side="right") - if (j + g) not in I[i + g, :pos]: + if (j + g) not in I[i + g, :]: P[i + g] = np.insert(P[i + g], pos, d)[:-1] I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] if exclusion_zone is not None and d < P[j + g, -1]: pos = np.searchsorted(P[j + g], d, side="right") - if (i + g) not in I[j + g, :pos]: + if (i + g) not in I[j + g, :]: P[j + g] = np.insert(P[j + g], pos, d)[:-1] I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] @@ -1458,12 +1458,12 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): d = dist_matrix[i - g, j - g] if d < P[i - g, -1]: pos = np.searchsorted(P[i - g], d, side="right") - if (j - g) not in I[i - g, :pos]: + if (j - g) not in I[i - g, :]: P[i - g] = np.insert(P[i - g], pos, d)[:-1] I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] if exclusion_zone is not None and d < P[j - g, -1]: pos = np.searchsorted(P[j - g], d, side="right") - if (i - g) not in I[j - g, :pos]: + if (i - g) not in I[j - g, :]: P[j - g] = np.insert(P[j - g], pos, d)[:-1] I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] @@ -1472,7 +1472,7 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): if exclusion_zone is not None: for idx in np.flatnonzero(distance_profile < P[:, -1]): pos = np.searchsorted(P[idx], distance_profile[idx], side="right") - if i not in I[idx, :pos]: + if i not in I[idx, :]: P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] I[idx] = np.insert(I[idx], pos, i)[:-1] @@ -1518,13 +1518,13 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): d = dist_matrix[i, j] if d < P[i, -1]: # update TopK of P[i] idx = searchsorted_right(P[i], d) - if (i + g) not in I[i, :idx]: + if (i + g) not in I[i, :]: P[i] = np.insert(P[i], idx, d)[:-1] I[i] = np.insert(I[i], idx, i + g)[:-1] if exclusion_zone is not None and d < P[i + g, -1]: idx = searchsorted_right(P[i + g], d) - if i 
not in I[i + g, :idx]: + if i not in I[i + g, :]: P[i + g] = np.insert(P[i + g], idx, d)[:-1] I[i + g] = np.insert(I[i + g], idx, i)[:-1] From 6ca36d01abe376098f92e4b049ca7b8d56e7c198 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 12:59:04 -0600 Subject: [PATCH 323/416] De-otpimize if condition Due to numerical erorrs, we need to avoid partial traversal of array --- stumpy/scrump.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index a224f2a1c..a94d46280 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -127,7 +127,7 @@ def _compute_PI( I[thread_idx, i, 0] = -1 continue - j = I[thread_idx, i, 0] + j = nn_idx # Given the squared distance, work backwards and compute QT QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( m * M_T[j] * μ_Q[i] @@ -154,7 +154,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - if (j + g) not in I[thread_idx, i + g, :idx]: + if (j + g) not in I[thread_idx, i + g]: core._shift_insert_at_index( P_squared[thread_idx, i + g], idx, D_squared ) @@ -164,7 +164,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - if (i + g) not in I[thread_idx, j + g, :idx]: + if (i + g) not in I[thread_idx, j + g]: core._shift_insert_at_index( P_squared[thread_idx, j + g], idx, D_squared ) @@ -188,7 +188,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - if (j - g) not in I[thread_idx, i - g, :idx]: + if (j - g) not in I[thread_idx, i - g]: core._shift_insert_at_index( P_squared[thread_idx, i - g], idx, D_squared ) @@ -198,7 +198,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - if (i - g) not in I[thread_idx, j - g, :idx]: + if (i - g) not in I[thread_idx, j - g]: core._shift_insert_at_index( P_squared[thread_idx, j - g], idx, D_squared ) @@ -219,7 +219,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) - if i not in I[thread_idx, j, :idx]: + if i not in I[thread_idx, j]: core._shift_insert_at_index( P_squared[thread_idx, j], idx, squared_distance_profile[j] ) From 5d930b236ac2331e3b7efab65e14a010572c134f Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 13:20:45 -0600 Subject: [PATCH 324/416] Update scrump --- stumpy/scrump.py | 80 ++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index a94d46280..484a1867b 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -19,10 +19,10 @@ def _compute_PI( T_A, T_B, m, - M_T, - Σ_T, μ_Q, σ_Q, + M_T, + Σ_T, indices, start, stop, @@ -49,18 +49,17 @@ def _compute_PI( m : int Window size - M_T : numpy.ndarray - Sliding window mean for T_A - - Σ_T : numpy.ndarray - Sliding window standard deviation for T_A - μ_Q : numpy.ndarray - Mean of the query sequence, `Q`, relative to the current sliding window in `T_B` + Sliding window mean for `T_A` σ_Q : numpy.ndarray - Standard deviation of the query sequence, `Q`, relative to the current - sliding window in `T_B` + Sliding window standard deviation for `T_A` + + M_T : numpy.ndarray + Sliding window mean for `T_B` + + Σ_T : numpy.ndarray + Sliding window standard deviation for `T_B` indices : numpy.ndarray The subsequence indices to compute `prescrump` for @@ -103,9 +102,10 @@ def _compute_PI( See Algorithm 2 """ - l = 
T_B.shape[0] - m + 1 - squared_distance_profile = np.empty(l) - QT = np.empty(l, dtype=np.float64) + l = T_A.shape[0] - m + 1 # length of matrix profile + w = T_B.shape[0] - m + 1 # length of distance profile + squared_distance_profile = np.empty(w) + QT = np.empty(w, dtype=np.float64) for i in indices[start:stop]: Q = T_A[i : i + m] QT[:] = core._sliding_dot_product(Q, T_B) @@ -136,19 +136,19 @@ def _compute_PI( # Update top-k for both subsequences `S[i+g] = T[i+g:i+g+m]`` and # `S[j+g] = T[j+g:j+g+m]` (i.e., the right neighbors of `T[i : i+m]` and # `T[j:j+m]`) by using the distance between `S[i+g]` and `S[j+g]` - for g in range(1, min(s, l - max(i, j))): + for g in range(1, min(s, l - l - i, w - j)): QT_j = ( QT_j - - T_B[i + g - 1] * T_A[j + g - 1] - + T_B[i + g + m - 1] * T_A[j + g + m - 1] + - T_B[j + g - 1] * T_A[i + g - 1] + + T_B[j + g + m - 1] * T_A[i + g + m - 1] ) D_squared = core._calculate_squared_distance( m, QT_j, - M_T[i + g], - Σ_T[i + g], - μ_Q[j + g], - σ_Q[j + g], + M_T[j + g], + Σ_T[j + g], + μ_Q[i + g], + σ_Q[i + g], ) if D_squared < P_squared[thread_idx, i + g, -1]: idx = np.searchsorted( @@ -175,14 +175,14 @@ def _compute_PI( # `S[j-g] = T[j-g:j-g+m]` (i.e., the left neighbors of `T[i : i+m]` and # `T[j:j+m]`) by using the distance between `S[i-g]` and `S[j-g]` for g in range(1, min(s, i + 1, j + 1)): - QT_j = QT_j - T_B[i - g + m] * T_A[j - g + m] + T_B[i - g] * T_A[j - g] + QT_j = QT_j - T_B[j - g + m] * T_A[i - g + m] + T_B[j - g] * T_A[i - g] D_squared = core._calculate_squared_distance( m, QT_j, - M_T[i - g], - Σ_T[i - g], - μ_Q[j - g], - σ_Q[j - g], + M_T[j - g], + Σ_T[j - g], + μ_Q[i - g], + σ_Q[i - g], ) if D_squared < P_squared[thread_idx, i - g, -1]: idx = np.searchsorted( @@ -236,10 +236,10 @@ def _prescrump( T_A, T_B, m, - M_T, - Σ_T, μ_Q, σ_Q, + M_T, + Σ_T, indices, s, excl_zone=None, @@ -260,18 +260,18 @@ def _prescrump( m : int Window size + μ_Q : numpy.ndarray + Sliding window mean for `T_A` + + σ_Q : numpy.ndarray + Sliding window standard deviation for `T_A` + M_T : numpy.ndarray - Sliding window mean for T_A + Sliding window mean for `T_B` Σ_T : numpy.ndarray - Sliding window standard deviation for T_A + Sliding window standard deviation for `T_B` - μ_Q : numpy.ndarray - Mean of the query sequence, `Q`, relative to the current sliding window in `T_B` - - σ_Q : numpy.ndarray - Standard deviation of the query sequence, `Q`, relative to the current - sliding window in `T_B` indices : numpy.ndarray The subsequence indices to compute `prescrump` for @@ -329,10 +329,10 @@ def _prescrump( T_A, T_B, m, - M_T, - Σ_T, μ_Q, σ_Q, + M_T, + Σ_T, indices, idx_ranges[thread_idx, 0], idx_ranges[thread_idx, 1], @@ -433,10 +433,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): T_A, T_B, m, - M_T, - Σ_T, μ_Q, σ_Q, + M_T, + Σ_T, indices, s, excl_zone, From 4b5876512142fc0e921d295e6ee58ea683b1f548 Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 13:32:43 -0600 Subject: [PATCH 325/416] minor changes --- stumpy/scrump.py | 2 +- tests/test_scrump.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 484a1867b..ea2aca927 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -136,7 +136,7 @@ def _compute_PI( # Update top-k for both subsequences `S[i+g] = T[i+g:i+g+m]`` and # `S[j+g] = T[j+g:j+g+m]` (i.e., the right neighbors of `T[i : i+m]` and # `T[j:j+m]`) by using the distance between `S[i+g]` and `S[j+g]` - for g in range(1, min(s, l - l - i, w - j)): + for g in range(1, 
min(s, l - i, w - j)): QT_j = ( QT_j - T_B[j + g - 1] * T_A[i + g - 1] diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 3a635a07b..6f9ee719c 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -662,6 +662,7 @@ def test_scrump_nan_zero_mean_self_join(percentages): npt.assert_almost_equal(ref_left_I, comp_left_I) npt.assert_almost_equal(ref_right_I, comp_right_I) + @pytest.mark.parametrize("T_A, T_B", test_data) def test_prescrump_A_B_join_larger_window(T_A, T_B): m = 5 @@ -679,6 +680,7 @@ def test_prescrump_A_B_join_larger_window(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + @pytest.mark.parametrize("T_A, T_B", test_data) def test_prescrump_self_join_KNN(T_A, T_B): m = 3 From aaa8ff7bbe07bf4205e1d04476e11c95d01d038b Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 13:39:55 -0600 Subject: [PATCH 326/416] add new test function --- tests/test_scrump.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 6f9ee719c..8b19b40a9 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -854,3 +854,22 @@ def test_prescrump_self_join_larger_window_m_5_k_5(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): + m = 5 + k = 5 + zone = int(np.ceil(m / 4)) + if len(T_A) > m and len(T_B) > m: + for s in range(1, zone + 1): + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s, k=k) + + np.random.seed(seed) + comp_P, comp_I = prescrump(T_A, m, T_B, s=s, k=k) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) From 6c8eddcd2d8dfed25abca15761303018d059640c Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 20 Jul 2022 14:52:36 -0600 Subject: [PATCH 327/416] optimize if condition --- stumpy/scrump.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index ea2aca927..d5fe898cc 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -154,7 +154,13 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - if (j + g) not in I[thread_idx, i + g]: + # Due to numerical error, it is possible that the element that is + # about to insert at idx is identical to an element of array located + # at idx, idx + 1, .... Hence, we should traverse full array. + # This is optimized in the if conditon. 
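A side note on the pattern used in this hunk: each top-k row is kept sorted, a new distance is positioned with np.searchsorted, and everything to its right is shifted down by one. Because round-off can put a recomputed distance on either side of an existing entry for the same index, the duplicate check has to look at the whole index row rather than only the part left of the insertion point. A small self-contained illustration follows; the shift_insert helper is an out-of-place stand-in, not the library's _shift_insert_at_index.

import numpy as np

def shift_insert(arr, idx, value):
    # Stand-in for a sorted top-k update: insert `value` at position `idx`
    # and drop the last (now surplus) element.
    return np.concatenate((arr[:idx], [value], arr[idx:-1]))

P = np.array([1.0, 1.0000001, 5.0])   # sorted top-3 distances (near-tie)
I = np.array([11, 42, 19])            # their subsequence indices

# A recomputed distance to index 42 differs only by round-off, and
# searchsorted places it to the RIGHT of the existing entry for 42, so the
# duplicate check must scan the whole index row, not just I[:pos].
d, j = 1.0000002, 42
pos = np.searchsorted(P, d, side="right")
if d < P[-1] and j not in I:
    P, I = shift_insert(P, pos, d), shift_insert(I, pos, j)

# A genuinely new neighbor is inserted and the worst entry falls off.
d, j = 0.5, 27
pos = np.searchsorted(P, d, side="right")
if d < P[-1] and j not in I:
    P, I = shift_insert(P, pos, d), shift_insert(I, pos, j)

# P is now [0.5, 1.0, 1.0000001] and I is [27, 11, 42]
print(P, I)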
+ if ((j + g) not in I[thread_idx, i + g, :idx][::-1]) and ( + (j + g) not in I[thread_idx, i + g, idx:] + ): core._shift_insert_at_index( P_squared[thread_idx, i + g], idx, D_squared ) @@ -164,7 +170,9 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - if (i + g) not in I[thread_idx, j + g]: + if ((i + g) not in I[thread_idx, j + g, :idx][::-1]) and ( + (i + g) not in I[thread_idx, j + g, idx:] + ): core._shift_insert_at_index( P_squared[thread_idx, j + g], idx, D_squared ) @@ -188,7 +196,9 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - if (j - g) not in I[thread_idx, i - g]: + if ((j - g) not in I[thread_idx, i - g, :idx][::-1]) and ( + (j - g) not in I[thread_idx, i - g, idx:] + ): core._shift_insert_at_index( P_squared[thread_idx, i - g], idx, D_squared ) @@ -198,7 +208,9 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - if (i - g) not in I[thread_idx, j - g]: + if ((i - g) not in I[thread_idx, j - g, :idx][::-1]) and ( + (i - g) not in I[thread_idx, j - g, idx:] + ): core._shift_insert_at_index( P_squared[thread_idx, j - g], idx, D_squared ) @@ -219,7 +231,9 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) - if i not in I[thread_idx, j]: + if (i not in I[thread_idx, j, :idx][::-1]) and ( + i not in I[thread_idx, j, idx:] + ): core._shift_insert_at_index( P_squared[thread_idx, j], idx, squared_distance_profile[j] ) From 5bb6879a7e464b9cffb76952c893730c97d243d6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 16:54:09 -0600 Subject: [PATCH 328/416] Give priority to PA in case of ties between IA and IB --- tests/naive.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index dc9e01b8a..8dd1ef6f9 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1802,29 +1802,20 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w return total_ndists -def merge_topk_PI(PA, PB, IA, IB, assume_unique=True): +def merge_topk_PI(PA, PB, IA, IB): k = PA.shape[1] + for i in range(PA.shape[0]): + _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i]) + PB[i, overlap_idx_B] = np.inf + profile = np.column_stack((PA, PB)) indices = np.column_stack((IA, IB)) - IDX = np.argsort(profile, axis=1, kind="mergesort") - if assume_unique: - profile[:, :] = np.take_along_axis(profile, IDX, axis=1) - indices[:, :] = np.take_along_axis(indices, IDX, axis=1) + profile[:, :] = np.take_along_axis(profile, IDX, axis=1) + indices[:, :] = np.take_along_axis(indices, IDX, axis=1) - PA[:, :] = profile[:, :k] - IA[:, :] = indices[:, :k] - else: - # avoid duplicates while merging IA[i] and IB[i] - IDX_merged = np.full_like(PA, -1, dtype=np.int64) - for i, idx in enumerate(IDX): - _, arg_unique = np.unique(indices[i, idx], return_index=True) - arg_unique = np.sort(arg_unique)[:k] # preserving order of their appearence - idx = idx[arg_unique] - IDX_merged[i, : len(idx)] = idx - - PA[:, :] = np.take_along_axis(profile, IDX_merged, axis=1) - IA[:, :] = np.take_along_axis(indices, IDX_merged, axis=1) + PA[:, :] = profile[:, :k] + IA[:, :] = indices[:, :k] def merge_topk_ρI(ρA, ρB, IA, IB): From fc10e8acb94a4c34b1d7c4327bcc8bf8be9afb2d Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 16:56:54 -0600 Subject: [PATCH 329/416] Remove trailing colon --- tests/naive.py | 14 +++++++------- 1 file 
changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 8dd1ef6f9..64bad3caa 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1445,12 +1445,12 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): d = dist_matrix[i + g, j + g] if d < P[i + g, -1]: pos = np.searchsorted(P[i + g], d, side="right") - if (j + g) not in I[i + g, :]: + if (j + g) not in I[i + g]: P[i + g] = np.insert(P[i + g], pos, d)[:-1] I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] if exclusion_zone is not None and d < P[j + g, -1]: pos = np.searchsorted(P[j + g], d, side="right") - if (i + g) not in I[j + g, :]: + if (i + g) not in I[j + g]: P[j + g] = np.insert(P[j + g], pos, d)[:-1] I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] @@ -1458,12 +1458,12 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): d = dist_matrix[i - g, j - g] if d < P[i - g, -1]: pos = np.searchsorted(P[i - g], d, side="right") - if (j - g) not in I[i - g, :]: + if (j - g) not in I[i - g]: P[i - g] = np.insert(P[i - g], pos, d)[:-1] I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] if exclusion_zone is not None and d < P[j - g, -1]: pos = np.searchsorted(P[j - g], d, side="right") - if (i - g) not in I[j - g, :]: + if (i - g) not in I[j - g]: P[j - g] = np.insert(P[j - g], pos, d)[:-1] I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] @@ -1472,7 +1472,7 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): if exclusion_zone is not None: for idx in np.flatnonzero(distance_profile < P[:, -1]): pos = np.searchsorted(P[idx], distance_profile[idx], side="right") - if i not in I[idx, :]: + if i not in I[idx]: P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] I[idx] = np.insert(I[idx], pos, i)[:-1] @@ -1518,13 +1518,13 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): d = dist_matrix[i, j] if d < P[i, -1]: # update TopK of P[i] idx = searchsorted_right(P[i], d) - if (i + g) not in I[i, :]: + if (i + g) not in I[i]: P[i] = np.insert(P[i], idx, d)[:-1] I[i] = np.insert(I[i], idx, i + g)[:-1] if exclusion_zone is not None and d < P[i + g, -1]: idx = searchsorted_right(P[i + g], d) - if i not in I[i + g, :]: + if i not in I[i + g]: P[i + g] = np.insert(P[i + g], idx, d)[:-1] I[i + g] = np.insert(I[i + g], idx, i)[:-1] From ef1309bf6c698d791d0119cea7f47253ff2f61de Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:01:19 -0600 Subject: [PATCH 330/416] update test function --- tests/test_core.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 4b448ce37..0b795235e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1083,8 +1083,8 @@ def test_merge_topk_PI(): comp_P = PA.copy() comp_I = IA.copy() - naive.merge_topk_PI(ref_P, PB, ref_I, IB) - core._merge_topk_PI(comp_P, PB, comp_I, IB) + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) @@ -1098,12 +1098,14 @@ def test_merge_topk_PI(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - col_idx_A = np.random.randint(0, k, size=n) - col_idx_B = np.random.randint(0, k, size=n) - for i in range(n): # creating random duplicates between A and B - PB[i, col_idx_B[i]] = PA[i, col_idx_A[i]] + np.random.rand(1) * 1e-8 - IB[i, col_idx_B[i]] = IA[i, col_idx_A[i]] + cols_idx_A = np.random.randint(0, k, size=n) + for i in range(n): + # create overlaps + IDX = np.random.choice(np.arange(k), 
cols_idx_A[i], replace=False) + PB[i, IDX] = PA[i, IDX] + IB[i, IDX]] = IA[i, IDX] + # sort each row of PA/PB (and update IA/IB accordingly) IDX = np.argsort(PA, axis=1) PA[:, :] = np.take_along_axis(PA, IDX, axis=1) IA[:, :] = np.take_along_axis(IA, IDX, axis=1) @@ -1118,8 +1120,8 @@ def test_merge_topk_PI(): comp_P = PA.copy() comp_I = IA.copy() - naive.merge_topk_PI(ref_P, PB, ref_I, IB, assume_unique=False) - core._merge_topk_PI(comp_P, PB, comp_I, IB, assume_unique=False) + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) From c2fe4d2bfb469d550f6b3d2f1b98e076ddc73bbc Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:04:47 -0600 Subject: [PATCH 331/416] revise function to avoid adding new parameter --- stumpy/core.py | 39 +++++++-------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e1db51fd3..f3eff3db6 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2576,7 +2576,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): @njit -def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): +def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while always prioritizing the values of `PA` over the values of `PB` in case of ties. @@ -2602,10 +2602,6 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): IB : numpy.ndarray A (top-k) matrix profile indices corresponding to `PB` - assume_unique : bool, default True - If True (default), each row of IA and its corresponding row in IB have no - duplicates. False otherwise. - Returns ------- None @@ -2614,45 +2610,24 @@ def _merge_topk_PI(PA, PB, IA, IB, assume_unique=True): tmp_P = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) for i in range(PA.shape[0]): + overlap = np.intersect1d(IA[i], IB[i]) aj, bj = 0, 0 idx = 0 - prev_val = np.inf for _ in range(2 * k): # 2 * k to traverse both A and B if idx >= k: break - if aj >= k: # PA is already fully traversed. - tmp_P[idx:] = PB[i, bj : bj + k - idx] - tmp_I[idx:] = IB[i, bj : bj + k - idx] - break - if bj >= k: # PB is already fully traversed. 
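Pulling the pieces of the naive reference together, the merge can also be written in a vectorized form: mask B's entries whose index already appears in the corresponding row of A, stack the columns, stable-sort each row, and keep the k smallest. The sketch below restates the tests/naive.py logic from these diffs as one self-contained function; the name merge_topk_rows is illustrative.

import numpy as np

def merge_topk_rows(PA, IA, PB, IB):
    # Row-wise top-k merge: A wins exact ties because its columns come
    # first and the mergesort-based argsort is stable.
    k = PA.shape[1]
    PB, IB = PB.copy(), IB.copy()
    for i in range(PA.shape[0]):
        # positions in IB[i] whose index already appears in IA[i]
        _, _, dup_B = np.intersect1d(IA[i], IB[i], return_indices=True)
        PB[i, dup_B] = np.inf          # overlapping entries never get picked
    P = np.column_stack((PA, PB))
    I = np.column_stack((IA, IB))
    idx = np.argsort(P, axis=1, kind="mergesort")
    P = np.take_along_axis(P, idx, axis=1)
    I = np.take_along_axis(I, idx, axis=1)
    return P[:, :k], I[:, :k]

PA, IA = np.array([[0.1, 0.5]]), np.array([[3, 9]])
PB, IB = np.array([[0.1, 0.2]]), np.array([[3, 7]])
print(merge_topk_rows(PA, IA, PB, IB))   # ([[0.1, 0.2]], [[3, 7]])

The stable sort is what lets the reference implementation express "prefer A on ties" without any explicit tie-breaking code.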
- tmp_P[idx:] = PA[i, aj : aj + k - idx] - tmp_I[idx:] = IA[i, aj : aj + k - idx] - break - if PB[i, bj] < PA[i, aj]: - if ( - assume_unique - or abs(PB[i, bj] - prev_val) > 1e-3 - or IB[i, bj] not in tmp_I[:idx][::-1] # traverse in reverse to - # find duplicate in shorter time - ): + if bj < k and PB[i, bj] < PA[i, aj]: + if IB[i, bj] not in overlap: tmp_P[idx] = PB[i, bj] tmp_I[idx] = IB[i, bj] - prev_val = tmp_P[idx] idx += 1 bj += 1 else: - if ( - assume_unique - or abs(PA[i, aj] - prev_val) > 1e-3 - or IA[i, aj] not in tmp_I[:idx][::-1] # traverse in reverse to - # find duplicate in shorter time - ): - tmp_P[idx] = PA[i, aj] - tmp_I[idx] = IA[i, aj] - prev_val = tmp_P[idx] - idx += 1 + tmp_P[idx] = PA[i, aj] + tmp_I[idx] = IA[i, aj] + idx += 1 aj += 1 PA[i] = tmp_P From 99806a9223cd3ed23a68eaf0c17487c0e71b70d1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:12:32 -0600 Subject: [PATCH 332/416] Update module scrump and improvee its readability --- stumpy/scrump.py | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index d5fe898cc..206c4e304 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -158,9 +158,7 @@ def _compute_PI( # about to insert at idx is identical to an element of array located # at idx, idx + 1, .... Hence, we should traverse full array. # This is optimized in the if conditon. - if ((j + g) not in I[thread_idx, i + g, :idx][::-1]) and ( - (j + g) not in I[thread_idx, i + g, idx:] - ): + if (j + g) not in I[thread_idx, i + g]: core._shift_insert_at_index( P_squared[thread_idx, i + g], idx, D_squared ) @@ -170,9 +168,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - if ((i + g) not in I[thread_idx, j + g, :idx][::-1]) and ( - (i + g) not in I[thread_idx, j + g, idx:] - ): + if (i + g) not in I[thread_idx, j + g]: core._shift_insert_at_index( P_squared[thread_idx, j + g], idx, D_squared ) @@ -196,9 +192,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - if ((j - g) not in I[thread_idx, i - g, :idx][::-1]) and ( - (j - g) not in I[thread_idx, i - g, idx:] - ): + if (j - g) not in I[thread_idx, i - g]: core._shift_insert_at_index( P_squared[thread_idx, i - g], idx, D_squared ) @@ -208,9 +202,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - if ((i - g) not in I[thread_idx, j - g, :idx][::-1]) and ( - (i - g) not in I[thread_idx, j - g, idx:] - ): + if (i - g) not in I[thread_idx, j - g]: core._shift_insert_at_index( P_squared[thread_idx, j - g], idx, D_squared ) @@ -231,9 +223,7 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, j], squared_distance_profile[j], side="right" ) - if (i not in I[thread_idx, j, :idx][::-1]) and ( - i not in I[thread_idx, j, idx:] - ): + if i not in I[thread_idx, j]: core._shift_insert_at_index( P_squared[thread_idx, j], idx, squared_distance_profile[j] ) @@ -359,13 +349,7 @@ def _prescrump( ) for thread_idx in range(1, n_threads): - core._merge_topk_PI( - P_squared[0], - P_squared[thread_idx], - I[0], - I[thread_idx], - assume_unique=False, - ) + core._merge_topk_PI(P_squared[0], P_squared[thread_idx], I[0], I[thread_idx]) return np.sqrt(P_squared[0]), I[0] @@ -716,7 +700,7 @@ def __init__( else: P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) - core._merge_topk_PI(self._P, P, self._I, I, assume_unique=False) + core._merge_topk_PI(self._P, P, self._I, I) if 
self._ignore_trivial: self._diags = np.random.permutation( @@ -774,7 +758,7 @@ def update(self): ) # Update (top-k) matrix profile and indices - core._merge_topk_PI(self._P, P, self._I, I, assume_unique=False) + core._merge_topk_PI(self._P, P, self._I, I) # update left matrix profile and indices mask = PL < self._PL From b57c69174fe11b788d4f99b2a22a1ccdea845924 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:13:32 -0600 Subject: [PATCH 333/416] Fix syntax --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 0b795235e..fa4553d62 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1103,7 +1103,7 @@ def test_merge_topk_PI(): # create overlaps IDX = np.random.choice(np.arange(k), cols_idx_A[i], replace=False) PB[i, IDX] = PA[i, IDX] - IB[i, IDX]] = IA[i, IDX] + IB[i, IDX] = IA[i, IDX] # sort each row of PA/PB (and update IA/IB accordingly) IDX = np.argsort(PA, axis=1) From e499057441d403a531f58653e98fbb8a9d51820c Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:17:58 -0600 Subject: [PATCH 334/416] update test functions --- tests/test_scrump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 8b19b40a9..0d5869776 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -373,7 +373,7 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): ref_P_aux, ref_I_aux, _, _ = naive.scrump( T_B, m, T_B, percentage, zone, True, s ) - naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) approx = scrump( @@ -406,7 +406,7 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump( T_A, m, T_B, percentage, None, False, None ) - naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) ref_left_I = ref_left_I_aux ref_right_I = ref_right_I_aux @@ -812,7 +812,7 @@ def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): T_B, m, T_B, percentage, zone, True, s, k=k ) naive.merge_topk_PI( - ref_P, ref_P_aux, ref_I, ref_I_aux, assume_unique=False + ref_P, ref_P_aux, ref_I, ref_I_aux ) np.random.seed(seed) From b81131996ae6aafd82b8f2c8e6db2389965730a4 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:20:58 -0600 Subject: [PATCH 335/416] minor fix --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 64bad3caa..063bf9bf7 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1805,7 +1805,7 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w def merge_topk_PI(PA, PB, IA, IB): k = PA.shape[1] for i in range(PA.shape[0]): - _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i]) + _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) PB[i, overlap_idx_B] = np.inf profile = np.column_stack((PA, PB)) From 11ee8ded823cb34a452cb1685f7a93526bf71be1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 21 Jul 2022 17:22:45 -0600 Subject: [PATCH 336/416] correct format --- tests/test_scrump.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 0d5869776..b7d82854c 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -811,9 +811,7 @@ def 
test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages): ref_P_aux, ref_I_aux, _, _ = naive.scrump( T_B, m, T_B, percentage, zone, True, s, k=k ) - naive.merge_topk_PI( - ref_P, ref_P_aux, ref_I, ref_I_aux - ) + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) approx = scrump( From 7925119ed5adb07e2b25bf0afd521599ca06f36d Mon Sep 17 00:00:00 2001 From: ninimama Date: Sat, 23 Jul 2022 11:55:48 -0600 Subject: [PATCH 337/416] Improve docstring --- stumpy/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index f3eff3db6..c30bf8b6a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2581,7 +2581,9 @@ def _merge_topk_PI(PA, PB, IA, IB): Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while always prioritizing the values of `PA` over the values of `PB` in case of ties. (i.e., values from `PB` are always inserted to the right of values from `PA`). - Also, update `IA` accordingly. + Also, update `IA` accordingly. In case of overlapping values between two arrays + IA[i] and IB[i], the ones in IB[i] (and their corresponding values in PB[i]) + are ignored throughout the updating process of IA[i] (and PA[i]). Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. From c3b82dd39270b406f4e09c480a29a12b8957974a Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 12:09:03 -0600 Subject: [PATCH 338/416] Avoid overlap while merging matrix profiles --- stumpy/core.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c30bf8b6a..48795b6f8 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2667,19 +2667,29 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - tmp_ρ = np.empty(ρA.shape[1], dtype=np.float64) - tmp_I = np.empty(ρA.shape[1], dtype=np.int64) - last_idx = len(tmp_ρ) - 1 + k = ρA.shape[1] + + tmp_ρ = np.empty(k, dtype=np.float64) + tmp_I = np.empty(k, dtype=np.int64) + last_idx = k - 1 for i in range(len(ρA)): + overlap = np.intersect1d(IA[i], IB[i]) + aj, bj = last_idx, last_idx - for k in range(last_idx, -1, -1): - if ρB[i, bj] > ρA[i, aj]: - tmp_ρ[k] = ρB[i, bj] - tmp_I[k] = IB[i, bj] + idx = last_idx + for _ in range(2 * k): # 2 * k to traverse both A and B if needed + if idx < 0: + break + if bj >= 0 and ρB[i, bj] > ρA[i, aj]: + if IB[i, bj] not in overlap: + tmp_ρ[idx] = ρB[i, bj] + tmp_I[idx] = IB[i, bj] + idx -= 1 bj -= 1 else: - tmp_ρ[k] = ρA[i, aj] - tmp_I[k] = IA[i, aj] + tmp_ρ[idx] = ρA[i, aj] + tmp_I[idx] = IA[i, aj] + idx -= 1 aj -= 1 ρA[i] = tmp_ρ From f073d6c5440e469e5eceb5089e3ea7c77faed49b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 12:25:48 -0600 Subject: [PATCH 339/416] Add function to find overlapping values --- stumpy/core.py | 24 ++++++++++++++++++++++++ tests/test_core.py | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 48795b6f8..410c0124b 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2766,3 +2766,27 @@ def _check_P(P, threshold=1e-6): if are_distances_too_small(P, threshold=threshold): # pragma: no cover logger.warning(f"A large number of values in `P` are smaller than {threshold}.") logger.warning("For a self-join, try setting `ignore_trivial=True`.") + + +@njit +def _intersect1d_int(arr1, arr2): + """ + Returns the overlapping values between two 1D arrays `arr1` and `arr2` that + consist of integer values. 
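The helper whose docstring begins above simply computes the overlap between two rows of integer indices. As a quick illustration of the two equivalent forms this takes in these patches, plain Python sets (which are Numba-friendly) versus np.intersect1d (which returns a sorted array); the row values here are made up:

import numpy as np

IA_row = np.array([9, 3, 5, 7], dtype=np.int64)
IB_row = np.array([7, 8, 3, 6], dtype=np.int64)

overlap = set(IA_row).intersection(set(IB_row))   # common indices {3, 7}
print(sorted(int(v) for v in overlap))            # [3, 7]
print(np.intersect1d(IA_row, IB_row))             # [3 7], always sorted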
+ + Parameters + ---------- + arr1 : numpy.ndarray + a 1D numpy array consisting of interget values + + arr2 : numpy.ndarray + a 1D numpy array consisting of interget values + + Returns + ------- + out : numpy.ndarray + a numpy array consits of the overlapping values between `arr1` and `arr2` + """ + return np.array( + list(set(arr1).intersection(set(arr2))), dtype=np.int64 # Basic set comparison + ) diff --git a/tests/test_core.py b/tests/test_core.py index fa4553d62..293bd9a28 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1192,3 +1192,21 @@ def test_shift_insert_at_index(): def test_check_P(): with pytest.raises(ValueError): core._check_P(np.random.rand(10).reshape(2, 5)) + + +def test_intersect1d_int(): + max_len_arr = 20 + for n in range(1, max_len_arr): + arr1 = np.random.randint(0, 100, size=n) + arr2 = np.random.randint(0, 100, size=n) + + # creating overlaps between `arr1` and `arr2` + s = np.random.randint(0, high=n) + IDX_1 = np.random.choice(np.arange(n), s, replace=False) + IDX_2 = np.random.choice(np.arange(n), s, replace=False) + arr2[IDX_2] = arr1[IDX_1] + + ref = np.intersect1d(arr1, arr2) + comp = core._intersect1d_int(arr1, arr2) + + npt.assert_array_equal(np.sort(ref), np.sort(comp)) From a5149438f056228ac93ecc786b111e96a2258908 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 12:38:24 -0600 Subject: [PATCH 340/416] replace numpy function with our implementation --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 410c0124b..606dd00ee 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2673,8 +2673,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): tmp_I = np.empty(k, dtype=np.int64) last_idx = k - 1 for i in range(len(ρA)): - overlap = np.intersect1d(IA[i], IB[i]) - + overlap = _intersect1d_int(IA[i], IB[i]) aj, bj = last_idx, last_idx idx = last_idx for _ in range(2 * k): # 2 * k to traverse both A and B if needed From edb62a23c31ed32aea6a0ac1bf2b0c0282621a8a Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 12:51:11 -0600 Subject: [PATCH 341/416] Avoid unnecessary call of a function --- stumpy/core.py | 28 ++-------------------------- tests/test_core.py | 18 ------------------ 2 files changed, 2 insertions(+), 44 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 606dd00ee..c873b4a90 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2612,7 +2612,7 @@ def _merge_topk_PI(PA, PB, IA, IB): tmp_P = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) for i in range(PA.shape[0]): - overlap = np.intersect1d(IA[i], IB[i]) + overlap = set(IB[i]).intersection(set(IA[i])) aj, bj = 0, 0 idx = 0 for _ in range(2 * k): # 2 * k to traverse both A and B @@ -2673,7 +2673,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): tmp_I = np.empty(k, dtype=np.int64) last_idx = k - 1 for i in range(len(ρA)): - overlap = _intersect1d_int(IA[i], IB[i]) + overlap = set(IB[i]).intersection(set(IA[i])) aj, bj = last_idx, last_idx idx = last_idx for _ in range(2 * k): # 2 * k to traverse both A and B if needed @@ -2765,27 +2765,3 @@ def _check_P(P, threshold=1e-6): if are_distances_too_small(P, threshold=threshold): # pragma: no cover logger.warning(f"A large number of values in `P` are smaller than {threshold}.") logger.warning("For a self-join, try setting `ignore_trivial=True`.") - - -@njit -def _intersect1d_int(arr1, arr2): - """ - Returns the overlapping values between two 1D arrays `arr1` and `arr2` that - consist of integer values. 
- - Parameters - ---------- - arr1 : numpy.ndarray - a 1D numpy array consisting of interget values - - arr2 : numpy.ndarray - a 1D numpy array consisting of interget values - - Returns - ------- - out : numpy.ndarray - a numpy array consits of the overlapping values between `arr1` and `arr2` - """ - return np.array( - list(set(arr1).intersection(set(arr2))), dtype=np.int64 # Basic set comparison - ) diff --git a/tests/test_core.py b/tests/test_core.py index 293bd9a28..fa4553d62 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1192,21 +1192,3 @@ def test_shift_insert_at_index(): def test_check_P(): with pytest.raises(ValueError): core._check_P(np.random.rand(10).reshape(2, 5)) - - -def test_intersect1d_int(): - max_len_arr = 20 - for n in range(1, max_len_arr): - arr1 = np.random.randint(0, 100, size=n) - arr2 = np.random.randint(0, 100, size=n) - - # creating overlaps between `arr1` and `arr2` - s = np.random.randint(0, high=n) - IDX_1 = np.random.choice(np.arange(n), s, replace=False) - IDX_2 = np.random.choice(np.arange(n), s, replace=False) - arr2[IDX_2] = arr1[IDX_1] - - ref = np.intersect1d(arr1, arr2) - comp = core._intersect1d_int(arr1, arr2) - - npt.assert_array_equal(np.sort(ref), np.sort(comp)) From ec020e0b83c66e40c6f9a181319aed6528b7b58b Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 12:58:23 -0600 Subject: [PATCH 342/416] Revise docsting and comment --- stumpy/scrump.py | 7 +++++-- stumpy/stumpi.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 206c4e304..d9ee59a98 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -116,8 +116,11 @@ def _compute_PI( nn_idx = np.argmin(squared_distance_profile) if nn_idx not in I[thread_idx, i]: - # It is more than likely that the top-k values for the `i`-th subsequence - # will be already populated. So, we must shift-insert here + # Since the top-k values for the `i`-th subsequence may already + # be updated/populated in other previous iterations (i.e., not all + # values in `I[thread_idx]` are equal to `-1` or not all values in + # `P_squared[thread_idx, i]` are equal to `np.inf`), we must + # shift-insert here rather than assign values to the first element. 
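The comment being reworded here makes a subtle point: by the time the nearest neighbor of subsequence i is found, earlier diagonal updates may already have filled part of that top-k row, so the new 1-NN has to be shift-inserted at the front rather than written into position 0. A tiny sketch of that situation, mirroring the np.insert(...)[:-1] pattern used in the naive code (the row values are made up):

import numpy as np

# Top-3 row already partially populated by earlier diagonal updates.
P_row = np.array([2.0, 3.5, np.inf])
I_row = np.array([14, 20, -1])

d, nn = 1.2, 8                            # newly found nearest neighbor
if nn not in I_row:
    P_row = np.insert(P_row, 0, d)[:-1]   # shift-insert at the front
    I_row = np.insert(I_row, 0, nn)[:-1]  # existing entries are preserved
print(P_row, I_row)                       # [1.2 2.  3.5] [ 8 14 20]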
core._shift_insert_at_index( P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] ) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 22bc9b122..b5c4b5a93 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -371,7 +371,7 @@ def left_P_(self): @property def left_I_(self): """ - Get the (top-1) sleft matrix profile indices + Get the (top-1) left matrix profile indices """ return self._left_I.astype(np.int64) From 7ab480e5b0faa16ac66bf09ed784d83014344f80 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 13:02:07 -0600 Subject: [PATCH 343/416] Improve test function --- tests/test_core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index fa4553d62..be03ee3bd 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1102,7 +1102,8 @@ def test_merge_topk_PI(): for i in range(n): # create overlaps IDX = np.random.choice(np.arange(k), cols_idx_A[i], replace=False) - PB[i, IDX] = PA[i, IDX] + imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(IDX)) + PB[i, IDX] = PA[i, IDX] + imprecision IB[i, IDX] = IA[i, IDX] # sort each row of PA/PB (and update IA/IB accordingly) From b2bc5005e7e36fe0be4cabb23fcec8b4c718bb6a Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 13:03:40 -0600 Subject: [PATCH 344/416] Remove comment --- stumpy/scrump.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index d9ee59a98..039ca13e5 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -157,10 +157,6 @@ def _compute_PI( idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - # Due to numerical error, it is possible that the element that is - # about to insert at idx is identical to an element of array located - # at idx, idx + 1, .... Hence, we should traverse full array. - # This is optimized in the if conditon. 
if (j + g) not in I[thread_idx, i + g]: core._shift_insert_at_index( P_squared[thread_idx, i + g], idx, D_squared From c156530740bf06ea0e4505ddd79370865ca0142d Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 13:45:42 -0600 Subject: [PATCH 345/416] Add test function to ensure duplicates are avoided --- tests/test_scrump.py | 89 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index b7d82854c..f9f6f1a53 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -871,3 +871,92 @@ def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +def test_prescrump_self_join_KNN_no_overlap(): + # This test is designed to ensure that the performant version prescrump avoids + # overlap while computing the top-k matrix profiles + T = np.array( + [ + -916.64703784, + -327.42056679, + 379.19386284, + -281.80427628, + -189.85401773, + -38.69610569, + 187.89889345, + 578.65862523, + 528.09687811, + -667.42973795, + -285.27749324, + -211.28930925, + -703.93802657, + -820.53780562, + -955.91174663, + 383.65471851, + 932.08809422, + -563.57569746, + 784.0546579, + -343.14886064, + -612.72329848, + -270.09273091, + -448.39346549, + 578.03202014, + 867.15436674, + -783.55167049, + -494.78062922, + -311.18567747, + 522.70052256, + 933.45474094, + 192.34822368, + -162.11374908, + -612.95359279, + -449.62297051, + -351.79138459, + -77.70189101, + -439.46519487, + -660.48431174, + 548.69362177, + 485.36004744, + -535.3566627, + -568.0955257, + 755.26647273, + 736.1079588, + -597.65672557, + 379.3299783, + 731.38211912, + 247.34827447, + 545.41888454, + 644.94300763, + 20.99042666, + 788.19859515, + -898.24325898, + -929.47841134, + -738.45875181, + 66.01030291, + 512.945841, + -44.07720164, + 302.97141464, + -696.95271302, + 662.98385163, + -712.3807531, + -43.62688539, + 74.16927482, + ] + ) + + mk_seeds = { + (3, 2): [4279, 9133, 8190], + (3, 5): [1267, 4016, 4046], + (5, 2): [6327, 4926, 3712], + (5, 5): [3032, 3032, 8117], + } + for (m, k), seeds in mk_seeds.items(): + zone = int(np.ceil(m / 4)) + for seed in seeds: + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T, m, T, s=1, exclusion_zone=zone, k=k) + comp_P, comp_I = prescrump(T, m, s=1, k=k) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_array_equal(ref_I, comp_I) From 5f1acaedfe490adb07634121d9b89df36c813031 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 14:29:32 -0600 Subject: [PATCH 346/416] Improve comments --- tests/naive.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 063bf9bf7..bf7cb6197 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1433,18 +1433,26 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): P[i, 1:] = P[i, :-1] P[i, 0] = distance_profile[I[i, 0]] - # else: the idx, i.e. 1NN of `i`, was already obtained (it maynot be stored - # at the first index of array I[i] though!) + # else: the idx, i.e. 1NN of `i`, was already obtained. it may not be + # at the first index of array I[i] though! e.g. P[i] = [1e-10, 1e-9]), + # I[i] = [a, b]. Here, `b` can be the actual nn of i. However, the + # distance between `seq i` and `seq b` might be calculated alrady in one + # of previous iteration and, due to slight numerical error, it might have + # been inserted to the rigth of a. 
if P[i, 0] == np.inf: I[i, 0] = -1 continue - j = nn_idx + j = nn_idx # to follow the original paper even in top-k version, we use + # the actual nn_idx rather than I[i, 0]. for g in range(1, min(s, l - i, w - j)): d = dist_matrix[i + g, j + g] if d < P[i + g, -1]: pos = np.searchsorted(P[i + g], d, side="right") + # Do NOT optimize the `condition` in the following if statement + # and similar ones in this naive function. This is to ensure + # we are avoiding duplicates in each row of I. if (j + g) not in I[i + g]: P[i + g] = np.insert(P[i + g], pos, d)[:-1] I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] From 5f9c537464e2a6214cc3403da1fa835d67cf5877 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 14:50:17 -0600 Subject: [PATCH 347/416] Enhance naive version to avoid duplicates while merging --- tests/naive.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/naive.py b/tests/naive.py index bf7cb6197..4cc99e791 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1815,6 +1815,7 @@ def merge_topk_PI(PA, PB, IA, IB): for i in range(PA.shape[0]): _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) PB[i, overlap_idx_B] = np.inf + IB[i, overlap_idx_B] = -1 profile = np.column_stack((PA, PB)) indices = np.column_stack((IA, IB)) @@ -1846,6 +1847,12 @@ def merge_topk_ρI(ρA, ρB, IA, IB): # merging `ρB` and `ρA` ascendingly while choosing `ρB` over `ρA` in case of # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second # half of this array, and discard the first half. + k = ρA.shape[1] + for i in range(ρA.shape[0]): + _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) + ρB[i, overlap_idx_B] = np.NINF + IB[i, overlap_idx_B] = -1 + profile = np.column_stack((ρB, ρA)) indices = np.column_stack((IB, IA)) From f37bc29882d91ac0e7f16298d1a6fb37e82b7ab5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 15:00:33 -0600 Subject: [PATCH 348/416] Add test function and revise naive version --- tests/naive.py | 4 +-- tests/test_core.py | 65 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 4cc99e791..2bde6bd36 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1861,5 +1861,5 @@ def merge_topk_ρI(ρA, ρB, IA, IB): indices[:, :] = np.take_along_axis(indices, idx, axis=1) # keep the last k elements (top-k largest values) - ρA[:, :] = profile[:, ρA.shape[1] :] - IA[:, :] = indices[:, ρA.shape[1] :] + ρA[:, :] = profile[:, k:] + IA[:, :] = indices[:, k:] diff --git a/tests/test_core.py b/tests/test_core.py index be03ee3bd..7b4034f08 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1061,8 +1061,9 @@ def test_select_P_ABBA_val_inf(): npt.assert_almost_equal(ref, comp) -def test_merge_topk_PI(): - # `assume_unique = True` +def test_merge_topk_PI_without_overlap(): + # This is to test function `core._merge_topk_PI(PA, PB, IA, IB)` when there + # is no overlap between row IA[i] and row IB[i]. n = 50 for k in range(1, 6): PA = np.random.rand(n * k).reshape(n, k) @@ -1089,7 +1090,10 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_P, comp_P) npt.assert_array_equal(ref_I, comp_I) - # `assume_unique = False` + +def test_merge_topk_PI_with_overlap(): + # This is to test function `core._merge_topk_PI(PA, PB, IA, IB)` when there + # is overlap between row IA[i] and row IB[i]. 
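To make the overlap handling exercised by these tests easier to follow, here is a self-contained sketch of the naive merge described above: indices shared between a row of IA and the matching row of IB are disabled on the B side, then the concatenation is sorted row-wise and the k smallest entries are kept. This is an illustrative condensation with made-up names, not a copy of the library routine; the stable sort is what lets the A side win ties.

import numpy as np

def naive_merge_topk_PI_sketch(PA, PB, IA, IB):
    # PA/PB hold row-wise ascending top-k distances, IA/IB the matching indices.
    # Any index that appears in both IA[i] and IB[i] is neutralized on the B
    # side (distance -> inf, index -> -1) so it cannot enter the merge twice.
    for i in range(PA.shape[0]):
        _, _, dup_in_B = np.intersect1d(IA[i], IB[i], return_indices=True)
        PB[i, dup_in_B] = np.inf
        IB[i, dup_in_B] = -1
    P = np.column_stack((PA, PB))
    I = np.column_stack((IA, IB))
    order = np.argsort(P, axis=1, kind="mergesort")  # stable: ties keep A first
    PA[:, :] = np.take_along_axis(P, order, axis=1)[:, : PA.shape[1]]
    IA[:, :] = np.take_along_axis(I, order, axis=1)[:, : PA.shape[1]]

With k == 1 this collapses to the familiar element-wise minimum of two matrix profiles, with the index from A kept on ties.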
n = 50 for k in range(1, 6): PA = np.random.rand(n * k).reshape(n, k) @@ -1098,13 +1102,13 @@ def test_merge_topk_PI(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - cols_idx_A = np.random.randint(0, k, size=n) + num_overlaps = np.random.randint(1, k + 1, size=n) for i in range(n): # create overlaps - IDX = np.random.choice(np.arange(k), cols_idx_A[i], replace=False) - imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(IDX)) - PB[i, IDX] = PA[i, IDX] + imprecision - IB[i, IDX] = IA[i, IDX] + col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) + imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(col_IDX)) + PB[i, col_IDX] = PA[i, col_IDX] + imprecision + IB[i, col_IDX] = IA[i, col_IDX] # sort each row of PA/PB (and update IA/IB accordingly) IDX = np.argsort(PA, axis=1) @@ -1128,7 +1132,9 @@ def test_merge_topk_PI(): npt.assert_array_equal(ref_I, comp_I) -def test_merge_topk_ρI(): +def test_merge_topk_ρI_without_overlap(): + # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there + # is no overlap between row IA[i] and row IB[i]. n = 50 for k in range(1, 6): ρA = np.random.rand(n * k).reshape(n, k) @@ -1156,6 +1162,47 @@ def test_merge_topk_ρI(): npt.assert_array_equal(ref_I, comp_I) +def test_merge_topk_ρI_with_overlap(): + # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there + # is overlap between row IA[i] and row IB[i]. + n = 50 + for k in range(1, 6): + ρA = np.random.rand(n * k).reshape(n, k) + ρB = np.random.rand(n * k).reshape(n, k) + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + num_overlaps = np.random.randint(1, k + 1, size=n) + for i in range(n): + # create overlaps + col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) + imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(col_IDX)) + ρB[i, col_IDX] = ρA[i, col_IDX] + imprecision + IB[i, col_IDX] = IA[i, col_IDX] + + # sort each row of PA/PB (and update IA/IB accordingly) + IDX = np.argsort(ρA, axis=1) + ρA[:, :] = np.take_along_axis(ρA, IDX, axis=1) + IA[:, :] = np.take_along_axis(IA, IDX, axis=1) + + IDX = np.argsort(ρB, axis=1) + ρB[:, :] = np.take_along_axis(ρB, IDX, axis=1) + IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_array_equal(ref_ρ, comp_ρ) + npt.assert_array_equal(ref_I, comp_I) + + def test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) From dc97a12ce9ec68acb80d52268199670260a1caa9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 26 Jul 2022 15:08:56 -0600 Subject: [PATCH 349/416] Improve code readability and comment --- tests/test_scrump.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index f9f6f1a53..332eca78a 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -875,7 +875,9 @@ def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): def test_prescrump_self_join_KNN_no_overlap(): # This test is designed to ensure that the performant version prescrump avoids - # overlap while computing the top-k matrix profiles + # overlap while computing the top-k matrix profiles and matrix profile indices. + # So, there would be no duplicates in each row of top-k matrix profile indices + # excluding the elements filled with `-1`. 
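The property this test is meant to pin down can be stated as a one-off check: every row of the returned top-k index array is duplicate-free once the `-1` padding is ignored. A small illustrative helper (not part of the test suite) makes that concrete.

import numpy as np

def rows_have_no_duplicate_neighbors(I):
    # I: (l, k) array of top-k nearest-neighbor indices, padded with -1.
    for row in I:
        valid = row[row >= 0]               # drop the -1 fill values
        if len(np.unique(valid)) != len(valid):
            return False                    # a neighbor index repeats in this row
    return True

I = np.array([[3, 7, -1],
              [5, 5, -1]])
print(rows_have_no_duplicate_neighbors(I))  # False: index 5 repeats in row 1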
T = np.array( [ -916.64703784, @@ -945,15 +947,17 @@ def test_prescrump_self_join_KNN_no_overlap(): ] ) - mk_seeds = { + # test_cases: dict() with `key: value` pair, where key is `(m, k)`, and value + # is a list of random `seeds` + test_cases = { (3, 2): [4279, 9133, 8190], (3, 5): [1267, 4016, 4046], (5, 2): [6327, 4926, 3712], (5, 5): [3032, 3032, 8117], } - for (m, k), seeds in mk_seeds.items(): + for (m, k), specified_seeds in test_cases.items(): zone = int(np.ceil(m / 4)) - for seed in seeds: + for seed in specified_seeds: np.random.seed(seed) ref_P, ref_I = naive.prescrump(T, m, T, s=1, exclusion_zone=zone, k=k) comp_P, comp_I = prescrump(T, m, s=1, k=k) From fa340ba27a63ec3f3ab81e5cc981eddd8e54df75 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 16:55:12 -0600 Subject: [PATCH 350/416] Update top-k profile by getting insertion index In NearestNeighbor case, the distance between sequence i and its NN is the smallest. However, due to `imprecision` in calculation, it is possible that its corresponding distance, i.e. distance between seq i and its NN, is not the smallest value in its top-k neighbors. So, instead of inserting it at index 0, we use numpy.searchsorted to find the correct insertion index. --- stumpy/scrump.py | 14 ++++++++++---- tests/naive.py | 8 ++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 039ca13e5..7d9d86c09 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -121,10 +121,16 @@ def _compute_PI( # values in `I[thread_idx]` are equal to `-1` or not all values in # `P_squared[thread_idx, i]` are equal to `np.inf`), we must # shift-insert here rather than assign values to the first element. - core._shift_insert_at_index( - P_squared[thread_idx, i], 0, squared_distance_profile[nn_idx] - ) - core._shift_insert_at_index(I[thread_idx, i], 0, nn_idx) + if squared_distance_profile[nn_idx] < P_squared[thread_idx, i, -1]: + idx = np.searchsorted( + P_squared[thread_idx, i], + squared_distance_profile[nn_idx], + side="right", + ) + core._shift_insert_at_index( + P_squared[thread_idx, i], idx, squared_distance_profile[nn_idx] + ) + core._shift_insert_at_index(I[thread_idx, i], idx, nn_idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 diff --git a/tests/naive.py b/tests/naive.py index 2bde6bd36..f1f6fb8e3 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1428,10 +1428,10 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): nn_idx = np.argmin(distance_profile) if nn_idx not in I[i]: - I[i, 1:] = I[i, :-1] - I[i, 0] = nn_idx - P[i, 1:] = P[i, :-1] - P[i, 0] = distance_profile[I[i, 0]] + if distance_profile[nn_idx] < P[i, -1]: + pos = np.searchsorted(P[i], distance_profile[nn_idx], side="right") + P[i] = np.insert(P[i], pos, distance_profile[nn_idx])[:-1] + I[i] = np.insert(I[i], pos, nn_idx)[:-1] # else: the idx, i.e. 1NN of `i`, was already obtained. it may not be # at the first index of array I[i] though! e.g. 
P[i] = [1e-10, 1e-9]), From d9a997d3dc7ff2e3a636b1eabbade255c4f14ce5 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:16:13 -0600 Subject: [PATCH 351/416] Merge nested if statements into one --- stumpy/scrump.py | 90 ++++++++++++++++++++++++++++-------------------- tests/naive.py | 47 +++++++++++++------------ 2 files changed, 77 insertions(+), 60 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 7d9d86c09..de514f6cf 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -115,22 +115,24 @@ def _compute_PI( core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) nn_idx = np.argmin(squared_distance_profile) - if nn_idx not in I[thread_idx, i]: + if ( + squared_distance_profile[nn_idx] < P_squared[thread_idx, i, -1] + and nn_idx not in I[thread_idx, i] + ): # Since the top-k values for the `i`-th subsequence may already # be updated/populated in other previous iterations (i.e., not all # values in `I[thread_idx]` are equal to `-1` or not all values in # `P_squared[thread_idx, i]` are equal to `np.inf`), we must # shift-insert here rather than assign values to the first element. - if squared_distance_profile[nn_idx] < P_squared[thread_idx, i, -1]: - idx = np.searchsorted( - P_squared[thread_idx, i], - squared_distance_profile[nn_idx], - side="right", - ) - core._shift_insert_at_index( - P_squared[thread_idx, i], idx, squared_distance_profile[nn_idx] - ) - core._shift_insert_at_index(I[thread_idx, i], idx, nn_idx) + idx = np.searchsorted( + P_squared[thread_idx, i], + squared_distance_profile[nn_idx], + side="right", + ) + core._shift_insert_at_index( + P_squared[thread_idx, i], idx, squared_distance_profile[nn_idx] + ) + core._shift_insert_at_index(I[thread_idx, i], idx, nn_idx) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 @@ -159,25 +161,30 @@ def _compute_PI( μ_Q[i + g], σ_Q[i + g], ) - if D_squared < P_squared[thread_idx, i + g, -1]: + if ( + D_squared < P_squared[thread_idx, i + g, -1] + and (j + g) not in I[thread_idx, i + g] + ): idx = np.searchsorted( P_squared[thread_idx, i + g], D_squared, side="right" ) - if (j + g) not in I[thread_idx, i + g]: - core._shift_insert_at_index( - P_squared[thread_idx, i + g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) + core._shift_insert_at_index( + P_squared[thread_idx, i + g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) - if excl_zone is not None and D_squared < P_squared[thread_idx, j + g, -1]: + if ( + excl_zone is not None + and D_squared < P_squared[thread_idx, j + g, -1] + and (i + g) not in I[thread_idx, j + g] + ): idx = np.searchsorted( P_squared[thread_idx, j + g], D_squared, side="right" ) - if (i + g) not in I[thread_idx, j + g]: - core._shift_insert_at_index( - P_squared[thread_idx, j + g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) + core._shift_insert_at_index( + P_squared[thread_idx, j + g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) QT_j = QT_j_prime # Update top-k for both subsequences `S[i-g] = T[i-g:i-g+m]` and @@ -193,25 +200,30 @@ def _compute_PI( μ_Q[i - g], σ_Q[i - g], ) - if D_squared < P_squared[thread_idx, i - g, -1]: + if ( + D_squared < P_squared[thread_idx, i - g, -1] + and (j - g) not in I[thread_idx, i - g] + ): idx = np.searchsorted( P_squared[thread_idx, i - g], D_squared, side="right" ) - if (j - g) not in I[thread_idx, i - g]: - core._shift_insert_at_index( - 
P_squared[thread_idx, i - g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) + core._shift_insert_at_index( + P_squared[thread_idx, i - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) - if excl_zone is not None and D_squared < P_squared[thread_idx, j - g, -1]: + if ( + excl_zone is not None + and D_squared < P_squared[thread_idx, j - g, -1] + and (i - g) not in I[thread_idx, j - g] + ): idx = np.searchsorted( P_squared[thread_idx, j - g], D_squared, side="right" ) - if (i - g) not in I[thread_idx, j - g]: - core._shift_insert_at_index( - P_squared[thread_idx, j - g], idx, D_squared - ) - core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) + core._shift_insert_at_index( + P_squared[thread_idx, j - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) # In the case of a self-join, the calculated distance profile can also be # used to refine the top-k for all non-trivial subsequences @@ -225,10 +237,12 @@ def _compute_PI( squared_distance_profile < P_squared[thread_idx, :, -1] ) for j in indices: - idx = np.searchsorted( - P_squared[thread_idx, j], squared_distance_profile[j], side="right" - ) if i not in I[thread_idx, j]: + idx = np.searchsorted( + P_squared[thread_idx, j], + squared_distance_profile[j], + side="right", + ) core._shift_insert_at_index( P_squared[thread_idx, j], idx, squared_distance_profile[j] ) diff --git a/tests/naive.py b/tests/naive.py index f1f6fb8e3..deee097ca 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1427,11 +1427,10 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) nn_idx = np.argmin(distance_profile) - if nn_idx not in I[i]: - if distance_profile[nn_idx] < P[i, -1]: - pos = np.searchsorted(P[i], distance_profile[nn_idx], side="right") - P[i] = np.insert(P[i], pos, distance_profile[nn_idx])[:-1] - I[i] = np.insert(I[i], pos, nn_idx)[:-1] + if distance_profile[nn_idx] < P[i, -1] and nn_idx not in I[i]: + pos = np.searchsorted(P[i], distance_profile[nn_idx], side="right") + P[i] = np.insert(P[i], pos, distance_profile[nn_idx])[:-1] + I[i] = np.insert(I[i], pos, nn_idx)[:-1] # else: the idx, i.e. 1NN of `i`, was already obtained. it may not be # at the first index of array I[i] though! e.g. P[i] = [1e-10, 1e-9]), @@ -1448,39 +1447,43 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): # the actual nn_idx rather than I[i, 0]. for g in range(1, min(s, l - i, w - j)): d = dist_matrix[i + g, j + g] - if d < P[i + g, -1]: + if d < P[i + g, -1] and (j + g) not in I[i + g]: pos = np.searchsorted(P[i + g], d, side="right") # Do NOT optimize the `condition` in the following if statement # and similar ones in this naive function. This is to ensure # we are avoiding duplicates in each row of I. 
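The guarded update that this hunk (and its counterpart in the performant code) converges on can be summarized in a few lines. The helper below is an illustrative condensation, not the library function; note that the membership test deliberately scans the whole row, for the reason spelled out in the comment above.

import numpy as np

def update_topk_row(P_row, I_row, d, j):
    # P_row: ascending best-so-far distances of one subsequence (length k)
    # I_row: the matching neighbor indices (length k, padded with -1)
    # A candidate neighbor j with distance d enters the row only when d beats
    # the current k-th best AND j does not already appear anywhere in I_row.
    if d < P_row[-1] and j not in I_row:
        pos = np.searchsorted(P_row, d, side="right")
        P_row[:] = np.insert(P_row, pos, d)[:-1]
        I_row[:] = np.insert(I_row, pos, j)[:-1]

P = np.array([0.5, 1.2, np.inf])
I = np.array([10, 42, -1])
update_topk_row(P, I, 0.9, 7)
print(P, I)  # [0.5 0.9 1.2] [10  7 42]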
- if (j + g) not in I[i + g]: - P[i + g] = np.insert(P[i + g], pos, d)[:-1] - I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] - if exclusion_zone is not None and d < P[j + g, -1]: + P[i + g] = np.insert(P[i + g], pos, d)[:-1] + I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] + if ( + exclusion_zone is not None + and d < P[j + g, -1] + and (i + g) not in I[j + g] + ): pos = np.searchsorted(P[j + g], d, side="right") - if (i + g) not in I[j + g]: - P[j + g] = np.insert(P[j + g], pos, d)[:-1] - I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] + P[j + g] = np.insert(P[j + g], pos, d)[:-1] + I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] for g in range(1, min(s, i + 1, j + 1)): d = dist_matrix[i - g, j - g] - if d < P[i - g, -1]: + if d < P[i - g, -1] and (j - g) not in I[i - g]: pos = np.searchsorted(P[i - g], d, side="right") - if (j - g) not in I[i - g]: - P[i - g] = np.insert(P[i - g], pos, d)[:-1] - I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] - if exclusion_zone is not None and d < P[j - g, -1]: + P[i - g] = np.insert(P[i - g], pos, d)[:-1] + I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] + if ( + exclusion_zone is not None + and d < P[j - g, -1] + and (i - g) not in I[j - g] + ): pos = np.searchsorted(P[j - g], d, side="right") - if (i - g) not in I[j - g]: - P[j - g] = np.insert(P[j - g], pos, d)[:-1] - I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + P[j - g] = np.insert(P[j - g], pos, d)[:-1] + I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] # In the case of a self-join, the calculated distance profile can also be # used to refine the top-k for all non-trivial subsequences if exclusion_zone is not None: for idx in np.flatnonzero(distance_profile < P[:, -1]): - pos = np.searchsorted(P[idx], distance_profile[idx], side="right") if i not in I[idx]: + pos = np.searchsorted(P[idx], distance_profile[idx], side="right") P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] I[idx] = np.insert(I[idx], pos, i)[:-1] From a52564fd3c0cd3fe41a09fda80283f9255e69781 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:18:09 -0600 Subject: [PATCH 352/416] Remove blank lines --- stumpy/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c873b4a90..0bb59bcbe 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2618,14 +2618,12 @@ def _merge_topk_PI(PA, PB, IA, IB): for _ in range(2 * k): # 2 * k to traverse both A and B if idx >= k: break - if bj < k and PB[i, bj] < PA[i, aj]: if IB[i, bj] not in overlap: tmp_P[idx] = PB[i, bj] tmp_I[idx] = IB[i, bj] idx += 1 bj += 1 - else: tmp_P[idx] = PA[i, aj] tmp_I[idx] = IA[i, aj] From 526618cff40a77abdf948edee0db23fde209481c Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:20:48 -0600 Subject: [PATCH 353/416] Fix typo --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7b4034f08..33a61c427 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1181,7 +1181,7 @@ def test_merge_topk_ρI_with_overlap(): ρB[i, col_IDX] = ρA[i, col_IDX] + imprecision IB[i, col_IDX] = IA[i, col_IDX] - # sort each row of PA/PB (and update IA/IB accordingly) + # sort each row of ρA/ρB (and update IA/IB accordingly) IDX = np.argsort(ρA, axis=1) ρA[:, :] = np.take_along_axis(ρA, IDX, axis=1) IA[:, :] = np.take_along_axis(IA, IDX, axis=1) From 36077119655b924d52d25df5961ca1da5d645d96 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:33:19 -0600 Subject: [PATCH 354/416] Improve comment --- 
tests/test_scrump.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 332eca78a..64a7e39f8 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -878,6 +878,19 @@ def test_prescrump_self_join_KNN_no_overlap(): # overlap while computing the top-k matrix profiles and matrix profile indices. # So, there would be no duplicates in each row of top-k matrix profile indices # excluding the elements filled with `-1`. + # Let's denote `I[i]` as the array with length `k` that contains the start index + # of best-so-far top-k neighbors of `subseq i`. Also, we denote `P[i]` as their + # corresponding distances to `subseq i`. After calculating the distance + # between `subseq i` to its neighbor `j` (let's call it `d`), we can insert `j` + # into I[i] only if j is not already in I[i], and for that we need to check + # the whole array I[i]. Although one might think to perform + # `idx = np.searchosrted(P[i], d)` first followed by the check `if j not in I[i, :idx]` + # HOWEVER, the latter approach may result in duplicates(!) due to the imprecision + # in calculation of ditances. In other words, it is possible that the distance + # between subseq i and subseq j was calculated in one of previous iterations of + # updating process and its value might be slightly higher than `d`. So, althought + # j might be already in I[i], it might not be in `I[i, :idx]`. + T = np.array( [ -916.64703784, From 1b19a4574cdd11fc0507e514c6619cfd0e089a5a Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:35:50 -0600 Subject: [PATCH 355/416] Improve comments --- stumpy/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 0bb59bcbe..e825ddd89 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2615,7 +2615,8 @@ def _merge_topk_PI(PA, PB, IA, IB): overlap = set(IB[i]).intersection(set(IA[i])) aj, bj = 0, 0 idx = 0 - for _ in range(2 * k): # 2 * k to traverse both A and B + # 2 * k iterations are required to traverse both A and B if needed. + for _ in range(2 * k): if idx >= k: break if bj < k and PB[i, bj] < PA[i, aj]: @@ -2674,7 +2675,8 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): overlap = set(IB[i]).intersection(set(IA[i])) aj, bj = last_idx, last_idx idx = last_idx - for _ in range(2 * k): # 2 * k to traverse both A and B if needed + # 2 * k iterations are required to traverse both A and B if needed. + for _ in range(2 * k): if idx < 0: break if bj >= 0 and ρB[i, bj] > ρA[i, aj]: From bba35e11493f68ef3b49db9dfd53531c4a0eedd9 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:37:16 -0600 Subject: [PATCH 356/416] Improve docstring --- stumpy/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index e825ddd89..70fa9dd7c 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2641,7 +2641,9 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) while always prioritizing the values of `ρA` over the values of `ρB` in case of ties. (i.e., values from `ρB` are always inserted to the left of values from `ρA`). - Also, update `IA` accordingly. + Also, update `IA` accordingly. In case of overlapping values between two arrays + IA[i] and IB[i], the ones in IB[i] (and their corresponding values in ρB[i]) + are ignored throughout the updating process of IA[i] (and ρA[i]). 
Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. From 6d4d1272231f2af9bc85c1dec6f74b53adaf2556 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:39:17 -0600 Subject: [PATCH 357/416] Remove unnecessary comments --- tests/naive.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index deee097ca..6c5404161 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1432,19 +1432,11 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): P[i] = np.insert(P[i], pos, distance_profile[nn_idx])[:-1] I[i] = np.insert(I[i], pos, nn_idx)[:-1] - # else: the idx, i.e. 1NN of `i`, was already obtained. it may not be - # at the first index of array I[i] though! e.g. P[i] = [1e-10, 1e-9]), - # I[i] = [a, b]. Here, `b` can be the actual nn of i. However, the - # distance between `seq i` and `seq b` might be calculated alrady in one - # of previous iteration and, due to slight numerical error, it might have - # been inserted to the rigth of a. - if P[i, 0] == np.inf: I[i, 0] = -1 continue - j = nn_idx # to follow the original paper even in top-k version, we use - # the actual nn_idx rather than I[i, 0]. + j = nn_idx for g in range(1, min(s, l - i, w - j)): d = dist_matrix[i + g, j + g] if d < P[i + g, -1] and (j + g) not in I[i + g]: From e53138506dbd00982670cedb8cd2ba6fb1d1019b Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:43:01 -0600 Subject: [PATCH 358/416] passing copy of variable as input --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 33a61c427..b2025a712 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1155,8 +1155,8 @@ def test_merge_topk_ρI_without_overlap(): comp_ρ = ρA.copy() comp_I = IA.copy() - naive.merge_topk_ρI(ref_ρ, ρB, ref_I, IB) - core._merge_topk_ρI(comp_ρ, ρB, comp_I, IB) + naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) npt.assert_array_equal(ref_ρ, comp_ρ) npt.assert_array_equal(ref_I, comp_I) From 8e28aeb946bae2415807e956ff5f56ecb2a1a6a1 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 17:45:46 -0600 Subject: [PATCH 359/416] minor change in test functions --- tests/test_core.py | 20 ++++++++++---------- tests/test_scrump.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index b2025a712..eaa4dab28 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1087,8 +1087,8 @@ def test_merge_topk_PI_without_overlap(): naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) - npt.assert_array_equal(ref_P, comp_P) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) def test_merge_topk_PI_with_overlap(): @@ -1128,8 +1128,8 @@ def test_merge_topk_PI_with_overlap(): naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) - npt.assert_array_equal(ref_P, comp_P) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) def test_merge_topk_ρI_without_overlap(): @@ -1158,8 +1158,8 @@ def test_merge_topk_ρI_without_overlap(): naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) - 
npt.assert_array_equal(ref_ρ, comp_ρ) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) def test_merge_topk_ρI_with_overlap(): @@ -1199,8 +1199,8 @@ def test_merge_topk_ρI_with_overlap(): naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) - npt.assert_array_equal(ref_ρ, comp_ρ) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) def test_shift_insert_at_index(): @@ -1222,7 +1222,7 @@ def test_shift_insert_at_index(): comp, idx, v, shift="right" ) # update comp in place - npt.assert_array_equal(ref, comp) + npt.assert_almost_equal(ref, comp) # test shift = "left" for (idx, v) in zip(indices, values): @@ -1234,7 +1234,7 @@ def test_shift_insert_at_index(): comp, idx, v, shift="left" ) # update comp in place - npt.assert_array_equal(ref, comp) + npt.assert_almost_equal(ref, comp) def test_check_P(): diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 64a7e39f8..5dfeddf52 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -976,4 +976,4 @@ def test_prescrump_self_join_KNN_no_overlap(): comp_P, comp_I = prescrump(T, m, s=1, k=k) npt.assert_almost_equal(ref_P, comp_P) - npt.assert_array_equal(ref_I, comp_I) + npt.assert_almost_equal(ref_I, comp_I) From be1d1e77ea04c6f89b031ac10121e1b7a7897795 Mon Sep 17 00:00:00 2001 From: ninimama Date: Thu, 28 Jul 2022 18:21:05 -0600 Subject: [PATCH 360/416] Correct style --- tests/test_scrump.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 5dfeddf52..1153f2790 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -884,12 +884,13 @@ def test_prescrump_self_join_KNN_no_overlap(): # between `subseq i` to its neighbor `j` (let's call it `d`), we can insert `j` # into I[i] only if j is not already in I[i], and for that we need to check # the whole array I[i]. Although one might think to perform - # `idx = np.searchosrted(P[i], d)` first followed by the check `if j not in I[i, :idx]` - # HOWEVER, the latter approach may result in duplicates(!) due to the imprecision - # in calculation of ditances. In other words, it is possible that the distance - # between subseq i and subseq j was calculated in one of previous iterations of - # updating process and its value might be slightly higher than `d`. So, althought - # j might be already in I[i], it might not be in `I[i, :idx]`. + # `idx = np.searchosrted(P[i], d)` first followed by the check `if j not in + # `I[i, :idx]`. HOWEVER, the latter approach may result in duplicates(!) due + # to the imprecision in calculation of ditances. In other words, it is possible + # that the distance between subseq i and subseq j was calculated in one of + # previous iterations of updating process and its value might be slightly higher + # than `d`. So, althought j might be already in I[i], it might not be in + # `I[i, :idx]`. 
T = np.array( [ From 956fc31679fec83f673d7f6a363409fd1c6f0bd6 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Jul 2022 11:04:27 -0600 Subject: [PATCH 361/416] Revise comment --- tests/test_scrump.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 1153f2790..ead48130f 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -874,23 +874,21 @@ def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): def test_prescrump_self_join_KNN_no_overlap(): - # This test is designed to ensure that the performant version prescrump avoids - # overlap while computing the top-k matrix profiles and matrix profile indices. - # So, there would be no duplicates in each row of top-k matrix profile indices - # excluding the elements filled with `-1`. + # This test is particularly designed to raise error in some rare cases. # Let's denote `I[i]` as the array with length `k` that contains the start index # of best-so-far top-k neighbors of `subseq i`. Also, we denote `P[i]` as their - # corresponding distances to `subseq i`. After calculating the distance - # between `subseq i` to its neighbor `j` (let's call it `d`), we can insert `j` - # into I[i] only if j is not already in I[i], and for that we need to check - # the whole array I[i]. Although one might think to perform - # `idx = np.searchosrted(P[i], d)` first followed by the check `if j not in - # `I[i, :idx]`. HOWEVER, the latter approach may result in duplicates(!) due - # to the imprecision in calculation of ditances. In other words, it is possible + # corresponding ascendingly-sorted distances to `subseq i`. After calculating + # the distance between `subseq i` to its neighbor `j` (let's call it `d`), `j` + # is eligible to be inserted into I[i] only if `j` is not already in I[i]. + # Otherwise, we will have duplicates in I[i]. One might think to first perform + # `idx = np.searchosrted(P[i], d, side="right")` and then check if `j` is in + # `I[i, :idx]` or not. HOWEVER, the latter approach may result in duplicates(!) + # due to the imprecision in calculation of ditances. In other words, it is possible # that the distance between subseq i and subseq j was calculated in one of - # previous iterations of updating process and its value might be slightly higher - # than `d`. So, althought j might be already in I[i], it might not be in - # `I[i, :idx]`. + # previous iterations that value might be slightly higher than `d` (In theory, + # they should be exactly the same). So, althought j might be already in I[i], + # it might not be in `I[i, :idx]`. Hence, we need to perform a full traversal + # of I[i] and check all of its elemenets. 
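A concrete, hypothetical instance of the failure mode described in this comment, with made-up numbers:

import numpy as np

# Suppose neighbor j = 42 was inserted earlier with a slightly inflated
# distance (floating-point noise), and the same pair is now re-evaluated
# with d = 5.0:
P_i = np.array([2.0, 5.0000001, 9.0])
I_i = np.array([17, 42, 64])
d, j = 5.0, 42

idx = np.searchsorted(P_i, d, side="right")  # idx == 1
print(j in I_i[:idx])   # False -> a partial check would wrongly re-insert 42
print(j in I_i)         # True  -> the full-row check correctly rejects it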
T = np.array( [ From 9b3daefe5a3c8e4af0d985cfb731a5facd333400 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Jul 2022 11:07:52 -0600 Subject: [PATCH 362/416] Remove comment --- stumpy/scrump.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index de514f6cf..2867d4872 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -119,11 +119,6 @@ def _compute_PI( squared_distance_profile[nn_idx] < P_squared[thread_idx, i, -1] and nn_idx not in I[thread_idx, i] ): - # Since the top-k values for the `i`-th subsequence may already - # be updated/populated in other previous iterations (i.e., not all - # values in `I[thread_idx]` are equal to `-1` or not all values in - # `P_squared[thread_idx, i]` are equal to `np.inf`), we must - # shift-insert here rather than assign values to the first element. idx = np.searchsorted( P_squared[thread_idx, i], squared_distance_profile[nn_idx], From 6abd601804fd8a53722e7aa2faee8fa0974841b2 Mon Sep 17 00:00:00 2001 From: ninimama Date: Fri, 29 Jul 2022 11:24:37 -0600 Subject: [PATCH 363/416] Revise comment --- tests/test_scrump.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index ead48130f..b927ebf92 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -874,21 +874,25 @@ def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): def test_prescrump_self_join_KNN_no_overlap(): - # This test is particularly designed to raise error in some rare cases. - # Let's denote `I[i]` as the array with length `k` that contains the start index - # of best-so-far top-k neighbors of `subseq i`. Also, we denote `P[i]` as their - # corresponding ascendingly-sorted distances to `subseq i`. After calculating - # the distance between `subseq i` to its neighbor `j` (let's call it `d`), `j` - # is eligible to be inserted into I[i] only if `j` is not already in I[i]. - # Otherwise, we will have duplicates in I[i]. One might think to first perform - # `idx = np.searchosrted(P[i], d, side="right")` and then check if `j` is in - # `I[i, :idx]` or not. HOWEVER, the latter approach may result in duplicates(!) - # due to the imprecision in calculation of ditances. In other words, it is possible - # that the distance between subseq i and subseq j was calculated in one of - # previous iterations that value might be slightly higher than `d` (In theory, - # they should be exactly the same). So, althought j might be already in I[i], - # it might not be in `I[i, :idx]`. Hence, we need to perform a full traversal - # of I[i] and check all of its elemenets. + # This test is particularly designed to raise error in a rare case described + # as follows: Let's denote `I[i]` as the array with length `k` that contains + # the start index of the best-so-far top-k nearest neighbors of `subseq i`, + # (`S_i`). Also, we denote `P[i]` as their corresponding ascendingly-sorted + # distances to `subseq i`. After calculating the distance between `subseq i` + # to its neighbor `subseq j` (`S_j`). Let's denote `d` as the distance between + # these two subseqs. `j` is eligible to be inserted into I[i] if `d` is less + # than the `P[i, -1]` and if `j` is not in I[i]. One might think to first perform + # `idx = np.searchosrted(P[i], d, side="right")` and then check if `j` + # is in `I[i, :idx]` or not. HOWEVER, this does not suffice! The latter approach + # may result in duplicates(!) due to the imprecision in calculation of ditances. 
+ # It is possible that the distance between `S_i` and `S_j` was + # calculated in one of previous iterations and that value might be slightly + # higher than `d` (In theory, they should be exactly the same!!). Thus, althought + # `j` might be already in I[i], it might not appear in `I[i, :idx]`. In other + # words, we might have `j` as the element `I[i, w]`, where `w >= idx` and hence + # P[i, w] > d). In theory, P[i, w] and d should be equal as they both show the + # same distance, i.e. the distance between `S_i` and `S_j`. + # To sum up, we need to search whole I[i] for `j`. T = np.array( [ From 355c8e51af172e972ba0200f209c53751320ddbe Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 18:01:29 -0600 Subject: [PATCH 364/416] Fix format --- tests/naive.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index a5f8574d1..af14a9016 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1880,4 +1880,3 @@ def find_matches(D, excl_zone, max_distance, max_matches=None): matches = [x for x in matches if x < idx - excl_zone or x > idx + excl_zone] return np.array(result[:max_matches], dtype=object) - From 04685c7e17125fdf01e5debe1ef116358afccfba Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 18:06:54 -0600 Subject: [PATCH 365/416] Remove unnecessary newline --- stumpy/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 7573b19f8..5cd511a9f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2669,7 +2669,6 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): None """ k = ρA.shape[1] - tmp_ρ = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) last_idx = k - 1 From ba7b6ca8ba57a0b0fa9b759fc1967a063efd123b Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 18:26:45 -0600 Subject: [PATCH 366/416] Return 1D array for matrix profile when `k` is 1 --- stumpy/scrump.py | 40 ++++++++++++++++++++++++---------------- tests/naive.py | 4 ++++ tests/test_scrump.py | 32 ++++++++++++++++---------------- 3 files changed, 44 insertions(+), 32 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 2867d4872..2328bf09f 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -517,15 +517,16 @@ class scrump: Attributes ---------- P_ : numpy.ndarray - The updated (top-k) matrix profile. When k=1 (default), the first (and only) - column in this 2D array consists of the matrix profile. When k > 1, the output - has exactly k columns consisting of the top-k matrix profile. + The updated (top-k) matrix profile. When `k=1` (default), this output is + a 1D array consisting of the matrix profile values. When `k > 1`, the output + is a 2D array that has exactly `k` columns consisting of the top-k matrix + profile values. I_ : numpy.ndarray - The updated (top-k) matrix profile indices. When k=1 (default), the first - (and only) column in this 2D array consists of the matrix profile indices. - When k > 1, the output has exactly k columns consisting of the top-k matrix - profile indices. + The updated (top-k) matrix profile indices. When `k=1` (default), this output is + a 1D array consisting of the matrix profile indices. When `k > 1`, the output + is a 2D array that has exactly `k` columns consisting of the top-k matrix + profile indiecs. left_I_ : numpy.ndarray The updated left (top-1) matrix profile indices @@ -789,21 +790,28 @@ def update(self): @property def P_(self): """ - Get the updated (top-k) matrix profile. 
When k=1 (default), the first (and only) - column in this 2D array consists of the matrix profile. When k > 1, the output - has exactly `k` columns consisting of the top-k matrix profile. + Get the updated (top-k) matrix profile. When `k=1` (default), this output is + a 1D array consisting of the updated matrix profile values. When `k > 1`, + the output is a 2D array that has exactly `k` columns consisting of the + updated top-k matrix profile values. """ - return self._P.astype(np.float64) + if self._k == 1: + return self._P.flatten().astype(np.float64) + else: + return self._P.astype(np.float64) @property def I_(self): """ - Get the updated (top-k) matrix profile indices. When k=1 (default), the - first (and only) column in this 2D array consists of the matrix profile - indices. When k > 1, the output has exactly `k` columns consisting of the top-k - matrix profile indices. + Get the updated (top-k) matrix profile indices. When `k=1` (default), this + output is a 1D array consisting of the updated matrix profile indices. When + `k > 1`, the output is a 2D array that has exactly `k` columns consisting + of the updated top-k matrix profile indices. """ - return self._I.astype(np.int64) + if self._k == 1: + return self._I.flatten().astype(np.int64) + else: + return self._I.astype(np.int64) @property def left_I_(self): diff --git a/tests/naive.py b/tests/naive.py index af14a9016..da88a6176 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1541,6 +1541,10 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): PR[i] = d IR[i] = i + g + if k == 1: + P = P.flatten() + I = I.flatten() + return P, I, IL, IR diff --git a/tests/test_scrump.py b/tests/test_scrump.py index b927ebf92..41843df33 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -254,8 +254,8 @@ def test_scrump_self_join_full(T_A, T_B): npt.assert_almost_equal(ref_right_I, comp_right_I) ref_mp = stump(T_B, m, ignore_trivial=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -271,8 +271,8 @@ def test_scrump_A_B_join_full(T_A, T_B): m = 3 ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -292,8 +292,8 @@ def test_scrump_A_B_join_full(T_A, T_B): npt.assert_almost_equal(ref_right_I, comp_right_I) ref_mp = stump(T_A, m, T_B=T_B, ignore_trivial=False) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -309,8 +309,8 @@ def test_scrump_A_B_join_full_swap(T_A, T_B): m = 3 ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -337,8 +337,8 @@ def test_scrump_self_join_full_larger_window(T_A, T_B, m): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 
0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -440,8 +440,8 @@ def test_scrump_plus_plus_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -469,8 +469,8 @@ def test_scrump_plus_plus_A_B_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -498,8 +498,8 @@ def test_scrump_plus_plus_A_B_join_full_swap(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] From 36a7fcbd20d1c7b4637c7db86a0b7b929779495c Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 18:30:43 -0600 Subject: [PATCH 367/416] Remove unnecessary flattening operatiton on array --- stumpy/stimp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index c67c005a6..c377a69aa 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -214,11 +214,12 @@ def update(self): ignore_trivial=True, percentage=self._percentage, pre_scrump=self._pre_scrump, + k=1, ) approx.update() self._PAN[ self._bfs_indices[self._n_processed], : approx.P_.shape[0] - ] = approx.P_.flatten() + ] = approx.P_ else: out = self._mp_func( self._T, From 4f1b2dcf9d651526cbc0ff95f840cf77f0af8f00 Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 18:35:38 -0600 Subject: [PATCH 368/416] Fix comments --- stumpy/stump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 3b8c74bed..282cea0f9 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -474,12 +474,12 @@ def _stump( # update top-k arrays core._merge_topk_ρI(ρ[0], ρ[thread_idx], I[0], I[thread_idx]) - # update left matrix profile and matrix profile indices + # update left matrix profile and matrix profile indices mask = ρL[0] < ρL[thread_idx] ρL[0][mask] = ρL[thread_idx][mask] IL[0][mask] = IL[thread_idx][mask] - # update right matrix profile and matrix profile indices + # update right matrix profile and matrix profile indices mask = ρR[0] < ρR[thread_idx] ρR[0][mask] = ρR[thread_idx][mask] IR[0][mask] = IR[thread_idx][mask] From aa52529c60b4955ba85619ba61196251418bde8c Mon Sep 17 00:00:00 2001 From: ninimama Date: Mon, 8 Aug 2022 19:09:49 -0600 Subject: [PATCH 369/416] Make matrix profile and mp index 1D when k=1 --- stumpy/stumpi.py | 25 ++++++++++++++++--------- tests/naive.py | 18 +++++++++++++++--- tests/test_stumpi.py | 16 ++++++++-------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index b5c4b5a93..82fac2da8 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ 
-345,21 +345,28 @@ def _update(self, t): @property def P_(self): """ - Get the (top-k) matrix profile. When `k=1` (default), the first (and only) - column in this 2D array consists of the matrix profile. When `k > 1`, the - output has exactly `k` columns consisting of the top-k matrix profile. + Get the (top-k) matrix profile. When `k=1` (default), the output is + a 1D array consisting of the matrix profile. When `k > 1`, the + output is a 2D array that has exactly `k` columns and it consists of the + top-k matrix profile. """ - return self._P.astype(np.float64) + if self._k == 1: + return self._P.flatten().astype(np.float64) + else: + return self._P.astype(np.float64) @property def I_(self): """ - Get the (top-k) matrix profile indices. When `k=1` (default), the first - (and only) column in this 2D array consists of the matrix profile indices. - When `k > 1`, the output has exactly `k` columns consisting of the top-k - matrix profile indices. + Get the (top-k) matrix profile indices. When `k=1` (default), the output is + a 1D array consisting of the matrix profile indices. When `k > 1`, the + output is a 2D array that has exactly `k` columns and it consists of the + top-k matrix profile indices. """ - return self._I.astype(np.int64) + if self._k == 1: + return self._I.flatten().astype(np.int64) + else: + return self._I.astype(np.int64) @property def left_P_(self): diff --git a/tests/naive.py b/tests/naive.py index da88a6176..198915e97 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -800,7 +800,7 @@ def __init__(self, T, m, excl_zone=None, k=1): self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = stump(T, m, exclusion_zone=self._excl_zone, k=k) + mp = stump(T, m, exclusion_zone=self._excl_zone, k=self._k) self.P_ = mp[:, :k].astype(np.float64) self.I_ = mp[:, k : 2 * k].astype(np.int64) @@ -814,7 +814,15 @@ def __init__(self, T, m, excl_zone=None, k=1): self._n_appended = 0 + if self._k == 1: + self.P_ = self.P_.flatten() + self.I_ = self.I_.flatten() + def update(self, t): + if self._k == 1: + self.P_ = self.P_.reshape(-1, 1) + self.I_ = self.I_.reshape(-1, 1) + self._T[:] = np.roll(self._T, -1) self._T_isfinite[:] = np.roll(self._T_isfinite, -1) if np.isfinite(t): @@ -825,8 +833,8 @@ def update(self, t): self._T[-1] = 0 self._n_appended += 1 - self.P_[:, :] = np.roll(self.P_, -1, axis=0) - self.I_[:, :] = np.roll(self.I_, -1, axis=0) + self.P_ = np.roll(self.P_, -1, axis=0) + self.I_ = np.roll(self.I_, -1, axis=0) self.left_P_[:] = np.roll(self.left_P_, -1) self.left_I_[:] = np.roll(self.left_I_, -1) @@ -859,6 +867,10 @@ def update(self, t): self.left_P_[-1] = self.P_[-1, 0] self.left_I_[-1] = self.I_[-1, 0] + if self._k == 1: + self.P_ = self.P_.flatten() + self.I_ = self.I_.flatten() + def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m): """ diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index 13c4a6ee4..c794d9e6b 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -33,8 +33,8 @@ def test_stumpi_self_join(): comp_left_I = stream.left_I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) - ref_I = ref_mp[:, 1].reshape(-1, 1) + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2].astype(np.int64) ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64) for i, j in enumerate(ref_left_I): @@ -210,8 +210,8 @@ def test_stumpi_init_nan_inf_self_join(substitute, substitution_locations): 
stream.T_[substitution_location] = substitute ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) - ref_I = ref_mp[:, 1].reshape(-1, 1) + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) naive.replace_inf(comp_P) @@ -385,8 +385,8 @@ def test_stumpi_stream_nan_inf_self_join(substitute, substitution_locations): stream.T_[30:][substitution_location] = substitute ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) - ref_I = ref_mp[:, 1].reshape(-1, 1) + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) naive.replace_inf(comp_P) @@ -546,7 +546,7 @@ def test_stumpi_constant_subsequence_self_join(): # comp_I = stream.I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_P = ref_mp[:, 0] # ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) @@ -701,7 +701,7 @@ def test_stumpi_identical_subsequence_self_join(): # comp_I = stream.I_ ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) + ref_P = ref_mp[:, 0] # ref_I = ref_mp[:, 1] naive.replace_inf(ref_P) From ab22972dbb541da1d06a6125b9129d03ef6d4233 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 9 Aug 2022 11:19:00 -0600 Subject: [PATCH 370/416] Revise tests functions --- tests/test_scrump.py | 56 ++++++++++++++++++++++++++------------------ tests/test_stumpi.py | 25 +++++++++----------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 41843df33..87d93ac03 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -233,8 +233,8 @@ def test_scrump_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -369,10 +369,15 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=1) ref_P_aux, ref_I_aux, _, _ = naive.scrump( - T_B, m, T_B, percentage, zone, True, s + T_B, m, T_B, percentage, zone, True, s, k=1 ) + + # ref_P and ref_I are always 2D arrays. naive.scrump, howeve, gives + # 1D array when k=1. + ref_P_aux = ref_P_aux.reshape(-1, 1) + ref_I_aux = ref_I_aux.reshape(-1, 1) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) @@ -386,6 +391,8 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): naive.replace_inf(ref_P) naive.replace_inf(comp_P) + ref_P = ref_P.flatten() + ref_I = ref_I.flatten() npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) @@ -401,11 +408,16 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s) + ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s, k=1) ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump( - T_A, m, T_B, percentage, None, False, None + T_A, m, T_B, percentage, None, False, None, k=1 ) + + # ref_P and ref_I are always 2D arrays. 
naive.scrump, howeve, gives + # 1D array when k=1 + ref_P_aux = ref_P_aux.reshape(-1, 1) + ref_I_aux = ref_I_aux.reshape(-1, 1) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) ref_left_I = ref_left_I_aux ref_right_I = ref_right_I_aux @@ -428,6 +440,8 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): naive.replace_inf(ref_P) naive.replace_inf(comp_P) + ref_P = ref_P.flatten() + ref_I = ref_I.flatten() npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) @@ -876,24 +890,20 @@ def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B): def test_prescrump_self_join_KNN_no_overlap(): # This test is particularly designed to raise error in a rare case described # as follows: Let's denote `I[i]` as the array with length `k` that contains - # the start index of the best-so-far top-k nearest neighbors of `subseq i`, + # the start indices of the best-so-far top-k nearest neighbors of `subseq i`, # (`S_i`). Also, we denote `P[i]` as their corresponding ascendingly-sorted - # distances to `subseq i`. After calculating the distance between `subseq i` - # to its neighbor `subseq j` (`S_j`). Let's denote `d` as the distance between - # these two subseqs. `j` is eligible to be inserted into I[i] if `d` is less - # than the `P[i, -1]` and if `j` is not in I[i]. One might think to first perform - # `idx = np.searchosrted(P[i], d, side="right")` and then check if `j` - # is in `I[i, :idx]` or not. HOWEVER, this does not suffice! The latter approach - # may result in duplicates(!) due to the imprecision in calculation of ditances. - # It is possible that the distance between `S_i` and `S_j` was - # calculated in one of previous iterations and that value might be slightly - # higher than `d` (In theory, they should be exactly the same!!). Thus, althought - # `j` might be already in I[i], it might not appear in `I[i, :idx]`. In other - # words, we might have `j` as the element `I[i, w]`, where `w >= idx` and hence - # P[i, w] > d). In theory, P[i, w] and d should be equal as they both show the - # same distance, i.e. the distance between `S_i` and `S_j`. - # To sum up, we need to search whole I[i] for `j`. - + # distances. Let's denote `d` as the distane betweeen `S_i` and `S_j`. P[i] and + # I[i] must be updated if (1) `j` is not in I[i] and (2) `d` < P[i,-1]. Regarding + # the former condition, one needs to check the whole array I[i]. Checking the + # array I[i, :idx], where `idx = np.searchsorted(P[i], 'd', side='right')` is + # not completly correct and that is due to imprecision in numerical calculation. + # It may happen that `j` is not in `I[i, :idx]`, but it is in fact at `I[i, idx]` + # (or any other position in array I[i]). And, its corresponding distance, i.e + # P[i, idx], is d + 1e-5, for instance. In theory, this should be exactly `d`. + # However, due to imprecision, we may calculated a slightly different value + # for such distance in one of previous iterations in function prescrump. This + # test results in error if someone tries to change the performant code of prescrump + # function and check `I[i, :idx]` rather than the full array `I[i]`. 
T = np.array( [ -916.64703784, diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py index c794d9e6b..5ab2023a7 100644 --- a/tests/test_stumpi.py +++ b/tests/test_stumpi.py @@ -868,22 +868,19 @@ def test_stumpi_profile_index_match(): t = T_full[i] stream.update(t) - P[:, :] = np.inf - mask = stream.I_ >= 0 - - for j in range(P.shape[1]): # `j` as j-th nearest neighbor - IDX = np.flatnonzero(mask[:, j]) - P[IDX, j] = naive.distance( - naive.z_norm(T_full_subseq[IDX + n + 1], axis=1), - naive.z_norm(T_full_subseq[stream.I_[IDX, j]], axis=1), - axis=1, - ) + P[:] = np.inf + indices = np.argwhere(stream.I_ >= 0).flatten() + P[indices] = naive.distance( + naive.z_norm(T_full_subseq[indices + n + 1], axis=1), + naive.z_norm(T_full_subseq[stream.I_[indices]], axis=1), + axis=1, + ) left_P[:] = np.inf - idx = np.argwhere(stream.left_I_ >= 0).flatten() - left_P[idx] = naive.distance( - naive.z_norm(T_full_subseq[idx + n + 1], axis=1), - naive.z_norm(T_full_subseq[stream.left_I_[idx]], axis=1), + indices = np.argwhere(stream.left_I_ >= 0).flatten() + left_P[indices] = naive.distance( + naive.z_norm(T_full_subseq[indices + n + 1], axis=1), + naive.z_norm(T_full_subseq[stream.left_I_[indices]], axis=1), axis=1, ) From 249d928ea98ff7bf0c78f5c9746897d526192a54 Mon Sep 17 00:00:00 2001 From: ninimama Date: Tue, 9 Aug 2022 13:27:27 -0600 Subject: [PATCH 371/416] Improve Docstrings --- stumpy/scrump.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 2328bf09f..d259d4307 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -518,9 +518,9 @@ class scrump: ---------- P_ : numpy.ndarray The updated (top-k) matrix profile. When `k=1` (default), this output is - a 1D array consisting of the matrix profile values. When `k > 1`, the output + a 1D array consisting of the matrix profile. When `k > 1`, the output is a 2D array that has exactly `k` columns consisting of the top-k matrix - profile values. + profile. I_ : numpy.ndarray The updated (top-k) matrix profile indices. When `k=1` (default), this output is @@ -790,10 +790,10 @@ def update(self): @property def P_(self): """ - Get the updated (top-k) matrix profile. When `k=1` (default), this output is - a 1D array consisting of the updated matrix profile values. When `k > 1`, - the output is a 2D array that has exactly `k` columns consisting of the - updated top-k matrix profile values. + Get the updated (top-k) matrix profile. When `k=1` (default), this output + is a 1D array consisting of the updated matrix profile. When `k > 1`, the + output is a 2D array that has exactly `k` columns consisting of the updated + top-k matrix profile. 
""" if self._k == 1: return self._P.flatten().astype(np.float64) From 5e515c4d1b83d6c8a1b41ae44366b0bf54fb8efa Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 12:34:06 -0600 Subject: [PATCH 372/416] Make prescrump output 1D when k is one --- stumpy/scrump.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 2328bf09f..ea5ebaf5d 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -455,7 +455,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): k, ) - return P, I + if k == 1: + return P.flatten().astype(np.float64), I.flatten().astype(np.int64) + else: + return P, I @core.non_normalized( @@ -715,7 +718,11 @@ def __init__( else: P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) - core._merge_topk_PI(self._P, P, self._I, I) + # P and I are 1D when `self._k` is 1. So, we should reshape them + # before passing them to `_merge_topk_PI` + core._merge_topk_PI( + self._P, P.reshape(-1, self._k), self._I, I.reshape(-1, self._k) + ) if self._ignore_trivial: self._diags = np.random.permutation( From 752a22c89ab8fe6fa2230027ac3f98347b558690 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 12:40:19 -0600 Subject: [PATCH 373/416] minor change --- tests/naive.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 198915e97..1d03627e2 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -819,9 +819,9 @@ def __init__(self, T, m, excl_zone=None, k=1): self.I_ = self.I_.flatten() def update(self, t): - if self._k == 1: - self.P_ = self.P_.reshape(-1, 1) - self.I_ = self.I_.reshape(-1, 1) + # ensure than self.P_ and self.I_ are 2D + self.P_ = self.P_.reshape(-1, self._k) + self.I_ = self.I_.reshape(-1, self._k) self._T[:] = np.roll(self._T, -1) self._T_isfinite[:] = np.roll(self._T_isfinite, -1) @@ -1491,6 +1491,10 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] I[idx] = np.insert(I[idx], pos, i)[:-1] + if k == 1: + P = P.flatten() + I = I.flatten() + return P, I From e1f49afb497afbdeff890d922decf61af7563750 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 12:54:53 -0600 Subject: [PATCH 374/416] update test functions --- tests/naive.py | 7 +++++++ tests/test_scrump.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 1d03627e2..d6ce20adb 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -867,6 +867,7 @@ def update(self, t): self.left_P_[-1] = self.P_[-1, 0] self.left_I_[-1] = self.I_[-1, 0] + # post-processing: ensure that self.P_ and self.I_ is 1D. 
if self._k == 1: self.P_ = self.P_.flatten() self.I_ = self.I_.flatten() @@ -1827,6 +1828,12 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w def merge_topk_PI(PA, PB, IA, IB): k = PA.shape[1] + if k == 1: + mask = PB < PA + PA[mask] = PB[mask] + IA[mask] = IB[mask] + return + for i in range(PA.shape[0]): _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) PB[i, overlap_idx_B] = np.inf diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 41843df33..53652a483 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -233,8 +233,8 @@ def test_scrump_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0].reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1].reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] # .reshape(-1, 1) # to match shape of comp_P when k=1 + ref_I = ref_mp[:, 1] # .reshape(-1, 1) # to match shape of comp_I when k=1 ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] From 6e541ea3dcffb364a88ce80db326661eef309128 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 12:55:14 -0600 Subject: [PATCH 375/416] Modify merge_topk to support 1D input --- stumpy/core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 5cd511a9f..900bb2696 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2609,6 +2609,12 @@ def _merge_topk_PI(PA, PB, IA, IB): None """ k = PA.shape[1] + if k == 1: + mask = PB < PA + PA[mask] = PB[mask] + IA[mask] = IB[mask] + return + tmp_P = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) for i in range(PA.shape[0]): @@ -2669,6 +2675,12 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): None """ k = ρA.shape[1] + if k == 1: + mask = ρB > ρA + ρA[mask] = ρB[mask] + IA[mask] = IB[mask] + return + tmp_ρ = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) last_idx = k - 1 From 0bff1aee1e5d84967c0429b97d8f4264c4867d99 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 13:22:31 -0600 Subject: [PATCH 376/416] Fix merge_topk --- stumpy/core.py | 8 ++++---- tests/naive.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 900bb2696..d6e7545ea 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2608,13 +2608,13 @@ def _merge_topk_PI(PA, PB, IA, IB): ------- None """ - k = PA.shape[1] - if k == 1: + if PA.ndim == 1: mask = PB < PA PA[mask] = PB[mask] IA[mask] = IB[mask] return + k = PA.shape[1] tmp_P = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) for i in range(PA.shape[0]): @@ -2674,13 +2674,13 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ------- None """ - k = ρA.shape[1] - if k == 1: + if ρA.ndim == 1: mask = ρB > ρA ρA[mask] = ρB[mask] IA[mask] = IB[mask] return + k = ρA.shape[1] tmp_ρ = np.empty(k, dtype=np.float64) tmp_I = np.empty(k, dtype=np.int64) last_idx = k - 1 diff --git a/tests/naive.py b/tests/naive.py index d6ce20adb..52e85bf42 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1827,13 +1827,13 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w def merge_topk_PI(PA, PB, IA, IB): - k = PA.shape[1] - if k == 1: + if PA.ndim == 1: mask = PB < PA PA[mask] = PB[mask] IA[mask] = IB[mask] return + k = PA.shape[1] for i in range(PA.shape[0]): _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) PB[i, overlap_idx_B] = np.inf @@ -1869,6 
+1869,12 @@ def merge_topk_ρI(ρA, ρB, IA, IB): # merging `ρB` and `ρA` ascendingly while choosing `ρB` over `ρA` in case of # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second # half of this array, and discard the first half. + if ρA.ndim == 1: + mask = ρB > ρA + ρA[mask] = ρB[mask] + IA[mask] = IB[mask] + return + k = ρA.shape[1] for i in range(ρA.shape[0]): _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) From 39e5ea30f89d174c653add18f5027d201edc1d06 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 14:15:28 -0600 Subject: [PATCH 377/416] Fix shape of variables in test functions --- tests/test_scrump.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 8141e8e39..1341dc1a1 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -233,8 +233,8 @@ def test_scrump_self_join_full(T_A, T_B): zone = int(np.ceil(m / 4)) ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True) - ref_P = ref_mp[:, 0] # .reshape(-1, 1) # to match shape of comp_P when k=1 - ref_I = ref_mp[:, 1] # .reshape(-1, 1) # to match shape of comp_I when k=1 + ref_P = ref_mp[:, 0] + ref_I = ref_mp[:, 1] ref_left_I = ref_mp[:, 2] ref_right_I = ref_mp[:, 3] @@ -374,10 +374,6 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): T_B, m, T_B, percentage, zone, True, s, k=1 ) - # ref_P and ref_I are always 2D arrays. naive.scrump, howeve, gives - # 1D array when k=1. - ref_P_aux = ref_P_aux.reshape(-1, 1) - ref_I_aux = ref_I_aux.reshape(-1, 1) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) @@ -414,10 +410,6 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): T_A, m, T_B, percentage, None, False, None, k=1 ) - # ref_P and ref_I are always 2D arrays. 
naive.scrump, howeve, gives - # 1D array when k=1 - ref_P_aux = ref_P_aux.reshape(-1, 1) - ref_I_aux = ref_I_aux.reshape(-1, 1) naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) ref_left_I = ref_left_I_aux ref_right_I = ref_right_I_aux From e9fd14c2ecd81db9f047e02b7e4be9c6d2589d1a Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 14:24:22 -0600 Subject: [PATCH 378/416] Remove unnecessary flatten operation --- tests/test_stimp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_stimp.py b/tests/test_stimp.py index f30514193..a56bec695 100644 --- a/tests/test_stimp.py +++ b/tests/test_stimp.py @@ -52,7 +52,7 @@ def test_stimp_1_percent(T): tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone) ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s) naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I) - ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P.flatten() + ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P # Compare raw pan cmp_PAN = pan._PAN @@ -107,7 +107,7 @@ def test_stimp_max_m(T): tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone) ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s) naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I) - ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P.flatten() + ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P # Compare raw pan cmp_PAN = pan._PAN From d3858298a7f52f8e0c73b86f44d71fa42f3a59d0 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sun, 28 Aug 2022 14:54:02 -0600 Subject: [PATCH 379/416] Update test function for case k=1 --- tests/test_core.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 18b9f3d8e..814e3f488 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1078,6 +1078,21 @@ def test_merge_topk_PI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k + # if k=1, make them 1D + if k == 1: + PA = PA.reshape( + -1, + ) + IA = IA.reshape( + -1, + ) + PB = PB.reshape( + -1, + ) + IB = IB.reshape( + -1, + ) + ref_P = PA.copy() ref_I = IA.copy() @@ -1096,6 +1111,8 @@ def test_merge_topk_PI_with_overlap(): # is overlap between row IA[i] and row IB[i]. n = 50 for k in range(1, 6): + # note: we do not have overlap issue when k is 1. The `k=1` is considered + # for the sake of consistency with the `without-overlap` test function. PA = np.random.rand(n * k).reshape(n, k) PB = np.random.rand(n * k).reshape(n, k) @@ -1119,6 +1136,21 @@ def test_merge_topk_PI_with_overlap(): PB[:, :] = np.take_along_axis(PB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + # if k=1, make them 1D + if k == 1: + PA = PA.reshape( + -1, + ) + IA = IA.reshape( + -1, + ) + PB = PB.reshape( + -1, + ) + IB = IB.reshape( + -1, + ) + ref_P = PA.copy() ref_I = IA.copy() @@ -1149,6 +1181,21 @@ def test_merge_topk_ρI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k + # if k=1, make them 1D + if k == 1: + ρA = ρA.reshape( + -1, + ) + IA = IA.reshape( + -1, + ) + ρB = ρB.reshape( + -1, + ) + IB = IB.reshape( + -1, + ) + ref_ρ = ρA.copy() ref_I = IA.copy() @@ -1167,6 +1214,8 @@ def test_merge_topk_ρI_with_overlap(): # is overlap between row IA[i] and row IB[i]. n = 50 for k in range(1, 6): + # note: we do not have overlap issue when k is 1. The `k=1` is considered + # for the sake of consistency with the `without-overlap` test function. 
ρA = np.random.rand(n * k).reshape(n, k) ρB = np.random.rand(n * k).reshape(n, k) @@ -1190,6 +1239,21 @@ def test_merge_topk_ρI_with_overlap(): ρB[:, :] = np.take_along_axis(ρB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + # if k=1, make them 1D + if k == 1: + ρA = ρA.reshape( + -1, + ) + IA = IA.reshape( + -1, + ) + ρB = ρB.reshape( + -1, + ) + IB = IB.reshape( + -1, + ) + ref_ρ = ρA.copy() ref_I = IA.copy() From 90ab9e36ed5ff5ebfdc22334597189c2c91ce01d Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Mon, 29 Aug 2022 13:07:20 -0600 Subject: [PATCH 380/416] revise comment --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 52e85bf42..2e7b9d160 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -867,7 +867,7 @@ def update(self, t): self.left_P_[-1] = self.P_[-1, 0] self.left_I_[-1] = self.I_[-1, 0] - # post-processing: ensure that self.P_ and self.I_ is 1D. + # post-processing: ensure that self.P_ and self.I_ are 1D. if self._k == 1: self.P_ = self.P_.flatten() self.I_ = self.I_.flatten() From 0b163eb7f86d04602f5714e6b2e0199d40ce5ff5 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Mon, 29 Aug 2022 13:09:21 -0600 Subject: [PATCH 381/416] Avoid using return in the middle of code --- stumpy/core.py | 100 ++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d6e7545ea..e535d4163 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2612,33 +2612,32 @@ def _merge_topk_PI(PA, PB, IA, IB): mask = PB < PA PA[mask] = PB[mask] IA[mask] = IB[mask] - return - - k = PA.shape[1] - tmp_P = np.empty(k, dtype=np.float64) - tmp_I = np.empty(k, dtype=np.int64) - for i in range(PA.shape[0]): - overlap = set(IB[i]).intersection(set(IA[i])) - aj, bj = 0, 0 - idx = 0 - # 2 * k iterations are required to traverse both A and B if needed. - for _ in range(2 * k): - if idx >= k: - break - if bj < k and PB[i, bj] < PA[i, aj]: - if IB[i, bj] not in overlap: - tmp_P[idx] = PB[i, bj] - tmp_I[idx] = IB[i, bj] + else: + k = PA.shape[1] + tmp_P = np.empty(k, dtype=np.float64) + tmp_I = np.empty(k, dtype=np.int64) + for i in range(PA.shape[0]): + overlap = set(IB[i]).intersection(set(IA[i])) + aj, bj = 0, 0 + idx = 0 + # 2 * k iterations are required to traverse both A and B if needed. + for _ in range(2 * k): + if idx >= k: + break + if bj < k and PB[i, bj] < PA[i, aj]: + if IB[i, bj] not in overlap: + tmp_P[idx] = PB[i, bj] + tmp_I[idx] = IB[i, bj] + idx += 1 + bj += 1 + else: + tmp_P[idx] = PA[i, aj] + tmp_I[idx] = IA[i, aj] idx += 1 - bj += 1 - else: - tmp_P[idx] = PA[i, aj] - tmp_I[idx] = IA[i, aj] - idx += 1 - aj += 1 + aj += 1 - PA[i] = tmp_P - IA[i] = tmp_I + PA[i] = tmp_P + IA[i] = tmp_I @njit @@ -2678,34 +2677,33 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): mask = ρB > ρA ρA[mask] = ρB[mask] IA[mask] = IB[mask] - return - - k = ρA.shape[1] - tmp_ρ = np.empty(k, dtype=np.float64) - tmp_I = np.empty(k, dtype=np.int64) - last_idx = k - 1 - for i in range(len(ρA)): - overlap = set(IB[i]).intersection(set(IA[i])) - aj, bj = last_idx, last_idx - idx = last_idx - # 2 * k iterations are required to traverse both A and B if needed. 
- for _ in range(2 * k): - if idx < 0: - break - if bj >= 0 and ρB[i, bj] > ρA[i, aj]: - if IB[i, bj] not in overlap: - tmp_ρ[idx] = ρB[i, bj] - tmp_I[idx] = IB[i, bj] + else: + k = ρA.shape[1] + tmp_ρ = np.empty(k, dtype=np.float64) + tmp_I = np.empty(k, dtype=np.int64) + last_idx = k - 1 + for i in range(len(ρA)): + overlap = set(IB[i]).intersection(set(IA[i])) + aj, bj = last_idx, last_idx + idx = last_idx + # 2 * k iterations are required to traverse both A and B if needed. + for _ in range(2 * k): + if idx < 0: + break + if bj >= 0 and ρB[i, bj] > ρA[i, aj]: + if IB[i, bj] not in overlap: + tmp_ρ[idx] = ρB[i, bj] + tmp_I[idx] = IB[i, bj] + idx -= 1 + bj -= 1 + else: + tmp_ρ[idx] = ρA[i, aj] + tmp_I[idx] = IA[i, aj] idx -= 1 - bj -= 1 - else: - tmp_ρ[idx] = ρA[i, aj] - tmp_I[idx] = IA[i, aj] - idx -= 1 - aj -= 1 + aj -= 1 - ρA[i] = tmp_ρ - IA[i] = tmp_I + ρA[i] = tmp_ρ + IA[i] = tmp_I @njit From bf6df9b3d727d00c194edf766fa4c382ebe17ab5 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Tue, 30 Aug 2022 12:55:44 -0600 Subject: [PATCH 382/416] Add new private function to get 2D ouput when k=1 --- stumpy/scrump.py | 96 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index ea5ebaf5d..d4fc9b239 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -250,7 +250,7 @@ def _compute_PI( parallel=True, fastmath=True, ) -def _prescrump( +def _compute_approx_PI( T_A, T_B, m, @@ -368,12 +368,11 @@ def _prescrump( return np.sqrt(P_squared[0]), I[0] -@core.non_normalized(scraamp.prescraamp) -def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): +def _prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): """ - A convenience wrapper around the Numba JIT-compiled parallelized `_prescrump` - function which computes the approximate (top-k) matrix profile according to - the preSCRIMP algorithm + A convenience wrapper around the Numba JIT-compiled parallelized + `_compute_approx_PI` function which computes the approximate (top-k) matrix + profile according to the preSCRIMP algorithm. Parameters ---------- @@ -408,15 +407,12 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): Returns ------- P : numpy.ndarray - The (top-k) matrix profile. When k = 1 (default), the first (and only) column - in this 2D array consists of the matrix profile. When k > 1, the output has - exactly `k` columns consisting of the top-k matrix profile. + The (top-k) matrix profile. This 2D array has exactly `k` columns consisting + of the top-k matrix profile. I : numpy.ndarray - The (top-k) matrix profile indices. When k = 1 (default), the first (and only) - column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly `k` columns consisting of the top-k matrix profile - indices. + The (top-k) matrix profile indices. This 2D array has exactly `k` columns + consisting of the top-k matrix profile indices. 
Notes ----- @@ -441,7 +437,7 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): s = excl_zone indices = np.random.permutation(range(0, l, s)).astype(np.int64) - P, I = _prescrump( + P, I = _compute_approx_PI( T_A, T_B, m, @@ -455,6 +451,68 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): k, ) + return P, I + + +@core.non_normalized(scraamp.prescraamp) +def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): + """ + A convenience wrapper around `prescrump` function which computes the approximate + (top-k) matrix profile according to the preSCRIMP algorithm. The output is 1D + when `k=1`. + + Parameters + ---------- + T_A : numpy.ndarray + The time series or sequence for which to compute the matrix profile + + m : int + Window size + + T_B : numpy.ndarray, default None + The time series or sequence that will be used to annotate T_A. For every + subsequence in T_A, its nearest neighbor in T_B will be recorded. + + s : int, default None + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + normalize : bool, default True + When set to `True`, this z-normalizes subsequences prior to computing distances. + Otherwise, this function gets re-routed to its complementary non-normalized + equivalent set in the `@core.non_normalized` function decorator. + + p : float, default 2.0 + The p-norm to apply for computing the Minkowski distance. This parameter is + ignored when `normalize == True`. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + + Returns + ------- + P : numpy.ndarray + The (top-k) matrix profile. When `k = 1` (default), this is a 1D array + consisting of the matrix profile. When `k > 1`, the output is a 2D array + that has exactly `k` columns consisting of the top-k matrix profile. + + I : numpy.ndarray + The (top-k) matrix profile indices. When `k = 1` (default), this is a 1D array + consisting of the matrix profile indices. When `k > 1`, the output is a + 2D array that has exactly `k` columns consisting of the top-k matrix profile + indices. + + Notes + ----- + `DOI: 10.1109/ICDM.2018.00099 \ + `__ + + See Algorithm 2 + """ + P, I = _prescrump(T_A, m, T_B, s, normalize, p, k) + if k == 1: return P.flatten().astype(np.float64), I.flatten().astype(np.int64) else: @@ -714,15 +772,11 @@ def __init__( if pre_scrump: if self._ignore_trivial: - P, I = prescrump(T_A, m, s=s, k=self._k) + P, I = _prescrump(T_A, m, s=s, k=self._k) else: - P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) + P, I = _prescrump(T_A, m, T_B=T_B, s=s, k=self._k) - # P and I are 1D when `self._k` is 1. 
So, we should reshape them - # before passing them to `_merge_topk_PI` - core._merge_topk_PI( - self._P, P.reshape(-1, self._k), self._I, I.reshape(-1, self._k) - ) + core._merge_topk_PI(self._P, P, self._I, I) if self._ignore_trivial: self._diags = np.random.permutation( From e6a05d625a176500a5e58bb187dc6771ff9883c4 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Tue, 30 Aug 2022 13:03:22 -0600 Subject: [PATCH 383/416] Remove check for 1D in merge_topk --- tests/test_core.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 814e3f488..c1e219744 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1078,21 +1078,6 @@ def test_merge_topk_PI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - # if k=1, make them 1D - if k == 1: - PA = PA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - PB = PB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_P = PA.copy() ref_I = IA.copy() @@ -1136,21 +1121,6 @@ def test_merge_topk_PI_with_overlap(): PB[:, :] = np.take_along_axis(PB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) - # if k=1, make them 1D - if k == 1: - PA = PA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - PB = PB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_P = PA.copy() ref_I = IA.copy() From fe905d23d744c1b34c4d9384721265e18920502e Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Tue, 30 Aug 2022 13:27:18 -0600 Subject: [PATCH 384/416] Revise test functions --- tests/test_core.py | 72 +++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index c1e219744..1765c048d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1134,6 +1134,27 @@ def test_merge_topk_PI_with_overlap(): npt.assert_almost_equal(ref_I, comp_I) +def test_merge_topk_PI_with_1D_input(): + n = 50 + PA = np.random.rand(n) + PB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + + def test_merge_topk_ρI_without_overlap(): # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there # is no overlap between row IA[i] and row IB[i]. 
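(For readers following these merge tests, the row-wise semantics they exercise can be summarized with a small reference sketch; the function below is illustrative only and is not the implementation under test. It shows the matrix-profile case, which keeps the k smallest distances and lets A win ties; the pearson (ρ) variant keeps the k largest instead.)

    import numpy as np

    def merge_topk_row(PA_row, IA_row, PB_row, IB_row):
        # Drop B entries whose neighbor index already appears in A, then keep
        # the k smallest distances from the union; a stable sort keeps A's
        # entries ahead of B's on ties.
        k = PA_row.shape[0]
        keep = ~np.isin(IB_row, IA_row)
        P = np.concatenate([PA_row, PB_row[keep]])
        I = np.concatenate([IA_row, IB_row[keep]])
        order = np.argsort(P, kind="stable")[:k]
        return P[order], I[order]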
@@ -1151,21 +1172,6 @@ def test_merge_topk_ρI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - # if k=1, make them 1D - if k == 1: - ρA = ρA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - ρB = ρB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_ρ = ρA.copy() ref_I = IA.copy() @@ -1209,21 +1215,6 @@ def test_merge_topk_ρI_with_overlap(): ρB[:, :] = np.take_along_axis(ρB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) - # if k=1, make them 1D - if k == 1: - ρA = ρA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - ρB = ρB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_ρ = ρA.copy() ref_I = IA.copy() @@ -1237,6 +1228,27 @@ def test_merge_topk_ρI_with_overlap(): npt.assert_almost_equal(ref_I, comp_I) +def test_merge_topk_ρI_with_1D_input(): + n = 50 + ρA = np.random.rand(n) + ρB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) + + def test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) From 8e8d48b447cf10abc505eb5d190243a7611fe527 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Tue, 30 Aug 2022 13:39:05 -0600 Subject: [PATCH 385/416] Revise docstring to provide description for 1D case --- stumpy/core.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e535d4163..fd9a3e511 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2578,12 +2578,15 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): @njit def _merge_topk_PI(PA, PB, IA, IB): """ - Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while - always prioritizing the values of `PA` over the values of `PB` in case of ties. - (i.e., values from `PB` are always inserted to the right of values from `PA`). - Also, update `IA` accordingly. In case of overlapping values between two arrays - IA[i] and IB[i], the ones in IB[i] (and their corresponding values in PB[i]) - are ignored throughout the updating process of IA[i] (and PA[i]). + Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). + When the inputs are 1D arrays, PA[i] is updated if it is less than PB[i]. In + such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], respectively. + When the inputs are 2D arrays, always prioritizing the values of `PA` over the + values of `PB` in case of ties. (i.e., values from `PB` are always inserted to + the right of values from `PA`). Also, update `IA` accordingly. In case of + overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and + their corresponding values in PB[i]) are ignored throughout the updating process o + f IA[i] (and PA[i]). Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. @@ -2643,12 +2646,15 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit def _merge_topk_ρI(ρA, ρB, IA, IB): """ - Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) while - always prioritizing the values of `ρA` over the values of `ρB` in case of ties. - (i.e., values from `ρB` are always inserted to the left of values from `ρA`). - Also, update `IA` accordingly. 
In case of overlapping values between two arrays - IA[i] and IB[i], the ones in IB[i] (and their corresponding values in ρB[i]) - are ignored throughout the updating process of IA[i] (and ρA[i]). + Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). + When the inputs are 1D arrays, ρA[i] is updated if it is more than ρB[i]. In + such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], respectively. + When the inputs are 2D arrays, always prioritizing the values of `ρA` over + the values of `ρB` in case of ties. (i.e., values from `ρB` are always inserted + to the left of values from `ρA`). Also, update `IA` accordingly. In case of + overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and + their corresponding values in ρB[i]) are ignored throughout the updating process + of IA[i] (and ρA[i]). Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. From 3bebc47457cbc1572cf138802960da57b0b42341 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Tue, 30 Aug 2022 14:29:53 -0600 Subject: [PATCH 386/416] Add overlap check in merge_topk with 1D input --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index fd9a3e511..6e3046f9f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2612,7 +2612,7 @@ def _merge_topk_PI(PA, PB, IA, IB): None """ if PA.ndim == 1: - mask = PB < PA + mask = (PB < PA) & (IB != IA) PA[mask] = PB[mask] IA[mask] = IB[mask] else: @@ -2680,7 +2680,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): None """ if ρA.ndim == 1: - mask = ρB > ρA + mask = (ρB > ρA) & (IB != IA) ρA[mask] = ρB[mask] IA[mask] = IB[mask] else: From 4fcf797a1f4235ffa28912eb8b762dc42f7be6a6 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 12:36:25 -0600 Subject: [PATCH 387/416] Add overlap check in 1D and revise docstring --- stumpy/core.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e535d4163..6e3046f9f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2578,12 +2578,15 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): @njit def _merge_topk_PI(PA, PB, IA, IB): """ - Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place) while - always prioritizing the values of `PA` over the values of `PB` in case of ties. - (i.e., values from `PB` are always inserted to the right of values from `PA`). - Also, update `IA` accordingly. In case of overlapping values between two arrays - IA[i] and IB[i], the ones in IB[i] (and their corresponding values in PB[i]) - are ignored throughout the updating process of IA[i] (and PA[i]). + Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). + When the inputs are 1D arrays, PA[i] is updated if it is less than PB[i]. In + such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], respectively. + When the inputs are 2D arrays, always prioritizing the values of `PA` over the + values of `PB` in case of ties. (i.e., values from `PB` are always inserted to + the right of values from `PA`). Also, update `IA` accordingly. In case of + overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and + their corresponding values in PB[i]) are ignored throughout the updating process o + f IA[i] (and PA[i]). Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. 
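(A concrete illustration of the 1D branch described above, with made-up values; note how the index guard skips position 1, where the same neighbor is reported with a slightly different correlation due to numerical imprecision.)

    import numpy as np

    rho_A = np.array([0.90, 0.40, 0.75])
    I_A = np.array([10, 22, 35])
    rho_B = np.array([0.95, 0.41, 0.60])
    I_B = np.array([17, 22, 41])

    mask = (rho_B > rho_A) & (I_B != I_A)  # -> [True, False, False]
    rho_A[mask] = rho_B[mask]
    I_A[mask] = I_B[mask]
    # rho_A is now [0.95, 0.40, 0.75] and I_A is [17, 22, 35]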
@@ -2609,7 +2612,7 @@ def _merge_topk_PI(PA, PB, IA, IB): None """ if PA.ndim == 1: - mask = PB < PA + mask = (PB < PA) & (IB != IA) PA[mask] = PB[mask] IA[mask] = IB[mask] else: @@ -2643,12 +2646,15 @@ def _merge_topk_PI(PA, PB, IA, IB): @njit def _merge_topk_ρI(ρA, ρB, IA, IB): """ - Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place) while - always prioritizing the values of `ρA` over the values of `ρB` in case of ties. - (i.e., values from `ρB` are always inserted to the left of values from `ρA`). - Also, update `IA` accordingly. In case of overlapping values between two arrays - IA[i] and IB[i], the ones in IB[i] (and their corresponding values in ρB[i]) - are ignored throughout the updating process of IA[i] (and ρA[i]). + Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). + When the inputs are 1D arrays, ρA[i] is updated if it is more than ρB[i]. In + such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], respectively. + When the inputs are 2D arrays, always prioritizing the values of `ρA` over + the values of `ρB` in case of ties. (i.e., values from `ρB` are always inserted + to the left of values from `ρA`). Also, update `IA` accordingly. In case of + overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and + their corresponding values in ρB[i]) are ignored throughout the updating process + of IA[i] (and ρA[i]). Unlike `_merge_topk_PI`, where `top-k` smallest values are kept, this function keeps `top-k` largest values. @@ -2674,7 +2680,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): None """ if ρA.ndim == 1: - mask = ρB > ρA + mask = (ρB > ρA) & (IB != IA) ρA[mask] = ρB[mask] IA[mask] = IB[mask] else: From 41097a7a2462fdfd6b49cf52f1030cca6cda2a6d Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 12:36:46 -0600 Subject: [PATCH 388/416] Add separate test function for _merge_topk 1D case --- tests/test_core.py | 102 +++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 60 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 814e3f488..1765c048d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1078,21 +1078,6 @@ def test_merge_topk_PI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - # if k=1, make them 1D - if k == 1: - PA = PA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - PB = PB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_P = PA.copy() ref_I = IA.copy() @@ -1136,21 +1121,6 @@ def test_merge_topk_PI_with_overlap(): PB[:, :] = np.take_along_axis(PB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) - # if k=1, make them 1D - if k == 1: - PA = PA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - PB = PB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_P = PA.copy() ref_I = IA.copy() @@ -1164,6 +1134,27 @@ def test_merge_topk_PI_with_overlap(): npt.assert_almost_equal(ref_I, comp_I) +def test_merge_topk_PI_with_1D_input(): + n = 50 + PA = np.random.rand(n) + PB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + + def test_merge_topk_ρI_without_overlap(): # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there # is no overlap between row IA[i] and row IB[i]. 
@@ -1181,21 +1172,6 @@ def test_merge_topk_ρI_without_overlap(): IA = np.arange(n * k).reshape(n, k) IB = IA + n * k - # if k=1, make them 1D - if k == 1: - ρA = ρA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - ρB = ρB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_ρ = ρA.copy() ref_I = IA.copy() @@ -1239,21 +1215,6 @@ def test_merge_topk_ρI_with_overlap(): ρB[:, :] = np.take_along_axis(ρB, IDX, axis=1) IB[:, :] = np.take_along_axis(IB, IDX, axis=1) - # if k=1, make them 1D - if k == 1: - ρA = ρA.reshape( - -1, - ) - IA = IA.reshape( - -1, - ) - ρB = ρB.reshape( - -1, - ) - IB = IB.reshape( - -1, - ) - ref_ρ = ρA.copy() ref_I = IA.copy() @@ -1267,6 +1228,27 @@ def test_merge_topk_ρI_with_overlap(): npt.assert_almost_equal(ref_I, comp_I) +def test_merge_topk_ρI_with_1D_input(): + n = 50 + ρA = np.random.rand(n) + ρB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) + + def test_shift_insert_at_index(): for k in range(1, 6): a = np.random.rand(k) From 948d674d4b0fc5df3dbb2c834f79f30f3eb30d86 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 13:03:51 -0600 Subject: [PATCH 389/416] Add preprocessing function for prescrump --- stumpy/scrump.py | 125 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 23 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index ea5ebaf5d..1ab30c89d 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -14,6 +14,80 @@ logger = logging.getLogger(__name__) +def _preprocess_prescrump(T_A, m, T_B=None, s=None): + """ + Performs several preprocessings and returns outputs that are needed for the + prescrump algorithm. + + Parameters + ---------- + T_A : numpy.ndarray + The time series or sequence for which to compute the matrix profile + + m : int + Window size + + T_B : numpy.ndarray, default None + The time series or sequence that will be used to annotate T_A. For every + subsequence in T_A, its nearest neighbor in T_B will be recorded. + + s : int, default None + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + Returns + ------- + T_A : numpy.ndarray + A copy of the time series input `T_A`, where all NaN and inf values + are replaced with zero. + + T_B : numpy.ndarray + A copy of the time series input `T_B`, where all NaN and inf values + are replaced with zero. If the input `T_B` is not provided (default), + this array is just a copy of `T_A`. 
+ + μ_Q : numpy.ndarray + Sliding window mean for `T_A` + + σ_Q : numpy.ndarray + Sliding window standard deviation for `T_A` + + M_T : numpy.ndarray + Sliding window mean for `T_B` + + Σ_T : numpy.ndarray + Sliding window standard deviation for `T_B` + + indices : numpy.ndarray + The subsequence indices to compute `prescrump` for + + s : int + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + excl_zone : int + The half width for the exclusion zone + """ + if T_B is None: + T_B = T_A + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + else: + excl_zone = None + + T_A, μ_Q, σ_Q = core.preprocess(T_A, m) + T_B, M_T, Σ_T = core.preprocess(T_B, m) + + n_A = T_A.shape[0] + l = n_A - m + 1 + + if s is None: # pragma: no cover + s = excl_zone + + indices = np.random.permutation(range(0, l, s)).astype(np.int64) + + return (T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone) + + @njit(fastmath=True) def _compute_PI( T_A, @@ -425,22 +499,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): See Algorithm 2 """ - if T_B is None: - T_B = T_A - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - else: - excl_zone = None - - T_A, μ_Q, σ_Q = core.preprocess(T_A, m) - T_B, M_T, Σ_T = core.preprocess(T_B, m) - - n_A = T_A.shape[0] - l = n_A - m + 1 - - if s is None: # pragma: no cover - s = excl_zone + T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone = _preprocess_prescrump( + T_A, m, T_B=T_B, s=s + ) - indices = np.random.permutation(range(0, l, s)).astype(np.int64) P, I = _prescrump( T_A, T_B, @@ -714,15 +776,32 @@ def __init__( if pre_scrump: if self._ignore_trivial: - P, I = prescrump(T_A, m, s=s, k=self._k) + ( + T_A, + T_B, + μ_Q, + σ_Q, + M_T, + Σ_T, + indices, + s, + excl_zone, + ) = _preprocess_prescrump(T_A, m, s=s) else: - P, I = prescrump(T_A, m, T_B=T_B, s=s, k=self._k) - - # P and I are 1D when `self._k` is 1. 
So, we should reshape them - # before passing them to `_merge_topk_PI` - core._merge_topk_PI( - self._P, P.reshape(-1, self._k), self._I, I.reshape(-1, self._k) - ) + ( + T_A, + T_B, + μ_Q, + σ_Q, + M_T, + Σ_T, + indices, + s, + excl_zone, + ) = _preprocess_prescrump(T_A, m, T_B=T_B, s=s) + + P, I = _prescrump(T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone, k) + core._merge_topk_PI(self._P, P, self._I, I) if self._ignore_trivial: self._diags = np.random.permutation( From 391c97dd7c47ad3e53c6120ecaff737899833737 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 13:42:21 -0600 Subject: [PATCH 390/416] Update test function --- tests/test_scrump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 1341dc1a1..de978c856 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -387,8 +387,8 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): naive.replace_inf(ref_P) naive.replace_inf(comp_P) - ref_P = ref_P.flatten() - ref_I = ref_I.flatten() + ref_P = ref_P + ref_I = ref_I npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) @@ -432,8 +432,8 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): naive.replace_inf(ref_P) naive.replace_inf(comp_P) - ref_P = ref_P.flatten() - ref_I = ref_I.flatten() + ref_P = ref_P + ref_I = ref_I npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) From 4d7cccfb1a96c70dc5e25033b644d7b7324d887b Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 14:33:04 -0600 Subject: [PATCH 391/416] fix missing argument --- stumpy/scrump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 1ab30c89d..49825546b 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -800,7 +800,7 @@ def __init__( excl_zone, ) = _preprocess_prescrump(T_A, m, T_B=T_B, s=s) - P, I = _prescrump(T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone, k) + P, I = _prescrump(T_A, T_B, m, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone, k) core._merge_topk_PI(self._P, P, self._I, I) if self._ignore_trivial: From e8814cf2181b9d5333d2b79bc0fba03a5f4e870d Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 14:48:02 -0600 Subject: [PATCH 392/416] Fix Docstring --- stumpy/scrump.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 49825546b..fa47c44fd 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -482,14 +482,14 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): Returns ------- P : numpy.ndarray - The (top-k) matrix profile. When k = 1 (default), the first (and only) column - in this 2D array consists of the matrix profile. When k > 1, the output has - exactly `k` columns consisting of the top-k matrix profile. + The (top-k) matrix profile. When k = 1 (default), this is a 1D array + consisting of the matrix profile. When k > 1, the output is a 2D array that + has exactly `k` columns consisting of the top-k matrix profile. I : numpy.ndarray - The (top-k) matrix profile indices. When k = 1 (default), the first (and only) - column in this 2D array consists of the matrix profile indices. When k > 1, - the output has exactly `k` columns consisting of the top-k matrix profile + The (top-k) matrix profile indices. When k = 1 (default), this is a 1D array + consisting of the matrix profile indices. 
When k > 1, the output is a 2D + array that has exactly `k` columns consisting of the top-k matrix profile indices. Notes From 39469153a503c38be5d184b50e044549bbecfe87 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 16:06:32 -0600 Subject: [PATCH 393/416] Put back the missing decorator --- stumpy/scrump.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index e82c454e5..921aae7a6 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -442,6 +442,7 @@ def _prescrump( return np.sqrt(P_squared[0]), I[0] +@core.non_normalized(scraamp.prescraamp) def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): """ A convenience wrapper around the Numba JIT-compiled parallelized From eff9ca4aaf175455c8f3ba90f82c323ee9b4a4e3 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 18:46:29 -0600 Subject: [PATCH 394/416] Add preprocessing function in prescraamp --- stumpy/scraamp.py | 127 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 110 insertions(+), 17 deletions(-) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 143dc233e..b9fdc72e7 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -14,6 +14,76 @@ logger = logging.getLogger(__name__) +def _preprocess_prescraamp(T_A, m, T_B=None, s=None): + """ + Performs several preprocessings and returns outputs that are needed for the + non-normalized preSCRIMP algorithm. + + Parameters + ---------- + T_A : numpy.ndarray + The time series or sequence for which to compute the matrix profile + + m : int + Window size + + T_B : numpy.ndarray, default None + The time series or sequence that will be used to annotate T_A. For every + subsequence in T_A, its nearest neighbor in T_B will be recorded. + + s : int, default None + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + Returns + ------- + T_A : numpy.ndarray + A copy of the time series input `T_A`, where all NaN and inf values + are replaced with zero. + + T_B : numpy.ndarray + A copy of the time series input `T_B`, where all NaN and inf values + are replaced with zero. If the input `T_B` is not provided (default), + this array is just a copy of `T_A`. 
+ + T_A_subseq_isfinite : numpy.ndarray + A boolean array that indicates whether a subsequence in `T_A` contains a + `np.nan`/`np.inf` value (False) + + T_B_subseq_isfinite : numpy.ndarray + A boolean array that indicates whether a subsequence in `T_B` contains a + `np.nan`/`np.inf` value (False) + + indices : numpy.ndarray + The subsequence indices to compute `prescrump` for + + s : int + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + excl_zone : int + The half width for the exclusion zone + """ + if T_B is None: + T_B = T_A + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + else: + excl_zone = None + + T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m) + T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m) + + n_A = T_A.shape[0] + l = n_A - m + 1 + + if s is None: # pragma: no cover + s = excl_zone + + indices = np.random.permutation(range(0, l, s)).astype(np.int64) + + return (T_A, T_B, T_A_subseq_isfinite, T_B_subseq_isfinite, indices, s, excl_zone) + + @njit(fastmath=True) def _compute_PI( T_A, @@ -318,22 +388,16 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0, k=1): See Algorithm 2 """ - if T_B is None: - T_B = T_A - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - else: - excl_zone = None - - T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m) - T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m) - - n_A = T_A.shape[0] - l = n_A - m + 1 - - if s is None: # pragma: no cover - s = excl_zone + ( + T_A, + T_B, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + indices, + s, + excl_zone, + ) = _preprocess_prescraamp(T_A, m, T_B=T_B, s=s) - indices = np.random.permutation(range(0, l, s)).astype(np.int64) P, I = _prescraamp( T_A, T_B, @@ -532,9 +596,38 @@ def __init__( if pre_scraamp: if self._ignore_trivial: - P, I = prescraamp(T_A, m, s=s, p=p) + ( + T_A, + T_B, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + indices, + s, + excl_zone, + ) = _preprocess_prescraamp(T_A, m, s=s) else: - P, I = prescraamp(T_A, m, T_B=T_B, s=s, p=p) + ( + T_A, + T_B, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + indices, + s, + excl_zone, + ) = _preprocess_prescraamp(T_A, m, T_B=T_B, s=s) + + P, I = _prescraamp( + T_A, + T_B, + m, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + p, + indices, + s, + excl_zone, + ) + for i in range(P.shape[0]): if self._P[i, 0] > P[i]: self._P[i, 0] = P[i] From 666b93e0638a247c878f271ab347e839ee2e0cfb Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 18:56:16 -0600 Subject: [PATCH 395/416] Revise naive function --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 2e7b9d160..b2be9f86d 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1828,7 +1828,7 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w def merge_topk_PI(PA, PB, IA, IB): if PA.ndim == 1: - mask = PB < PA + mask = (PB < PA) & (IB != IA) PA[mask] = PB[mask] IA[mask] = IB[mask] return @@ -1870,7 +1870,7 @@ def merge_topk_ρI(ρA, ρB, IA, IB): # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second # half of this array, and discard the first half. 
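(To make the merging strategy in the comment above concrete, here is a small reference formulation of the "sort the 2k candidates and keep the upper half" idea for one row; it is a sketch only, not the code used in this file, and the -inf masking of overlapping B entries is an assumption of the sketch.)

    import numpy as np

    def merge_topk_rho_row(rho_A_row, I_A_row, rho_B_row, I_B_row):
        # Overlapping B entries are pushed to -inf so they land in the
        # discarded lower half. B is placed first so that, on ties, B sorts
        # to the left and A is the entry retained in the upper half.
        k = rho_A_row.shape[0]
        rho_B_row = rho_B_row.copy()
        rho_B_row[np.isin(I_B_row, I_A_row)] = -np.inf
        rho = np.concatenate([rho_B_row, rho_A_row])
        I = np.concatenate([I_B_row, I_A_row])
        order = np.argsort(rho, kind="stable")[k:]
        return rho[order], I[order]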
if ρA.ndim == 1: - mask = ρB > ρA + mask = (ρB > ρA) & (IB != IA) ρA[mask] = ρB[mask] IA[mask] = IB[mask] return From eee6d75db6bbba55b90acc3803ab529983da7f60 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 18:59:17 -0600 Subject: [PATCH 396/416] Fix value of imprecision in test functions --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 1765c048d..52d00f706 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1108,7 +1108,7 @@ def test_merge_topk_PI_with_overlap(): for i in range(n): # create overlaps col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) - imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(col_IDX)) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=len(col_IDX)) PB[i, col_IDX] = PA[i, col_IDX] + imprecision IB[i, col_IDX] = IA[i, col_IDX] @@ -1202,7 +1202,7 @@ def test_merge_topk_ρI_with_overlap(): for i in range(n): # create overlaps col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) - imprecision = np.random.uniform(low=-1e6, high=1e6, size=len(col_IDX)) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=len(col_IDX)) ρB[i, col_IDX] = ρA[i, col_IDX] + imprecision IB[i, col_IDX] = IA[i, col_IDX] From 27d229b9f325cb661d618b93e0146dc53044d8ed Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 31 Aug 2022 19:13:05 -0600 Subject: [PATCH 397/416] create overlaps randomly for test merge_topk in 1D case --- tests/test_core.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 52d00f706..693920973 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1135,6 +1135,7 @@ def test_merge_topk_PI_with_overlap(): def test_merge_topk_PI_with_1D_input(): + # including some overlaps randomly n = 50 PA = np.random.rand(n) PB = np.random.rand(n) @@ -1142,6 +1143,12 @@ def test_merge_topk_PI_with_1D_input(): IA = np.arange(n) IB = IA + n + n_overlaps = np.random.randint(1, n + 1) + IDX_rows_with_overlaps = np.random.choice(np.arange(n), n_overlaps, replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=n_overlaps) + PB[IDX_rows_with_overlaps] = PA[IDX_rows_with_overlaps] + imprecision + IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps] + ref_P = PA.copy() ref_I = IA.copy() @@ -1229,6 +1236,7 @@ def test_merge_topk_ρI_with_overlap(): def test_merge_topk_ρI_with_1D_input(): + # including some overlaps randomly n = 50 ρA = np.random.rand(n) ρB = np.random.rand(n) @@ -1242,6 +1250,12 @@ def test_merge_topk_ρI_with_1D_input(): comp_ρ = ρA.copy() comp_I = IA.copy() + n_overlaps = np.random.randint(1, n + 1) + IDX_rows_with_overlaps = np.random.choice(np.arange(n), n_overlaps, replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=n_overlaps) + ρB[IDX_rows_with_overlaps] = ρA[IDX_rows_with_overlaps] + imprecision + IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps] + naive.merge_topk_PI(ref_ρ, ρB.copy(), ref_I, IB.copy()) core._merge_topk_PI(comp_ρ, ρB.copy(), comp_I, IB.copy()) From 03f19d8ef2bf4cceee7c770df036a47d27818cb1 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sat, 3 Sep 2022 13:51:47 -0600 Subject: [PATCH 398/416] Revise docstrings --- stumpy/core.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 6e3046f9f..6cc90ed1e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2579,8 +2579,13 @@ def 
_select_P_ABBA_value(P_ABBA, k, custom_func=None): def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). - When the inputs are 1D arrays, PA[i] is updated if it is less than PB[i]. In - such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], respectively. + When the inputs are 1D arrays, PA[i] is updated if it is less than PB[i] and + IA[i] != IB[i]. In such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], + respectively. (Note that it might happen that IA[i]=IB[i] but PA[i] != PB[i]. + This situation can occur if there is slight imprecision in numerical calculations. + In that case, we do not update PA[i] and IA[i]. While updating PA[i] and IA[i] + is harmless in this case, we avoid doing that so to be consistent with the merging + process when the inputs are 2D arrays) When the inputs are 2D arrays, always prioritizing the values of `PA` over the values of `PB` in case of ties. (i.e., values from `PB` are always inserted to the right of values from `PA`). Also, update `IA` accordingly. In case of @@ -2595,7 +2600,7 @@ def _merge_topk_PI(PA, PB, IA, IB): ---------- PA : numpy.ndarray A (top-k) matrix profile where values in each row are sorted in ascending - order. `PA` must be 2-dimensional. + order. `PA` must be 1- or 2-dimensional. PB : numpy.ndarray A (top-k) matrix profile where values in each row are sorted in ascending @@ -2647,8 +2652,13 @@ def _merge_topk_PI(PA, PB, IA, IB): def _merge_topk_ρI(ρA, ρB, IA, IB): """ Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). - When the inputs are 1D arrays, ρA[i] is updated if it is more than ρB[i]. In - such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], respectively. + When the inputs are 1D arrays, ρA[i] is updated if it is more than ρB[i] and + IA[i] != IB[i]. In such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], + respectively. (Note that it might happen that IA[i]=IB[i] but ρA[i] != ρB[i]. + This situation can occur if there is slight imprecision in numerical calculations. + In that case, we do not update ρA[i] and IA[i]. While updating ρA[i] and IA[i] + is harmless in this case, we avoid doing that so to be consistent with the merging + process when the inputs are 2D arrays) When the inputs are 2D arrays, always prioritizing the values of `ρA` over the values of `ρB` in case of ties. (i.e., values from `ρB` are always inserted to the left of values from `ρA`). Also, update `IA` accordingly. In case of @@ -2663,7 +2673,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): ---------- ρA : numpy.ndarray A (top-k) pearson profile where values in each row are sorted in ascending - order. `ρA` must be 2-dimensional. + order. `ρA` must be 1- or 2-dimensional. ρB : numpy.ndarray A (top-k) pearson profile, where values in each row are sorted in ascending From d35de3e439c972a4746d2df4895bb4e5c7e3d78c Mon Sep 17 00:00:00 2001 From: ninimama Date: Wed, 14 Sep 2022 10:05:36 -0600 Subject: [PATCH 399/416] Fix docstrings --- stumpy/core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 6cc90ed1e..3c8a9895e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2579,19 +2579,19 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). 
- When the inputs are 1D arrays, PA[i] is updated if it is less than PB[i] and + When the inputs are 1D arrays, PA[i] is updated if it is greater than PB[i] and IA[i] != IB[i]. In such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], respectively. (Note that it might happen that IA[i]=IB[i] but PA[i] != PB[i]. This situation can occur if there is slight imprecision in numerical calculations. In that case, we do not update PA[i] and IA[i]. While updating PA[i] and IA[i] is harmless in this case, we avoid doing that so to be consistent with the merging process when the inputs are 2D arrays) - When the inputs are 2D arrays, always prioritizing the values of `PA` over the + When the inputs are 2D arrays, we always prioritize the values of `PA` over the values of `PB` in case of ties. (i.e., values from `PB` are always inserted to the right of values from `PA`). Also, update `IA` accordingly. In case of overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and - their corresponding values in PB[i]) are ignored throughout the updating process o - f IA[i] (and PA[i]). + their corresponding values in PB[i]) are ignored throughout the updating process + of IA[i] (and PA[i]). Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function keeps `top-k` smallest values. @@ -2652,14 +2652,14 @@ def _merge_topk_PI(PA, PB, IA, IB): def _merge_topk_ρI(ρA, ρB, IA, IB): """ Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). - When the inputs are 1D arrays, ρA[i] is updated if it is more than ρB[i] and + When the inputs are 1D arrays, ρA[i] is updated if it is less than ρB[i] and IA[i] != IB[i]. In such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], respectively. (Note that it might happen that IA[i]=IB[i] but ρA[i] != ρB[i]. This situation can occur if there is slight imprecision in numerical calculations. In that case, we do not update ρA[i] and IA[i]. While updating ρA[i] and IA[i] is harmless in this case, we avoid doing that so to be consistent with the merging process when the inputs are 2D arrays) - When the inputs are 2D arrays, always prioritizing the values of `ρA` over + When the inputs are 2D arrays, we always prioritize the values of `ρA` over the values of `ρB` in case of ties. (i.e., values from `ρB` are always inserted to the left of values from `ρA`). Also, update `IA` accordingly. In case of overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and From 0c80852759d05965a25989810ae8b9e032955af1 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Wed, 12 Oct 2022 23:52:44 -0600 Subject: [PATCH 400/416] minor changes --- tests/naive.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index b2be9f86d..9e21d7f1a 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1828,9 +1828,10 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w def merge_topk_PI(PA, PB, IA, IB): if PA.ndim == 1: - mask = (PB < PA) & (IB != IA) - PA[mask] = PB[mask] - IA[mask] = IB[mask] + for i in range(PA.shape[0]): + if PB[i] < PA[i] and IB[i] != IA[i]: + PA[i] = PB[i] + IA[i] = IB[i] return k = PA.shape[1] @@ -1870,9 +1871,10 @@ def merge_topk_ρI(ρA, ρB, IA, IB): # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second # half of this array, and discard the first half. 
if ρA.ndim == 1: - mask = (ρB > ρA) & (IB != IA) - ρA[mask] = ρB[mask] - IA[mask] = IB[mask] + for i in range(ρA.shape[0]): + if ρB[i] > ρA[i] and IB[i] != IA[i]: + ρA[i] = ρB[i] + IA[i] = IB[i] return k = ρA.shape[1] From 2e3af6a51135c5c11e7087d3307c79d8428eeed3 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Thu, 13 Oct 2022 00:08:02 -0600 Subject: [PATCH 401/416] minor fix --- tests/naive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 9e21d7f1a..05f101222 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -720,12 +720,12 @@ def __init__(self, T, m, excl_zone=None, p=2.0): self._m = m self._p = p - if excl_zone is None: # apply similar changes in naive `class stumpi_egress` - excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._excl_zone = excl_zone + if self._excl_zone is None: + self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = aamp(T, m, p=p) + mp = aamp(T, m, exclusion_zone=self._excl_zone, p=p) self.P_ = mp[:, 0] self.I_ = mp[:, 1].astype(np.int64) self.left_P_ = np.full(self.P_.shape, np.inf) From a6460340749dc3cc990ddec19b1b4e9b9bb28d5e Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Fri, 14 Oct 2022 23:09:06 -0600 Subject: [PATCH 402/416] change variable name --- tests/naive.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 05f101222..96dacba82 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -807,10 +807,12 @@ def __init__(self, T, m, excl_zone=None, k=1): self.left_I_ = mp[:, 2 * k].astype(np.int64) self.left_P_ = np.full_like(self.left_I_, np.inf, dtype=np.float64) - for i, nn_i in enumerate(self.left_I_): - if nn_i >= 0: - D = core.mass(self._T[i : i + self._m], self._T[nn_i : nn_i + self._m]) - self.left_P_[i] = D[0] + for idx, nn_idx in enumerate(self.left_I_): + if nn_idx >= 0: + D = core.mass( + self._T[idx : idx + self._m], self._T[nn_idx : nn_idx + self._m] + ) + self.left_P_[idx] = D[0] self._n_appended = 0 From d6a0a3d00470767cd63dbb033360ef6975f3e432 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Fri, 14 Oct 2022 23:36:41 -0600 Subject: [PATCH 403/416] change variables names --- tests/test_gpu_stump.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 3f86ab03f..a6d4e6953 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -51,14 +51,14 @@ def test_gpu_stump_int_input(): @cuda.jit("(f8[:, :], f8[:], i8[:], i8, b1, i8[:])") -def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX): +def _gpu_searchsorted_kernel(a, v, bfs, nlevel, is_left, idx): # A wrapper kernel for calling device function _gpu_searchsorted_left/right. 
i = cuda.grid(1) - if i < A.shape[0]: + if i < a.shape[0]: if is_left: - IDX[i] = _gpu_searchsorted_left(A[i], V[i], bfs, nlevel) + idx[i] = _gpu_searchsorted_left(a[i], v[i], bfs, nlevel) else: - IDX[i] = _gpu_searchsorted_right(A[i], V[i], bfs, nlevel) + idx[i] = _gpu_searchsorted_right(a[i], v[i], bfs, nlevel) @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) From 6ae95ec5d64955640d7c15f62afef023b66c80a6 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Fri, 14 Oct 2022 23:53:54 -0600 Subject: [PATCH 404/416] convert attr to property attr to get 1D when k is 1 --- tests/naive.py | 71 ++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 96dacba82..8774f4b46 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -801,30 +801,22 @@ def __init__(self, T, m, excl_zone=None, k=1): self._l = self._T.shape[0] - m + 1 mp = stump(T, m, exclusion_zone=self._excl_zone, k=self._k) - self.P_ = mp[:, :k].astype(np.float64) - self.I_ = mp[:, k : 2 * k].astype(np.int64) + self._P = mp[:, :k].astype(np.float64) + self._I = mp[:, k : 2 * k].astype(np.int64) - self.left_I_ = mp[:, 2 * k].astype(np.int64) - self.left_P_ = np.full_like(self.left_I_, np.inf, dtype=np.float64) + self._left_I = mp[:, 2 * k].astype(np.int64) + self._left_P = np.full_like(self._left_I, np.inf, dtype=np.float64) - for idx, nn_idx in enumerate(self.left_I_): + for idx, nn_idx in enumerate(self._left_I): if nn_idx >= 0: D = core.mass( self._T[idx : idx + self._m], self._T[nn_idx : nn_idx + self._m] ) - self.left_P_[idx] = D[0] + self._left_P[idx] = D[0] self._n_appended = 0 - if self._k == 1: - self.P_ = self.P_.flatten() - self.I_ = self.I_.flatten() - def update(self, t): - # ensure than self.P_ and self.I_ are 2D - self.P_ = self.P_.reshape(-1, self._k) - self.I_ = self.I_.reshape(-1, self._k) - self._T[:] = np.roll(self._T, -1) self._T_isfinite[:] = np.roll(self._T_isfinite, -1) if np.isfinite(t): @@ -835,10 +827,10 @@ def update(self, t): self._T[-1] = 0 self._n_appended += 1 - self.P_ = np.roll(self.P_, -1, axis=0) - self.I_ = np.roll(self.I_, -1, axis=0) - self.left_P_[:] = np.roll(self.left_P_, -1) - self.left_I_[:] = np.roll(self.left_I_, -1) + self._P = np.roll(self._P, -1, axis=0) + self._I = np.roll(self._I, -1, axis=0) + self._left_P[:] = np.roll(self._left_P, -1) + self._left_I[:] = np.roll(self._left_I, -1) D = core.mass(self._T[-self._m :], self._T) T_subseq_isfinite = np.all( @@ -851,28 +843,45 @@ def update(self, t): apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) # update top-k matrix profile using newly calculated distance profile `D` for j in range(D.shape[0]): - if D[j] < self.P_[j, -1]: - pos = np.searchsorted(self.P_[j], D[j], side="right") - self.P_[j] = np.insert(self.P_[j], pos, D[j])[:-1] - self.I_[j] = np.insert( - self.I_[j], pos, D.shape[0] - 1 + self._n_appended + if D[j] < self._P[j, -1]: + pos = np.searchsorted(self._P[j], D[j], side="right") + self._P[j] = np.insert(self._P[j], pos, D[j])[:-1] + self._I[j] = np.insert( + self._I[j], pos, D.shape[0] - 1 + self._n_appended )[:-1] # update top-k for the last, newly-updated index I_last_topk = np.argsort(D, kind="mergesort")[: self._k] - self.P_[-1] = D[I_last_topk] - self.I_[-1] = I_last_topk + self._n_appended - self.I_[-1][self.P_[-1] == np.inf] = -1 + self._P[-1] = D[I_last_topk] + self._I[-1] = I_last_topk + self._n_appended + self._I[-1][self._P[-1] == np.inf] = -1 # for the last index, the left matrix profile value 
is self.P_[-1, 0] # and the same goes for the left matrix profile index - self.left_P_[-1] = self.P_[-1, 0] - self.left_I_[-1] = self.I_[-1, 0] + self._left_P[-1] = self._P[-1, 0] + self._left_I[-1] = self._I[-1, 0] - # post-processing: ensure that self.P_ and self.I_ are 1D. + @property + def P_(self): if self._k == 1: - self.P_ = self.P_.flatten() - self.I_ = self.I_.flatten() + return self._P.flatten().astype(np.float64) + else: + return self._P.astype(np.float64) + + @property + def I_(self): + if self._k == 1: + return self._I.flatten().astype(np.int64) + else: + return self._I.astype(np.int64) + + @property + def left_P_(self): + return self._left_P.astype(np.float64) + + @property + def left_I_(self): + return self._left_I.astype(np.int64) def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m): From 73ebe404def40c4566edf29f03edf58e31c61590 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sat, 15 Oct 2022 00:03:51 -0600 Subject: [PATCH 405/416] avoid calling performant function in a naive function --- tests/naive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 8774f4b46..aa7d27315 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -809,8 +809,8 @@ def __init__(self, T, m, excl_zone=None, k=1): for idx, nn_idx in enumerate(self._left_I): if nn_idx >= 0: - D = core.mass( - self._T[idx : idx + self._m], self._T[nn_idx : nn_idx + self._m] + D = distance_profile( + self._T[idx : idx + self._m], self._T[nn_idx : nn_idx + self._m], m ) self._left_P[idx] = D[0] From 4719e2f9c366ba62e5033ac8198cd0abdc7df2aa Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sat, 15 Oct 2022 00:14:37 -0600 Subject: [PATCH 406/416] minor modification on z_norm functions --- stumpy/core.py | 8 ++++++-- tests/naive.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index fa8fc4737..1a02316fc 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -254,7 +254,7 @@ def rolling_window(a, window): return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) -def z_norm(a, axis=0): +def z_norm(a, axis=0, threshold=config.STUMPY_STDDEV_THRESHOLD): """ Calculate the z-normalized input array `a` by subtracting the mean and dividing by the standard deviation along a given axis. @@ -267,13 +267,17 @@ def z_norm(a, axis=0): axis : int, default 0 NumPy array axis + threshold : float, default to config.STUMPY_STDDEV_THRESHOLD + A non-nan std value being less than `threshold` will be replaced with 1.0 + Returns ------- output : numpy.ndarray An array with z-normalized values computed along a specified axis. 
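As an aside, the thresholded z-normalization documented above can be sketched in a few standalone lines; the helper name `z_norm_sketch` and the 1e-7 stand-in for `config.STUMPY_STDDEV_THRESHOLD` are illustrative assumptions, not part of any patch:

    import numpy as np

    STDDEV_THRESHOLD = 1e-7  # stand-in for config.STUMPY_STDDEV_THRESHOLD

    def z_norm_sketch(a, axis=0, threshold=STDDEV_THRESHOLD):
        # A (non-NaN) standard deviation below `threshold` is replaced with 1.0 so
        # that a nearly constant input z-normalizes to ~0 instead of dividing by a
        # vanishingly small number.
        std = np.std(a, axis=axis, keepdims=True)
        std[np.less(std, threshold, where=~np.isnan(std))] = 1.0
        return (a - np.mean(a, axis=axis, keepdims=True)) / std

    print(z_norm_sketch(np.array([5.0, 5.0, 5.0])))  # [0. 0. 0.]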
""" std = np.std(a, axis, keepdims=True) - std[std == 0] = 1 + mask = ~np.isnan(std) & std < config.STUMPY_STDDEV_THRESHOLD + std[mask] = 1.0 return (a - np.mean(a, axis, keepdims=True)) / std diff --git a/tests/naive.py b/tests/naive.py index aa7d27315..c02f79e25 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -6,7 +6,7 @@ from stumpy import core, config -def z_norm(a, axis=0, threshold=1e-7): +def z_norm(a, axis=0, threshold=config.STUMPY_STDDEV_THRESHOLD): std = np.std(a, axis, keepdims=True) std[np.less(std, threshold, where=~np.isnan(std))] = 1.0 From 63b28289d964ef9496ba4783d5dee79bf918bb04 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Sat, 15 Oct 2022 00:27:05 -0600 Subject: [PATCH 407/416] fix function --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 0416cb510..f29d45500 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -276,8 +276,7 @@ def z_norm(a, axis=0, threshold=config.STUMPY_STDDEV_THRESHOLD): An array with z-normalized values computed along a specified axis. """ std = np.std(a, axis, keepdims=True) - mask = ~np.isnan(std) & std < config.STUMPY_STDDEV_THRESHOLD - std[mask] = 1.0 + std[np.less(std, threshold, where=~np.isnan(std))] = 1.0 return (a - np.mean(a, axis, keepdims=True)) / std From d1f3119ce5e5cf1012c0fdb6a66e57106bb14b4e Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Mon, 17 Oct 2022 21:33:11 -0600 Subject: [PATCH 408/416] revise docstrings --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f29d45500..ef99a0746 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2582,7 +2582,7 @@ def _merge_topk_PI(PA, PB, IA, IB): Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). When the inputs are 1D arrays, PA[i] is updated if it is greater than PB[i] and IA[i] != IB[i]. In such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], - respectively. (Note that it might happen that IA[i]=IB[i] but PA[i] != PB[i]. + respectively. (Note that it might happen that IA[i]==IB[i] but PA[i] != PB[i]. This situation can occur if there is slight imprecision in numerical calculations. In that case, we do not update PA[i] and IA[i]. While updating PA[i] and IA[i] is harmless in this case, we avoid doing that so to be consistent with the merging @@ -2655,7 +2655,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). When the inputs are 1D arrays, ρA[i] is updated if it is less than ρB[i] and IA[i] != IB[i]. In such case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i], - respectively. (Note that it might happen that IA[i]=IB[i] but ρA[i] != ρB[i]. + respectively. (Note that it might happen that IA[i]==IB[i] but ρA[i] != ρB[i]. This situation can occur if there is slight imprecision in numerical calculations. In that case, we do not update ρA[i] and IA[i]. 
While updating ρA[i] and IA[i] is harmless in this case, we avoid doing that so to be consistent with the merging From 4a94c0ed0ea05a0daac7877d41ab1b3a51fc7e02 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Mon, 17 Oct 2022 21:46:01 -0600 Subject: [PATCH 409/416] change variable name --- stumpy/scrump.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 52b9a99ab..0482ecbec 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -191,26 +191,26 @@ def _compute_PI( if excl_zone is not None: core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) - nn_idx = np.argmin(squared_distance_profile) + nn_i = np.argmin(squared_distance_profile) if ( - squared_distance_profile[nn_idx] < P_squared[thread_idx, i, -1] - and nn_idx not in I[thread_idx, i] + squared_distance_profile[nn_i] < P_squared[thread_idx, i, -1] + and nn_i not in I[thread_idx, i] ): idx = np.searchsorted( P_squared[thread_idx, i], - squared_distance_profile[nn_idx], + squared_distance_profile[nn_i], side="right", ) core._shift_insert_at_index( - P_squared[thread_idx, i], idx, squared_distance_profile[nn_idx] + P_squared[thread_idx, i], idx, squared_distance_profile[nn_i] ) - core._shift_insert_at_index(I[thread_idx, i], idx, nn_idx) + core._shift_insert_at_index(I[thread_idx, i], idx, nn_i) if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover I[thread_idx, i, 0] = -1 continue - j = nn_idx + j = nn_i # Given the squared distance, work backwards and compute QT QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( m * M_T[j] * μ_Q[i] From 34361f7935023cad5ba7c4c9cd71d5841405f3a8 Mon Sep 17 00:00:00 2001 From: SolidAhmad Date: Mon, 17 Oct 2022 21:51:01 -0600 Subject: [PATCH 410/416] Relocate comment --- tests/naive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index c02f79e25..e123f37ff 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -1463,11 +1463,11 @@ def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): j = nn_idx for g in range(1, min(s, l - i, w - j)): d = dist_matrix[i + g, j + g] + # Do NOT optimize the `condition` in the following if statement + # and similar ones in this naive function. This is to ensure + # we are avoiding duplicates in each row of I. if d < P[i + g, -1] and (j + g) not in I[i + g]: pos = np.searchsorted(P[i + g], d, side="right") - # Do NOT optimize the `condition` in the following if statement - # and similar ones in this naive function. This is to ensure - # we are avoiding duplicates in each row of I. 
                P[i + g] = np.insert(P[i + g], pos, d)[:-1]
                I[i + g] = np.insert(I[i + g], pos, j + g)[:-1]
            if (

From 8d0258a71d56dcd1689109da11db010326b6db78 Mon Sep 17 00:00:00 2001
From: SolidAhmad
Date: Mon, 17 Oct 2022 22:08:31 -0600
Subject: [PATCH 411/416] minor changes

---
 tests/naive.py | 66 ++++++++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index e123f37ff..a58069bba 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1845,25 +1845,30 @@ def merge_topk_PI(PA, PB, IA, IB):
                 IA[i] = IB[i]
         return

-    k = PA.shape[1]
-    for i in range(PA.shape[0]):
-        _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True)
-        PB[i, overlap_idx_B] = np.inf
-        IB[i, overlap_idx_B] = -1
+    else:
+        k = PA.shape[1]
+        for i in range(PA.shape[0]):
+            _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True)
+            PB[i, overlap_idx_B] = np.inf
+            IB[i, overlap_idx_B] = -1

-    profile = np.column_stack((PA, PB))
-    indices = np.column_stack((IA, IB))
-    IDX = np.argsort(profile, axis=1, kind="mergesort")
-    profile[:, :] = np.take_along_axis(profile, IDX, axis=1)
-    indices[:, :] = np.take_along_axis(indices, IDX, axis=1)
+        profile = np.column_stack((PA, PB))
+        indices = np.column_stack((IA, IB))
+        IDX = np.argsort(profile, axis=1, kind="mergesort")
+        profile[:, :] = np.take_along_axis(profile, IDX, axis=1)
+        indices[:, :] = np.take_along_axis(indices, IDX, axis=1)

-    PA[:, :] = profile[:, :k]
-    IA[:, :] = indices[:, :k]
+        PA[:, :] = profile[:, :k]
+        IA[:, :] = indices[:, :k]
+
+    return


 def merge_topk_ρI(ρA, ρB, IA, IB):
-    # this is to merge two pearson profiles `ρA` and `ρB`, where each is a 2D array
-    # and each row is sorted ascendingly. we want to keep top-k largest values in
+    # This function merges two pearson profiles `ρA` and `ρB`, and updates `ρA`
+    # and `IA` accordingly. When the inputs are 1D, `ρA[i]` is updated if
+    # `ρA[i] < ρB[i]` and IA[i] != IB[i]. When the inputs are 2D, each row in
+    # `ρA` and `ρB` is sorted ascendingly. We want to keep top-k largest values in
     # merging row `ρA[i]` and `ρB[i]`.

     # In case of ties between `ρA` and `ρB`, the priority is with `ρA`. In case
@@ -1879,8 +1884,8 @@ def merge_topk_ρI(ρA, ρB, IA, IB):
     # For the same example:
     # merging `ρB` and `ρA` ascendingly while choosing `ρB` over `ρA` in case of
-    # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and we just need to keep the second
-    # half of this array, and discard the first half.
+    # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and the second half of this array
+    # is the desirable outcome.
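As an aside, the tie-breaking behavior described in the comments above can be seen in a tiny standalone example; the row values, `k`, and indices below are made up for illustration and overlapping indices are assumed to have been masked out already:

    import numpy as np

    # One row of hypothetical top-2 (k = 2) pearson profiles.
    ρA = np.array([[0.8, 0.9]]); IA = np.array([[10, 12]])
    ρB = np.array([[0.9, 0.95]]); IB = np.array([[11, 13]])

    k = ρA.shape[1]
    profile = np.column_stack((ρB, ρA))   # ρB is stacked first, then ρA
    indices = np.column_stack((IB, IA))
    idx = np.argsort(profile, axis=1, kind="mergesort")  # stable sort
    ρA[:, :] = np.take_along_axis(profile, idx, axis=1)[:, k:]   # keep largest k
    IA[:, :] = np.take_along_axis(indices, idx, axis=1)[:, k:]

    # ρA -> [[0.9, 0.95]], IA -> [[12, 13]]: for the tie at 0.9, the stable sort
    # keeps ρA's entry (index 12) in the second half and discards ρB's equal
    # value (index 11), which is exactly the priority rule described above.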
     if ρA.ndim == 1:
         for i in range(ρA.shape[0]):
             if ρB[i] > ρA[i] and IB[i] != IA[i]:
                 ρA[i] = ρB[i]
                 IA[i] = IB[i]
         return

-    k = ρA.shape[1]
-    for i in range(ρA.shape[0]):
-        _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True)
-        ρB[i, overlap_idx_B] = np.NINF
-        IB[i, overlap_idx_B] = -1
+    else:
+        k = ρA.shape[1]
+        for i in range(ρA.shape[0]):
+            _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True)
+            ρB[i, overlap_idx_B] = np.NINF
+            IB[i, overlap_idx_B] = -1

-    profile = np.column_stack((ρB, ρA))
-    indices = np.column_stack((IB, IA))
+        profile = np.column_stack((ρB, ρA))
+        indices = np.column_stack((IB, IA))

-    idx = np.argsort(profile, axis=1, kind="mergesort")
-    profile[:, :] = np.take_along_axis(profile, idx, axis=1)
-    indices[:, :] = np.take_along_axis(indices, idx, axis=1)
+        idx = np.argsort(profile, axis=1, kind="mergesort")
+        profile[:, :] = np.take_along_axis(profile, idx, axis=1)
+        indices[:, :] = np.take_along_axis(indices, idx, axis=1)

-    # keep the last k elements (top-k largest values)
-    ρA[:, :] = profile[:, k:]
-    IA[:, :] = indices[:, k:]
+        # keep the last k elements (top-k largest values)
+        ρA[:, :] = profile[:, k:]
+        IA[:, :] = indices[:, k:]
+
+    return


 def find_matches(D, excl_zone, max_distance, max_matches=None):

From abb45181c5f2e95acf50016339c8da425e6449c1 Mon Sep 17 00:00:00 2001
From: SolidAhmad
Date: Tue, 8 Nov 2022 01:18:00 -0700
Subject: [PATCH 412/416] fix uint

---
 stumpy/stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index aeac32f3f..49657370a 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -222,7 +222,7 @@ def _compute_diagonal(
             # when the newly-calculated `pearson` value becomes greater than the
             # first (i.e. smallest) element in this array. Note that a higher
             # pearson value corresponds to a lower distance.
-            if pearson > ρ[thread_idx, i, 0]:
+            if pearson > ρ[thread_idx, uint64_i, 0]:
                 idx = np.searchsorted(ρ[thread_idx, uint64_i], pearson)
                 core._shift_insert_at_index(
                     ρ[thread_idx, uint64_i], idx, pearson, shift="left"
                 )

From 329889eb9172eafa3f3f7b6c25090f0ccbea Mon Sep 17 00:00:00 2001
From: ninimama
Date: Tue, 8 Nov 2022 06:59:38 -0700
Subject: [PATCH 413/416] fixed uint

---
 stumpy/stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index aeac32f3f..49657370a 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -222,7 +222,7 @@ def _compute_diagonal(
             # when the newly-calculated `pearson` value becomes greater than the
             # first (i.e. smallest) element in this array. Note that a higher
             # pearson value corresponds to a lower distance.
-            if pearson > ρ[thread_idx, i, 0]:
+            if pearson > ρ[thread_idx, uint64_i, 0]:
                 idx = np.searchsorted(ρ[thread_idx, uint64_i], pearson)
                 core._shift_insert_at_index(
                     ρ[thread_idx, uint64_i], idx, pearson, shift="left"

From c0e9f74cb6c05aac8cadc54b05c52fa8d3374c48 Mon Sep 17 00:00:00 2001
From: SolidAhmad
Date: Tue, 8 Nov 2022 21:34:20 -0700
Subject: [PATCH 414/416] fixed test function

---
 tests/test_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 693920973..9130c11b3 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1256,8 +1256,8 @@ def test_merge_topk_ρI_with_1D_input():
     ρB[IDX_rows_with_overlaps] = ρA[IDX_rows_with_overlaps] + imprecision
     IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps]

-    naive.merge_topk_PI(ref_ρ, ρB.copy(), ref_I, IB.copy())
-    core._merge_topk_PI(comp_ρ, ρB.copy(), comp_I, IB.copy())
+    naive._merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy())
+    core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy())

     npt.assert_almost_equal(ref_ρ, comp_ρ)
     npt.assert_almost_equal(ref_I, comp_I)

From 27c05c35cd3a798a64f72de5caecf3c0982fcc9d Mon Sep 17 00:00:00 2001
From: SolidAhmad
Date: Tue, 8 Nov 2022 21:50:52 -0700
Subject: [PATCH 415/416] fixed calling function

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 9130c11b3..16d2d0fd2 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1256,7 +1256,7 @@ def test_merge_topk_ρI_with_1D_input():
     ρB[IDX_rows_with_overlaps] = ρA[IDX_rows_with_overlaps] + imprecision
     IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps]

-    naive._merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy())
+    naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy())
     core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy())

     npt.assert_almost_equal(ref_ρ, comp_ρ)

From c45b8a4b6fee5a64304b2bf602cfa41e60b4e6e4 Mon Sep 17 00:00:00 2001
From: SolidAhmad
Date: Tue, 8 Nov 2022 22:25:58 -0700
Subject: [PATCH 416/416] Removed redundant return statement

---
 stumpy/scrump.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 0482ecbec..f874966e2 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -525,8 +525,6 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1):
     else:
         return P, I

-    return P, I

 @core.non_normalized(
     scraamp.scraamp,
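As a closing aside, several of the patches above maintain a sorted top-k row by inserting each new value at its sorted position and dropping the current worst entry. A minimal standalone sketch of that `searchsorted` plus insert-and-truncate pattern (the values below are made up; this is not part of any patch):

    import numpy as np

    P_row = np.array([1.0, 2.5, np.inf])   # ascending top-k distances (k = 3)
    I_row = np.array([4, 9, -1])           # matching nearest-neighbor indices

    d, j = 2.0, 17                         # newly computed distance and its index
    if d < P_row[-1]:
        # Insert at the sorted position, then drop the worst (last) entry so the
        # row stays sorted in ascending order with exactly k elements.
        pos = np.searchsorted(P_row, d, side="right")
        P_row[:] = np.insert(P_row, pos, d)[:-1]
        I_row[:] = np.insert(I_row, pos, j)[:-1]

    # P_row -> [1.0, 2.0, 2.5], I_row -> [4, 17, 9]

For the pearson variant the same idea applies with the array kept in ascending order of ρ and the smallest (first) element being the one displaced, which is why a higher pearson value corresponds to a lower distance in the comments above.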