Merge pull request #117 from TGSAI/fix/numba_safety

tasansal · web-flow · commit dd1c7cad4309 · 2022-10-25T15:42:24.000-05:00
Fix memory safety bug on IEEE to IBM conversion using Numba on MacOS
diff --git a/src/mdio/segy/ebcdic.py b/src/mdio/segy/ebcdic.py
@@ -68,4 +68,5 @@
         0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,  # 255
     ],
     dtype="uint8",
-)  # fmt: on
+)
+# fmt: on
diff --git a/src/mdio/segy/ibm_float.py b/src/mdio/segy/ibm_float.py
@@ -5,7 +5,6 @@
 
 import numba as nb
 import numpy as np
-from numpy.typing import NDArray
 
 
 # If Numba's JIT compilation is disabled, force vectorized
@@ -38,22 +37,17 @@
 
 
 @nb.njit(
-    parallel=True,
-    boundscheck=False,
-    fastmath=True,
+    "uint32(float32)",
     cache=JIT_CACHE,
     locals={
-        "sign": nb.int8,
-        "exponent": nb.int8,
-        "exp16": nb.int8,
+        "sign": nb.uint32,
+        "exponent": nb.int32,
         "exp_remainder": nb.int8,
         "downshift": nb.int8,
-        "ibm_exponent": nb.int8,
         "ibm_mantissa": nb.int32,
-        "x_basic": nb.uint32[:],
     },
 )
-def ieee2ibm(ieee_array: NDArray[np.float32]):
+def ieee2ibm_single(ieee: np.float32) -> np.uint32:
     """IEEE Float to IBM Float conversion.
 
     Modified from here:
@@ -67,84 +61,43 @@ def ieee2ibm(ieee_array: NDArray[np.float32]):
     Byte swapping is up to user after this function.
 
     Args:
-        ieee_array: Numpy IEEE 32-bit float array.
+        ieee: Numpy IEEE 32-bit float scalar.
 
     Returns:
         IBM 32-bit float converted array with int32 view.
     """
-    # View the numpy array as int32, so we can do bit manipulations
-    # We will do modifications in place.
-    # Note: this is destructive on the original array
-    ibm_array = ieee_array.view(np.uint32)
-
-    # We parallelize with threads along dim=0, so we need to run an outside
-    # loop separately. The ndenumerate after that handles dim=1 to dim=n, so
-    # it generalizes.
-    for dim0_idx in nb.prange(ibm_array.shape[0]):
-        for dim1n_idx, ieee in np.ndenumerate(ibm_array[dim0_idx]):
-            nd_index = (dim0_idx,) + dim1n_idx
-
-            # Special-case 0.0
-            if ieee in [0, 2147483648]:  # 0.0 or np.float32(-0.0).view('uint32')
-                ibm_array[nd_index] = 0
-                continue
-
-            # Get IEEE's sign and exponent
-            sign = ieee & IEEE32_SIGN
-            exponent = ((ieee & IEEE32_EXPONENT) >> 23) - 127
-
-            # The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
-            # be entirely to the right of the radix point. In contrast, the IEEE
-            # exponent is to the base 2 and there is an assumed 1-bit to the left of
-            # the radix point.
-            # Note: reusing exponent variable, -> it is actually exp16
-
-            # exp16, exp_remainder
-            exponent, exp_remainder = divmod(exponent + 1, 4)
-            exponent += exp_remainder != 0
-            downshift = 4 - exp_remainder if exp_remainder else 0
-            exponent = exponent + 64
-            # From here down exponent -> ibm_exponent
-            exponent = 0 if exponent < 0 else exponent
-            exponent = 127 if exponent > 127 else exponent
-            exponent = exponent << 24
-            exponent = exponent if ieee else 0
-
-            # Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
-            # 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
-            # division by 4. It is allowed to have up to 3 leading 0s.
-            ibm_mantissa = ((ieee & IEEE32_FRACTION) | 0x800000) >> downshift
-            ibm_array[nd_index] = sign | exponent | ibm_mantissa
-
-    return ibm_array
-
-
-@nb.njit("uint32(uint32)", cache=JIT_CACHE)
-def byteswap_uint32_single(value):
-    """Endianness swapping that can be JIT compiled.
-
-    This is faster or on par with the numpy implementation depending
-    on the size of the array.
-
-    We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
-    Then shift (3, 4, 3, 2) to (1, 2, 3, 4)
-
-    Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
-
-    Args:
-        value: Value to be byte-swapped.
-
-    Returns:
-        Byte-swapped value in same dtype.
-    """
-    value = np.uint32(value)
-
-    if value == 0:
-        return value
-
-    value = ((value << 8) & BYTEMASK_1_3) | ((value >> 8) & BYTEMASK_2_4)
-    value = np.uint32(value << 16) | np.uint32(value >> 16)
-    return value
+    ieee = np.float32(ieee).view(np.uint32)
+
+    if ieee in [0, 2147483648]:  # 0.0 or np.float32(-0.0).view('uint32')
+        return 0
+
+    # Get IEEE's sign and exponent
+    sign = ieee & IEEE32_SIGN
+    exponent = ((ieee & IEEE32_EXPONENT) >> 23) - 127
+    # The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
+    # be entirely to the right of the radix point. In contrast, the IEEE
+    # exponent is to the base 2 and there is an assumed 1-bit to the left of
+    # the radix point.
+    # Note: reusing exponent variable, -> it is actually exp16
+
+    # exp16, exp_remainder
+    exponent, exp_remainder = divmod(exponent + 1, 4)
+    exponent += exp_remainder != 0
+    downshift = 4 - exp_remainder if exp_remainder else 0
+    exponent = exponent + 64
+    # From here down exponent -> ibm_exponent
+    exponent = 0 if exponent < 0 else exponent
+    exponent = 127 if exponent > 127 else exponent
+    exponent = exponent << 24
+    exponent = exponent if ieee else 0
+
+    # Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
+    # 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
+    # division by 4. It is allowed to have up to 3 leading 0s.
+    ibm_mantissa = ((ieee & IEEE32_FRACTION) | 0x800000) >> downshift
+    ibm = sign | exponent | ibm_mantissa
+
+    return ibm
 
 
 @nb.njit(
@@ -191,13 +144,47 @@ def ibm2ieee_single(ibm: np.uint32) -> np.float32:
     return ieee
 
 
-@nb.vectorize("uint32(uint32)", **JIT_KWARGS)
-def byteswap_uint32(value):  # pragma: no cover
-    """Wrapper for vectorizing byte-swap to arrays."""
-    return byteswap_uint32_single(value)
+@nb.njit("uint32(uint32)", cache=JIT_CACHE)
+def byteswap_uint32_single(value):
+    """Endianness swapping that can be JIT compiled.
+
+    This is faster or on par with the numpy implementation depending
+    on the size of the array.
+
+    We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
+    Then shift (3, 4, 1, 2) to (1, 2, 3, 4)
+
+    Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
+
+    Args:
+        value: Value to be byte-swapped.
+
+    Returns:
+        Byte-swapped value in same dtype.
+    """
+    value = np.uint32(value)
+
+    if value == 0:
+        return value
+
+    value = ((value << 8) & BYTEMASK_1_3) | ((value >> 8) & BYTEMASK_2_4)
+    value = np.uint32(value << 16) | np.uint32(value >> 16)
+    return value
+
+
+@nb.vectorize("uint32(float32)", target=JIT_TARGET, **JIT_KWARGS)
+def ieee2ibm(ieee_array: np.float32) -> np.uint32:  # pragma: no cover
+    """Wrapper for vectorizing IEEE to IBM conversion to arrays."""
+    return ieee2ibm_single(ieee_array)
 
 
 @nb.vectorize("float32(uint32)", target=JIT_TARGET, **JIT_KWARGS)
 def ibm2ieee(ibm_array: np.uint32) -> np.float32:  # pragma: no cover
     """Wrapper for vectorizing IBM to IEEE conversion to arrays."""
     return ibm2ieee_single(ibm_array)
+
+
+@nb.vectorize("uint32(uint32)", **JIT_KWARGS)
+def byteswap_uint32(value):  # pragma: no cover
+    """Wrapper for vectorizing byte-swap to arrays."""
+    return byteswap_uint32_single(value)
diff --git a/tests/unit/test_ibm_ieee.py b/tests/unit/test_ibm_ieee.py
@@ -16,31 +16,35 @@
 @pytest.mark.parametrize(
     "ieee, ibm",
     [
-        ([0, -0.0], [0x00000000, 0x00000000]),
-        ([0.1, -0.1], [0x40199999, 0xC0199999]),
-        ([0.5, -0.5], [0x40800000, 0xC0800000]),
-        ([1, -1], [0x41100000, 0xC1100000]),
-        ([3.141593, -3.141593], [0x413243F7, 0xC13243F7]),
-        ([0.15625, -0.15625], [0x40280000, 0xC0280000]),
-        ([118.625, -118.625], [0x4276A000, 0xC276A000]),
-        ([8521603, -8521603], [0x46820783, 0xC6820783]),
-        ([3.4028235e38, -3.4028235e38], [0x60FFFFFF, 0xE0FFFFFF]),
+        (0.0, 0x00000000),
+        (-0.0, 0x00000000),
+        (0.1, 0x40199999),
+        (-1, 0xC1100000),
+        (3.141593, 0x413243F7),
+        (-0.15625, 0xC0280000),
+        (118.625, 0x4276A000),
+        (-8521603, 0xC6820783),
+        (3.4028235e38, 0x60FFFFFF),
+        (-3.4028235e38, 0xE0FFFFFF),
+        ([-0.0, 0.1], [0x00000000, 0x40199999]),
+        ([0.0, 0.1, 3.141593], [0x00000000, 0x40199999, 0x413243F7]),
+        ([[0.0], [0.1], [3.141593]], [[0x00000000], [0x40199999], [0x413243F7]]),
     ],
 )
 class TestIbmIeee:
     """Test conversions, back and forth."""
 
     def test_ieee_to_ibm(self, ieee, ibm):
         """IEEE to IBM conversion."""
-        ieee_fp32 = np.atleast_2d(np.float32(ieee))
-        actual_ibm = np.squeeze(ieee2ibm(ieee_fp32))
-        expected_ibm = np.squeeze(np.atleast_2d(np.uint32(ibm)))
+        ieee_fp32 = np.float32(ieee)
+        actual_ibm = ieee2ibm(ieee_fp32)
+        expected_ibm = np.uint32(ibm)
         np.testing.assert_array_equal(actual_ibm, expected_ibm)
 
     def test_ibm_to_ieee(self, ieee, ibm):
         """IBM to IEEE conversion."""
-        expected_ieee = np.asarray(ieee, dtype="float32")
-        actual_ibm = np.asarray(ibm, dtype="uint32")
+        expected_ieee = np.float32(ieee)
+        actual_ibm = np.uint32(ibm)
 
         # Assert up to 6 decimals (default)
         actual_ieee = ibm2ieee(actual_ibm)
@@ -51,9 +55,8 @@ def test_ibm_to_ieee(self, ieee, ibm):
 def test_ieee_to_ibm_roundtrip(shape: tuple):
     """IEEE to IBM and then back to IEEE conversion."""
     expected_ieee = np.random.randn(*shape).astype("float32")
-    expected_ieee = np.atleast_2d(expected_ieee)
 
-    actual_ibm = ieee2ibm(expected_ieee.copy())
+    actual_ibm = ieee2ibm(expected_ieee)
     actual_ieee = ibm2ieee(actual_ibm)
 
     # Assert up to 6 decimals (default)