Skip to content

Commit dd1c7ca

Browse files
authored
Merge pull request #117 from TGSAI/fix/numba_safety
Fix memory safety bug on IBM to IEEE conversion using Numba on MacOS
2 parents 21f44ad + 8ae4bbf commit dd1c7ca

File tree

3 files changed

+96
-105
lines changed

3 files changed

+96
-105
lines changed

src/mdio/segy/ebcdic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,5 @@
6868
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, # 255
6969
],
7070
dtype="uint8",
71-
) # fmt: on
71+
)
72+
# fmt: on

src/mdio/segy/ibm_float.py

Lines changed: 75 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import numba as nb
77
import numpy as np
8-
from numpy.typing import NDArray
98

109

1110
# If Numba's JIT compilation is disabled, force vectorized
@@ -38,22 +37,17 @@
3837

3938

4039
@nb.njit(
41-
parallel=True,
42-
boundscheck=False,
43-
fastmath=True,
40+
"uint32(float32)",
4441
cache=JIT_CACHE,
4542
locals={
46-
"sign": nb.int8,
47-
"exponent": nb.int8,
48-
"exp16": nb.int8,
43+
"sign": nb.uint32,
44+
"exponent": nb.int32,
4945
"exp_remainder": nb.int8,
5046
"downshift": nb.int8,
51-
"ibm_exponent": nb.int8,
5247
"ibm_mantissa": nb.int32,
53-
"x_basic": nb.uint32[:],
5448
},
5549
)
56-
def ieee2ibm(ieee_array: NDArray[np.float32]):
50+
def ieee2ibm_single(ieee: np.float32) -> np.uint32:
5751
"""IEEE Float to IBM Float conversion.
5852
5953
Modified from here:
@@ -67,84 +61,43 @@ def ieee2ibm(ieee_array: NDArray[np.float32]):
6761
Byte swapping is up to user after this function.
6862
6963
Args:
70-
ieee_array: Numpy IEEE 32-bit float array.
64+
ieee: Numpy IEEE 32-bit float array.
7165
7266
Returns:
7367
IBM 32-bit float converted array with int32 view.
7468
"""
75-
# View the numpy array as int32, so we can do bit manipulations
76-
# We will do modifications in place.
77-
# Note: this is destructive on the original array
78-
ibm_array = ieee_array.view(np.uint32)
79-
80-
# We parallelize with threads along dim=0, so we need to run an outside
81-
# loop separately. The ndenumerate after that handles dim=1 to dim=n, so
82-
# it generalizes.
83-
for dim0_idx in nb.prange(ibm_array.shape[0]):
84-
for dim1n_idx, ieee in np.ndenumerate(ibm_array[dim0_idx]):
85-
nd_index = (dim0_idx,) + dim1n_idx
86-
87-
# Special-case 0.0
88-
if ieee in [0, 2147483648]: # 0.0 or np.float32(-0.0).view('uint32')
89-
ibm_array[nd_index] = 0
90-
continue
91-
92-
# Get IEEE's sign and exponent
93-
sign = ieee & IEEE32_SIGN
94-
exponent = ((ieee & IEEE32_EXPONENT) >> 23) - 127
95-
96-
# The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
97-
# be entirely to the right of the radix point. In contrast, the IEEE
98-
# exponent is to the base 2 and there is an assumed 1-bit to the left of
99-
# the radix point.
100-
# Note: reusing exponent variable, -> it is actually exp16
101-
102-
# exp16, exp_remainder
103-
exponent, exp_remainder = divmod(exponent + 1, 4)
104-
exponent += exp_remainder != 0
105-
downshift = 4 - exp_remainder if exp_remainder else 0
106-
exponent = exponent + 64
107-
# From here down exponent -> ibm_exponent
108-
exponent = 0 if exponent < 0 else exponent
109-
exponent = 127 if exponent > 127 else exponent
110-
exponent = exponent << 24
111-
exponent = exponent if ieee else 0
112-
113-
# Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
114-
# 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
115-
# division by 4. It is allowed to have up to 3 leading 0s.
116-
ibm_mantissa = ((ieee & IEEE32_FRACTION) | 0x800000) >> downshift
117-
ibm_array[nd_index] = sign | exponent | ibm_mantissa
118-
119-
return ibm_array
120-
121-
122-
@nb.njit("uint32(uint32)", cache=JIT_CACHE)
123-
def byteswap_uint32_single(value):
124-
"""Endianness swapping that can be JIT compiled.
125-
126-
This is faster or on par with the numpy implementation depending
127-
on the size of the array.
128-
129-
We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
130-
Then shift (3, 4, 3, 2) to (1, 2, 3, 4)
131-
132-
Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
133-
134-
Args:
135-
value: Value to be byte-swapped.
136-
137-
Returns:
138-
Byte-swapped value in same dtype.
139-
"""
140-
value = np.uint32(value)
141-
142-
if value == 0:
143-
return value
144-
145-
value = ((value << 8) & BYTEMASK_1_3) | ((value >> 8) & BYTEMASK_2_4)
146-
value = np.uint32(value << 16) | np.uint32(value >> 16)
147-
return value
69+
ieee = np.float32(ieee).view(np.uint32)
70+
71+
if ieee in [0, 2147483648]: # 0.0 or np.float32(-0.0).view('uint32')
72+
return 0
73+
74+
# Get IEEE's sign and exponent
75+
sign = ieee & IEEE32_SIGN
76+
exponent = ((ieee & IEEE32_EXPONENT) >> 23) - 127
77+
# The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
78+
# be entirely to the right of the radix point. In contrast, the IEEE
79+
# exponent is to the base 2 and there is an assumed 1-bit to the left of
80+
# the radix point.
81+
# Note: reusing exponent variable, -> it is actually exp16
82+
83+
# exp16, exp_remainder
84+
exponent, exp_remainder = divmod(exponent + 1, 4)
85+
exponent += exp_remainder != 0
86+
downshift = 4 - exp_remainder if exp_remainder else 0
87+
exponent = exponent + 64
88+
# From here down exponent -> ibm_exponent
89+
exponent = 0 if exponent < 0 else exponent
90+
exponent = 127 if exponent > 127 else exponent
91+
exponent = exponent << 24
92+
exponent = exponent if ieee else 0
93+
94+
# Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
95+
# 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
96+
# division by 4. It is allowed to have up to 3 leading 0s.
97+
ibm_mantissa = ((ieee & IEEE32_FRACTION) | 0x800000) >> downshift
98+
ibm = sign | exponent | ibm_mantissa
99+
100+
return ibm
148101

149102

150103
@nb.njit(
@@ -191,13 +144,47 @@ def ibm2ieee_single(ibm: np.uint32) -> np.float32:
191144
return ieee
192145

193146

194-
@nb.vectorize("uint32(uint32)", **JIT_KWARGS)
195-
def byteswap_uint32(value): # pragma: no cover
196-
"""Wrapper for vectorizing byte-swap to arrays."""
197-
return byteswap_uint32_single(value)
147+
@nb.njit("uint32(uint32)", cache=JIT_CACHE)
148+
def byteswap_uint32_single(value):
149+
"""Endianness swapping that can be JIT compiled.
150+
151+
This is faster or on par with the numpy implementation depending
152+
on the size of the array.
153+
154+
We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
155+
Then shift (3, 4, 3, 2) to (1, 2, 3, 4)
156+
157+
Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
158+
159+
Args:
160+
value: Value to be byte-swapped.
161+
162+
Returns:
163+
Byte-swapped value in same dtype.
164+
"""
165+
value = np.uint32(value)
166+
167+
if value == 0:
168+
return value
169+
170+
value = ((value << 8) & BYTEMASK_1_3) | ((value >> 8) & BYTEMASK_2_4)
171+
value = np.uint32(value << 16) | np.uint32(value >> 16)
172+
return value
173+
174+
175+
@nb.vectorize("uint32(float32)", target=JIT_TARGET, **JIT_KWARGS)
176+
def ieee2ibm(ieee_array: np.float32) -> np.uint32: # pragma: no cover
177+
"""Wrapper for vectorizing IEEE to IBM conversion to arrays."""
178+
return ieee2ibm_single(ieee_array)
198179

199180

200181
@nb.vectorize("float32(uint32)", target=JIT_TARGET, **JIT_KWARGS)
201182
def ibm2ieee(ibm_array: np.uint32) -> np.float32: # pragma: no cover
202183
"""Wrapper for vectorizing IBM to IEEE conversion to arrays."""
203184
return ibm2ieee_single(ibm_array)
185+
186+
187+
@nb.vectorize("uint32(uint32)", **JIT_KWARGS)
188+
def byteswap_uint32(value): # pragma: no cover
189+
"""Wrapper for vectorizing byte-swap to arrays."""
190+
return byteswap_uint32_single(value)

tests/unit/test_ibm_ieee.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,35 @@
1616
@pytest.mark.parametrize(
1717
"ieee, ibm",
1818
[
19-
([0, -0.0], [0x00000000, 0x00000000]),
20-
([0.1, -0.1], [0x40199999, 0xC0199999]),
21-
([0.5, -0.5], [0x40800000, 0xC0800000]),
22-
([1, -1], [0x41100000, 0xC1100000]),
23-
([3.141593, -3.141593], [0x413243F7, 0xC13243F7]),
24-
([0.15625, -0.15625], [0x40280000, 0xC0280000]),
25-
([118.625, -118.625], [0x4276A000, 0xC276A000]),
26-
([8521603, -8521603], [0x46820783, 0xC6820783]),
27-
([3.4028235e38, -3.4028235e38], [0x60FFFFFF, 0xE0FFFFFF]),
19+
(0.0, 0x00000000),
20+
(-0.0, 0x00000000),
21+
(0.1, 0x40199999),
22+
(-1, 0xC1100000),
23+
(3.141593, 0x413243F7),
24+
(-0.15625, 0xC0280000),
25+
(118.625, 0x4276A000),
26+
(-8521603, 0xC6820783),
27+
(3.4028235e38, 0x60FFFFFF),
28+
(-3.4028235e38, 0xE0FFFFFF),
29+
([-0.0, 0.1], [0x00000000, 0x40199999]),
30+
([0.0, 0.1, 3.141593], [0x00000000, 0x40199999, 0x413243F7]),
31+
([[0.0], [0.1], [3.141593]], [[0x00000000], [0x40199999], [0x413243F7]]),
2832
],
2933
)
3034
class TestIbmIeee:
3135
"""Test conversions, back and forth."""
3236

3337
def test_ieee_to_ibm(self, ieee, ibm):
3438
"""IEEE to IBM conversion."""
35-
ieee_fp32 = np.atleast_2d(np.float32(ieee))
36-
actual_ibm = np.squeeze(ieee2ibm(ieee_fp32))
37-
expected_ibm = np.squeeze(np.atleast_2d(np.uint32(ibm)))
39+
ieee_fp32 = np.float32(ieee)
40+
actual_ibm = ieee2ibm(ieee_fp32)
41+
expected_ibm = np.uint32(ibm)
3842
np.testing.assert_array_equal(actual_ibm, expected_ibm)
3943

4044
def test_ibm_to_ieee(self, ieee, ibm):
4145
"""IBM to IEEE conversion."""
42-
expected_ieee = np.asarray(ieee, dtype="float32")
43-
actual_ibm = np.asarray(ibm, dtype="uint32")
46+
expected_ieee = np.float32(ieee)
47+
actual_ibm = np.uint32(ibm)
4448

4549
# Assert up to 6 decimals (default)
4650
actual_ieee = ibm2ieee(actual_ibm)
@@ -51,9 +55,8 @@ def test_ibm_to_ieee(self, ieee, ibm):
5155
def test_ieee_to_ibm_roundtrip(shape: tuple):
5256
"""IEEE to IBM and then back to IEEE conversion."""
5357
expected_ieee = np.random.randn(*shape).astype("float32")
54-
expected_ieee = np.atleast_2d(expected_ieee)
5558

56-
actual_ibm = ieee2ibm(expected_ieee.copy())
59+
actual_ibm = ieee2ibm(expected_ieee)
5760
actual_ieee = ibm2ieee(actual_ibm)
5861

5962
# Assert up to 6 decimals (default)

0 commit comments

Comments
 (0)