55
66import numba as nb
77import numpy as np
8- from numpy .typing import NDArray
98
109
1110# If Numba's JIT compilation is disabled, force vectorized
3837
3938
4039@nb .njit (
41- parallel = True ,
42- boundscheck = False ,
43- fastmath = True ,
40+ "uint32(float32)" ,
4441 cache = JIT_CACHE ,
4542 locals = {
46- "sign" : nb .int8 ,
47- "exponent" : nb .int8 ,
48- "exp16" : nb .int8 ,
43+ "sign" : nb .uint32 ,
44+ "exponent" : nb .int32 ,
4945 "exp_remainder" : nb .int8 ,
5046 "downshift" : nb .int8 ,
51- "ibm_exponent" : nb .int8 ,
5247 "ibm_mantissa" : nb .int32 ,
53- "x_basic" : nb .uint32 [:],
5448 },
5549)
56- def ieee2ibm ( ieee_array : NDArray [ np .float32 ]) :
50+ def ieee2ibm_single ( ieee : np .float32 ) -> np . uint32 :
5751 """IEEE Float to IBM Float conversion.
5852
5953 Modified from here:
@@ -67,84 +61,43 @@ def ieee2ibm(ieee_array: NDArray[np.float32]):
6761 Byte swapping is up to user after this function.
6862
6963 Args:
70- ieee_array : Numpy IEEE 32-bit float array.
64+ ieee : Numpy IEEE 32-bit float array.
7165
7266 Returns:
7367 IBM 32-bit float converted array with int32 view.
7468 """
75- # View the numpy array as int32, so we can do bit manipulations
76- # We will do modifications in place.
77- # Note: this is destructive on the original array
78- ibm_array = ieee_array .view (np .uint32 )
79-
80- # We parallelize with threads along dim=0, so we need to run an outside
81- # loop separately. The ndenumerate after that handles dim=1 to dim=n, so
82- # it generalizes.
83- for dim0_idx in nb .prange (ibm_array .shape [0 ]):
84- for dim1n_idx , ieee in np .ndenumerate (ibm_array [dim0_idx ]):
85- nd_index = (dim0_idx ,) + dim1n_idx
86-
87- # Special-case 0.0
88- if ieee in [0 , 2147483648 ]: # 0.0 or np.float32(-0.0).view('uint32')
89- ibm_array [nd_index ] = 0
90- continue
91-
92- # Get IEEE's sign and exponent
93- sign = ieee & IEEE32_SIGN
94- exponent = ((ieee & IEEE32_EXPONENT ) >> 23 ) - 127
95-
96- # The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
97- # be entirely to the right of the radix point. In contrast, the IEEE
98- # exponent is to the base 2 and there is an assumed 1-bit to the left of
99- # the radix point.
100- # Note: reusing exponent variable, -> it is actually exp16
101-
102- # exp16, exp_remainder
103- exponent , exp_remainder = divmod (exponent + 1 , 4 )
104- exponent += exp_remainder != 0
105- downshift = 4 - exp_remainder if exp_remainder else 0
106- exponent = exponent + 64
107- # From here down exponent -> ibm_exponent
108- exponent = 0 if exponent < 0 else exponent
109- exponent = 127 if exponent > 127 else exponent
110- exponent = exponent << 24
111- exponent = exponent if ieee else 0
112-
113- # Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
114- # 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
115- # division by 4. It is allowed to have up to 3 leading 0s.
116- ibm_mantissa = ((ieee & IEEE32_FRACTION ) | 0x800000 ) >> downshift
117- ibm_array [nd_index ] = sign | exponent | ibm_mantissa
118-
119- return ibm_array
120-
121-
122- @nb .njit ("uint32(uint32)" , cache = JIT_CACHE )
123- def byteswap_uint32_single (value ):
124- """Endianness swapping that can be JIT compiled.
125-
126- This is faster or on par with the numpy implementation depending
127- on the size of the array.
128-
129- We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
130- Then shift (3, 4, 3, 2) to (1, 2, 3, 4)
131-
132- Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
133-
134- Args:
135- value: Value to be byte-swapped.
136-
137- Returns:
138- Byte-swapped value in same dtype.
139- """
140- value = np .uint32 (value )
141-
142- if value == 0 :
143- return value
144-
145- value = ((value << 8 ) & BYTEMASK_1_3 ) | ((value >> 8 ) & BYTEMASK_2_4 )
146- value = np .uint32 (value << 16 ) | np .uint32 (value >> 16 )
147- return value
69+ ieee = np .float32 (ieee ).view (np .uint32 )
70+
71+ if ieee in [0 , 2147483648 ]: # 0.0 or np.float32(-0.0).view('uint32')
72+ return 0
73+
74+ # Get IEEE's sign and exponent
75+ sign = ieee & IEEE32_SIGN
76+ exponent = ((ieee & IEEE32_EXPONENT ) >> 23 ) - 127
77+ # The IBM 7-bit exponent is to the base 16 and the mantissa is presumed to
78+ # be entirely to the right of the radix point. In contrast, the IEEE
79+ # exponent is to the base 2 and there is an assumed 1-bit to the left of
80+ # the radix point.
81+ # Note: reusing exponent variable, -> it is actually exp16
82+
83+ # exp16, exp_remainder
84+ exponent , exp_remainder = divmod (exponent + 1 , 4 )
85+ exponent += exp_remainder != 0
86+ downshift = 4 - exp_remainder if exp_remainder else 0
87+ exponent = exponent + 64
88+ # From here down exponent -> ibm_exponent
89+ exponent = 0 if exponent < 0 else exponent
90+ exponent = 127 if exponent > 127 else exponent
91+ exponent = exponent << 24
92+ exponent = exponent if ieee else 0
93+
94+ # Add the implicit initial 1-bit to the 23-bit IEEE mantissa to get the
95+ # 24-bit IBM mantissa. Downshift it by the remainder from the exponent's
96+ # division by 4. It is allowed to have up to 3 leading 0s.
97+ ibm_mantissa = ((ieee & IEEE32_FRACTION ) | 0x800000 ) >> downshift
98+ ibm = sign | exponent | ibm_mantissa
99+
100+ return ibm
148101
149102
150103@nb .njit (
@@ -191,13 +144,47 @@ def ibm2ieee_single(ibm: np.uint32) -> np.float32:
191144 return ieee
192145
193146
194- @nb .vectorize ("uint32(uint32)" , ** JIT_KWARGS )
195- def byteswap_uint32 (value ): # pragma: no cover
196- """Wrapper for vectorizing byte-swap to arrays."""
197- return byteswap_uint32_single (value )
147+ @nb .njit ("uint32(uint32)" , cache = JIT_CACHE )
148+ def byteswap_uint32_single (value ):
149+ """Endianness swapping that can be JIT compiled.
150+
151+ This is faster or on par with the numpy implementation depending
152+ on the size of the array.
153+
154+ We first shift (4, 3, 2, 1) to (3, 4, 1, 2)
155+ Then shift (3, 4, 3, 2) to (1, 2, 3, 4)
156+
157+ Which yields (4, 3, 2, 1) -> (1, 2, 3, 4) or vice-versa.
158+
159+ Args:
160+ value: Value to be byte-swapped.
161+
162+ Returns:
163+ Byte-swapped value in same dtype.
164+ """
165+ value = np .uint32 (value )
166+
167+ if value == 0 :
168+ return value
169+
170+ value = ((value << 8 ) & BYTEMASK_1_3 ) | ((value >> 8 ) & BYTEMASK_2_4 )
171+ value = np .uint32 (value << 16 ) | np .uint32 (value >> 16 )
172+ return value
173+
174+
175+ @nb .vectorize ("uint32(float32)" , target = JIT_TARGET , ** JIT_KWARGS )
176+ def ieee2ibm (ieee_array : np .float32 ) -> np .uint32 : # pragma: no cover
177+ """Wrapper for vectorizing IEEE to IBM conversion to arrays."""
178+ return ieee2ibm_single (ieee_array )
198179
199180
200181@nb .vectorize ("float32(uint32)" , target = JIT_TARGET , ** JIT_KWARGS )
201182def ibm2ieee (ibm_array : np .uint32 ) -> np .float32 : # pragma: no cover
202183 """Wrapper for vectorizing IBM to IEEE conversion to arrays."""
203184 return ibm2ieee_single (ibm_array )
185+
186+
187+ @nb .vectorize ("uint32(uint32)" , ** JIT_KWARGS )
188+ def byteswap_uint32 (value ): # pragma: no cover
189+ """Wrapper for vectorizing byte-swap to arrays."""
190+ return byteswap_uint32_single (value )
0 commit comments