Skip to content

Commit

Permalink
add faster [<>].@(2&^.) for floats
Browse files Browse the repository at this point in the history
  • Loading branch information
kapustaikwas27 committed Aug 22, 2024
1 parent 103bf41 commit 50a6b21
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 32 deletions.
64 changes: 41 additions & 23 deletions jsrc/ca.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,54 @@ static DF1(jtonf1){PROLOG(0021);DECLFG;I flag=sv->flag,m=jt->xmode;
}

// <.@(2&^.) monad
static DF1(jtintfloorlog2) {A z;
static DF1(jtintfloorlog2) {
ARGCHK1(w);
I *wv = IAV(w); I wn=AN(w);
GATV(z, INT, wn, AR(w), AS(w)); I *zv = IAV(z); // zv points to allocated result area
if (INT & AT(w)) {
// When d >= 1 then <.@(2&^.) d is equal to the position of the highest 1-bit in d (CTLZI). failover to by hand if d<=0
DO(wn, I d=wv[i]; if(unlikely(d<=0))R onf1(w,self); zv[i]=CTLZI(d);)
// obsolete for (I i = wn - 1; i >= 0; --i, ++wv, ++zv) { // loop over all atoms of w
// obsolete I d = *wv;
// obsolete if (d > 0) *zv = CTLZI(d); // When d >= 1 then <.@(2&^.) d is equal to the position of the highest 1-bit in d (CTLZI).
// obsolete else R onf1(w, self); // When d < 1 then stop and reexecute by hand for whole w.
// obsolete }
} else R onf1(w, self);
R z;
if ((INT | FL) & AT(w)) { // Special cases only for integers and floats ([<>].@f for extended integers is handled in vx.c).
A z; I wn = AN(w); I wr = AR(w); I *ws = AS(w); I *wv = IAV(w); // GATV documentation advises using variables for arguments.
GATV(z, INT, wn, wr, ws); I *zv = IAV(z); // zv points to allocated result area.
if (INT & AT(w)) { // Case with integers.
DO(wn,
I d = wv[i]; if (unlikely(d <= 0)) R onf1(w, self); // Failover to by hand if d <= 0.
zv[i] = CTLZI(d); // When d >= 1 then <.@(2&^.) d is equal to the position of the highest 1-bit in d (CTLZI).
)
} else { // Case with floats.
DO(wn,
UI8 d = *(UI8*)&wv[i];
// Infs and NaN are the only floats that have all 1-bits in exponent. D_EXP_MSK represents +Inf, but is also used to check NaN and denorms.
if (unlikely(wv[i] <= 0 || (d & D_EXP_MSK) == D_EXP_MSK)) R onf1(w, self); // Failover to by hand if d <= 0 or d is +Inf or NaN.
zv[i] = ((~d) & D_EXP_MSK) == D_EXP_MSK // Denorms are the only floats (except 0 which was eliminated earlier) that have all 0-bits in exponent.
? CTLZI(d) - D_MANT_BITS_N + D_EXP_MIN // Denorm. Position of the highest 1-bit in d (which is in fraction part) is found with CTLZI.
: ((d & D_EXP_MSK) >> D_MANT_BITS_N) - D_EXP_MAX; // Normal. Result is exponent.
)
}
R z;
}
R onf1(w, self);
}

// >.@(2&^.) monad
static DF1(jtintceillog2) {A z; I wn, wr, *ws, *wv;
static DF1(jtintceillog2) { // Similar to the above case with floor (almost rewritten, but inner loops differ).
ARGCHK1(w);
if (INT & AT(w)) {
wn = AN(w); wr = AR(w); ws = AS(w); wv = IAV(w);
if ((INT | FL) & AT(w)) {
A z; I wn = AN(w); I wr = AR(w); I *ws = AS(w); I *wv = IAV(w);
GATV(z, INT, wn, wr, ws); I *zv = IAV(z);
for (I i = wn - 1; i >= 0; --i, wv++, zv++) {
I d = *wv;
// Similar to the above case with floor, but when d is not a power of 2 then add 1. Only powers of 2 have exactly one 1-bit, so count 1-bits in d (CT1I).
if (d > 0) *zv = CTLZI(d) + (CT1I(d) > 1);
else R onf1(w, self);
if (INT & AT(w)) { // Case with integers.
DO(wn,
I d = wv[i]; if (unlikely(d <= 0)) R onf1(w, self);
zv[i] = CTLZI(d) + (CT1I(d) > 1); // When d is not a power of 2 then add 1. Only powers of 2 have exactly one 1-bit, so count 1-bits in d (CT1I).
)
} else { // Case with floats.
DO(wn,
UI8 d = *(UI8*)&wv[i];
if (unlikely(wv[i] <= 0 || (d & D_EXP_MSK) == D_EXP_MSK)) R onf1(w, self);
zv[i] = ((~d) & D_EXP_MSK) == D_EXP_MSK
? CTLZI(d) + (CT1I(d) > 1) - D_MANT_BITS_N + D_EXP_MIN // Denorm.
: ((d & D_EXP_MSK) >> D_MANT_BITS_N) - D_EXP_MAX + (CT1I(d & D_MANT_MSK) > 1); // Normal.
)
}
} else R onf1(w, self);
R z;
R z;
}
R onf1(w, self);
}

// <.@ >.@ and the like, dyad
Expand Down
19 changes: 17 additions & 2 deletions jsrc/j.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,21 @@ static inline omp_int_t omp_get_num_threads() { return 1;}
#endif
#endif

/* IEEE 754 constants that are not defined in float.h */
// D_MANT_BITS_N is number of bits of mantissa in bit representation.
// Its value is DBL_MANT_DIG - 1, because the first digit does not occur in bit represention (always 1 for normalized numbers).
#define D_MANT_BITS_N 52
// D_EXP_BITS_N is number of bits of exponent in bit representation.
#define D_EXP_BITS_N 11
// 1 - D_EXP_MAX <= exponent <= D_EXP_MAX. In bit representation sum of exponent and D_EXP_MAX is stored which is positive. D_EXP_MAX = DBL_MAX_EXP - 1.
#define D_EXP_MAX 1023
// D_EXP_MIN = 1 - D_EXP_MAX
#define D_EXP_MIN -1022
// Bit mask of exponent is (UI8)((1 << D_EXP_BITS_N) - 1) << D_MANT_BITS_N. This is also bit representation of +Inf.
#define D_EXP_MSK 0x7ff0000000000000LL
#define D_MANT_MSK 0x000fffffffffffffLL


#if SY_64
#define IMAX 9223372036854775807LL
#define IMAXPRIME 9223372036854775783LL
Expand Down Expand Up @@ -2350,15 +2365,15 @@ if(unlikely(!_mm256_testz_pd(sgnbit,mantis0))){ /* if mantissa exactly 0, must
#else
#define CTLZI(w) (63-__builtin_clzll((UI)(w)))
#endif
#define CT1I(w) __builtin_popcountll((UI)w)
#define CT1I(w) __builtin_popcountll((UI)(w))
#else
#define CTTZI(w) __builtin_ctzl((UINT)(w))
#if (!C_AVX2) && (defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86))
#define CTLZI(w) (31-__builtin_clzl((UI)(w)|1)) // use this if we fear garbage if w=0
#else
#define CTLZI(w) (31-__builtin_clzl((UI)(w)))
#endif
#define CT1I(w) __builtin_popcountl((UI)w)
#define CT1I(w) __builtin_popcountl((UI)(w))
#endif
#define CTTZZ(w) ((w)==0 ? 32 : CTTZ(w)) // use this if we need 32 when w=0
#endif
Expand Down
10 changes: 5 additions & 5 deletions jsrc/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ D jtintpow(J jt, D x, I n) { D r;
// n = n > INT_MAX ? INT_MAX : (n < INT_MIN ? INT_MIN : n);
// R ldexp(1, n);
UI8 ires; // where bit-pattern of result will be built
if(likely(BETWEENC(n, -1022, 1023))) ires = (UI8)(n + 1023) << 52; // normal range
else if (n > 1023) ires = (UI8)((1 << 11) - 1) << 52; // infinity
if(likely(BETWEENC(n, D_EXP_MIN, D_EXP_MAX))) ires = (UI8)(n + D_EXP_MAX) << D_MANT_BITS_N; // normal range
else if (n > D_EXP_MAX) ires = D_EXP_MSK; // infinity
else {
// denorm and zero. n = -1023 is the largest denorm
n = MAX(n, -1023 - 53);
ires = ((UI8)1 << 51) >> (-1023 - n);
// denorm and zero. n = D_EXP_MIN - 1 is the largest denorm
n = MAX(n, D_EXP_MIN - 1 - D_MANT_BITS_N);
ires = ((UI8)1 << (D_MANT_BITS_N - 1)) >> (D_EXP_MIN - 1 - n);
}
R *(D*)&ires; // return bit-pattern as a D
}
Expand Down
26 changes: 24 additions & 2 deletions test/g200.ijs
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,32 @@ _ = 2^1e15 NB. on 32-bit, this is a float constant

NB. <.@(2&^.) and >.@(2&^.) ---------------------------------------------

(100 $ 0) -: (>.@(2&^.) - <.@(2&^.)) 2 <.@^ i. 100
(100 $ 0) -: (>.@(2&^.) - <.@(2&^.)) 2 ^ i. 100
(30 $ 0) -: (>.@(2&^.) - <.@(2&^.)) 2 <.@^ i. 30
(30 $ 0) -: (>.@(2&^.) - <.@(2&^.)) 2 ^ i. 30
0 1 1 2 2 2 2 3 3 3 -: <.@(2&^.) >: i. 10
0 1 2 2 3 3 3 3 4 4 -: >.@(2&^.) >: i. 10
49 53 56 -: <.@(2&^.) 1e15 1e16 1e17
50 54 57 -: >.@(2&^.) 1e15 1e16 1e17
0 1 1 2 3 3 6 -: <.@(2&^.) 1.1 2.3 3.4 7.8 8 8.0635 100.1921
1 2 2 3 3 4 7 -: >.@(2&^.) 1.1 2.3 3.4 7.8 8 8.0635 100.1921
(i. 10) -: <.@(2&^.) 2 ^ i. 10
(i. 10) -: >.@(2&^.) 2 ^ i. 10
(- i. 10) -: <.@(2&^.) 2 ^ - i. 10
(- i. 10) -: >.@(2&^.) 2 ^ - i. 10
1021 1022 1023 -: <.@(2&^.) 2 ^ 1021 1022 1023
1021 1022 1023 -: >.@(2&^.) 2 ^ 1021 1022 1023
_1072 _1073 _1074 -: <.@(2&^.) 2 ^ _1072 _1073 _1074
_1072 _1073 _1074 -: >.@(2&^.) 2 ^ _1072 _1073 _1074
_1050 -: <.@(2&^.) +/ ] 2 ^ _1050 _1054 _1070
_1049 -: >.@(2&^.) +/ ] 2 ^ _1050 _1054 _1070
'complex' -: datatype <.@(2&^.) _.
'complex' -: datatype >.@(2&^.) _.
_ -: <.@(2&^.) _
_ -: >.@(2&^.) _
__ -: <.@(2&^.) 0
__ -: >.@(2&^.) 0
1 1 0j4 -: <.@(2&^.) 2 3.4 _1
1 2 0j5 -: >.@(2&^.) 2 3.4 _1

NB. 0^0 -----------------------------------------------------------------

Expand Down

0 comments on commit 50a6b21

Please sign in to comment.