@@ -29,12 +29,21 @@ public static Vector<T> InitVector<T>(Func<int, T> f)
2929 return new Vector < T > ( arr ) ;
3030 }
3131
// Builds a predicate-style mask from an arbitrary vector of integers.
// Each lane of the returned array is T.One when the matching input lane is
// non-zero and T.Zero otherwise. The input array is left untouched and a
// freshly allocated array of the same length is returned.
public static T[] ConvertVectorToMask<T>(T[] vector) where T : IBinaryInteger<T>
{
    int laneCount = vector.Length;
    T[] mask = new T[laneCount];

    for (int lane = 0; lane < laneCount; lane++)
    {
        if (vector[lane] == T.Zero)
        {
            mask[lane] = T.Zero;
        }
        else
        {
            mask[lane] = T.One;
        }
    }

    return mask;
}
41+
3242 public static T [ ] CreateMaskForFirstActiveElement < T > ( T [ ] mask , T [ ] srcMask )
3343 where T : unmanaged, IBinaryInteger < T >
3444 {
3545 int count = srcMask . Length ;
36- T [ ] result = new T [ count ] ;
37- Array . Copy ( srcMask , result , count ) ;
46+ T [ ] result = ConvertVectorToMask ( srcMask ) ;
3847
3948 for ( int i = 0 ; i < count ; i ++ )
4049 {
@@ -1580,19 +1589,51 @@ public static ulong FusedAddHalving(ulong op1, ulong op2)
// Signed halving add: returns floor((op1 + op2) / 2) as if the sum were
// computed with unlimited precision, so the result never overflows.
//
// Rather than adding first and patching the sign bit afterwards (which is
// easy to get wrong at the extremes, e.g. long.MinValue + long.MinValue
// wraps to exactly 0 and looks like "no overflow"), each operand is halved
// with an arithmetic shift before the addition. The only information lost
// by the two shifts is the low bit of each operand; the discarded bits
// contribute a carry of one exactly when both operands are odd.
public static long FusedAddHalving(long op1, long op2)
{
    long carryFromLowBits = op1 & op2 & 1;
    return (op1 >> 1) + (op2 >> 1) + carryFromLowBits;
}
15861613
// Signed halving subtract: returns floor((op1 - op2) / 2) as if the
// difference were computed with unlimited precision, so the result never
// overflows.
//
// Subtracting first and then repairing the sign bit based on operand signs
// misses boundary inputs such as op1 == 0, op2 == long.MinValue (the true
// difference 2^63 does not fit in a long). Halving each operand with an
// arithmetic shift before subtracting avoids the overflow altogether. The
// low bits dropped by the shifts require a borrow of one exactly when op1
// is even and op2 is odd.
public static long FusedSubtractHalving(long op1, long op2)
{
    long borrowFromLowBits = ~op1 & op2 & 1;
    return (op1 >> 1) - (op2 >> 1) - borrowFromLowBits;
}
15971638
15981639 public static ulong FusedSubtractHalving ( ulong op1 , ulong op2 )
@@ -1602,7 +1643,6 @@ public static ulong FusedSubtractHalving(ulong op1, ulong op2)
16021643 return ( diff >> 1 ) + ( overflow ? 1UL << 63 : 0 ) ;
16031644 }
16041645
1605-
// Unsigned rounding halving add for 32-bit lanes: (op1 + op2 + 1) / 2.
// The sum is formed in 64 bits so the carry out of bit 31 is retained
// before the shift brings the value back into 32-bit range.
public static uint FusedAddRoundedHalving(uint op1, uint op2)
{
    ulong wideSum = (ulong)op1 + (ulong)op2 + 1;
    ulong halved = wideSum >> 1;
    return (uint)halved;
}
16071647
// Unsigned halving subtract for 32-bit lanes. The difference is formed in
// 64 bits (wrapping when op2 > op1), shifted right by one, and then
// truncated back to the low 32 bits.
public static uint FusedSubtractHalving(uint op1, uint op2)
{
    ulong wideDiff = (ulong)op1 - (ulong)op2;
    ulong halved = wideDiff >> 1;
    return (uint)halved;
}
@@ -2942,7 +2982,7 @@ private static sbyte SignedShift(sbyte op1, sbyte op2, bool rounding = false, bo
29422982 {
29432983 if ( shiftOvf )
29442984 {
2945- result = op2 < 0 ? sbyte . MinValue : sbyte . MaxValue ;
2985+ return op1 > 0 ? sbyte . MaxValue : sbyte . MinValue ;
29462986 }
29472987 }
29482988 }
@@ -3140,8 +3180,19 @@ private static (byte val, bool ovf) SubtractOvf(byte op1, byte op2)
31403180
// Signed saturating add for 8-bit lanes. The operands are added as 32-bit
// integers (where the sum cannot overflow) and the result is pinned to the
// [sbyte.MinValue, sbyte.MaxValue] range before narrowing.
public static sbyte AddSaturate(sbyte op1, sbyte op2)
{
    int wideSum = op1 + op2;
    int saturated = Math.Clamp(wideSum, sbyte.MinValue, sbyte.MaxValue);
    return (sbyte)saturated;
}
31463197
31473198 public static sbyte AddSaturate ( sbyte op1 , byte op2 )
@@ -7517,7 +7568,7 @@ public static T[] CreateBreakPropagateMask<T>(T[] op1, T[] op2) where T : IBinar
75177568
75187569 if ( LastActive ( mask , op1 ) != T . Zero )
75197570 {
7520- Array . Copy ( op2 , result , count ) ;
7571+ result = ConvertVectorToMask ( op2 ) ;
75217572 }
75227573
75237574 return result ;
@@ -8185,7 +8236,34 @@ public static N SubtractRoundedHighNarrowingOdd<W, N>(N even, W op1, W op2, int
81858236 return Odd < N > ( even , SubtractRoundedHighNarrowing < W , N > ( op1 , op2 ) , i ) ;
81868237 }
81878238
8188- public static long FusedAddRoundedHalving ( long op1 , long op2 ) => ( long ) ( ( ulong ) ( op1 + op2 + 1 ) >> 1 ) ;
// Signed rounding halving add: returns floor((op1 + op2 + 1) / 2) as if the
// sum were computed with unlimited precision, so the result never
// overflows.
//
// Detecting overflow of (op1 + op2 + 1) and then fixing the sign bit based
// on the operand signs misses the case where one operand is zero and the
// other is long.MaxValue: the "+ 1" overflows but neither operand-sign test
// applies. Halving each operand with an arithmetic shift before adding
// sidesteps overflow entirely and needs no exception handling. The low
// bits dropped by the shifts, together with the rounding constant,
// contribute one exactly when at least one operand is odd.
public static long FusedAddRoundedHalving(long op1, long op2)
{
    long roundingCarry = (op1 | op2) & 1;
    return (op1 >> 1) + (op2 >> 1) + roundingCarry;
}
81898267
81908268 public static ulong FusedAddRoundedHalving ( ulong op1 , ulong op2 )
81918269 {
0 commit comments