Skip to content

Commit 85cad90

Browse files
committed
Do one iteration of Newton-Raphson refinement for FP16 inv
FP16 has only 10 explicitly stored mantissa bits, so one iteration of refinement should be precise enough. Numerical results showed that both one and two iterations of Newton-Raphson refinement had a worst-case error of 1 ULP. Signed-off-by: Ben Niu <[email protected]>
1 parent 531a496 commit 85cad90

File tree

2 files changed

+0
-3
lines changed

2 files changed

+0
-3
lines changed

src/core/NEON/NEMath.inl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -634,15 +634,13 @@ inline float16x4_t vinv_f16(float16x4_t x)
634634
{
635635
float16x4_t recip = vrecpe_f16(x);
636636
recip = vmul_f16(vrecps_f16(x, recip), recip);
637-
recip = vmul_f16(vrecps_f16(x, recip), recip);
638637
return recip;
639638
}
640639

641640
inline float16x8_t vinvq_f16(float16x8_t x)
642641
{
643642
float16x8_t recip = vrecpeq_f16(x);
644643
recip = vmulq_f16(vrecpsq_f16(x, recip), recip);
645-
recip = vmulq_f16(vrecpsq_f16(x, recip), recip);
646644
return recip;
647645
}
648646

src/core/NEON/SVEMath.inl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ inline svfloat16_t svinv_f16_z(svbool_t pg, svfloat16_t x)
8282
{
8383
auto recip = svrecpe_f16(x);
8484
recip = svmul_f16_z(pg, svrecps_f16(x, recip), recip);
85-
recip = svmul_f16_z(pg, svrecps_f16(x, recip), recip);
8685
return recip;
8786
}
8887

0 commit comments

Comments
 (0)