@@ -97,6 +97,11 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
9797 mp_err err ;
9898 int32_t n = 0 , k , t = 0 , steps = 0 ;
9999 int ilog2a ;
100+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
101+ int s , g = 3 ;
102+ mp_int M2 , M4 ;
103+ bool use_newton_raphson = true;
104+ #endif
100105
101106 /* Use given buffer directly, no temporary buffers for the individual chunks */
102107 char * * sptr = & str ;
@@ -109,6 +114,13 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
109114 /* List of moduli */
110115 mp_int * P = NULL ;
111116
117+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
118+ /* Be nice and utter a warning. For now. */
119+ if (radix != 10 ) {
120+ fprintf (stderr ,"The Newton-Raphson method is for base 10 only!\n" );
121+ }
122+ #endif
123+
112124 /* Denominator for the reciprocal: b^y */
113125 n = s_pow ((int32_t )radix , (int32_t )s_mp_radix_exponent_y [radix ]);
114126
@@ -163,6 +175,10 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
163175 if ((err = mp_div (& R [0 ], & P [0 ], & R [0 ], NULL )) != MP_OKAY ) goto LTM_ERR ;
164176 if ((err = mp_incr (& R [0 ])) != MP_OKAY ) goto LTM_ERR ;
165177
178+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
179+ if ((err = mp_init_multi (& M2 , & M4 , NULL )) != MP_OKAY ) goto LTM_ERR ;
180+ #endif
181+
166182 /* Compute the rest of the reciprocals if as needed */
167183 for (t = 1 ; t < steps ; t ++ ) {
168184 /* P_t = (b^y)^(2^t) = n^(2^t) */
@@ -208,15 +224,41 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
208224 /* Compute numerator */
209225 if ((err = mp_init (& R [t ])) != MP_OKAY ) goto LTM_ERR ;
210226
211- /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */
212- /* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */
213- if ((err = mp_2expt (& (R [t ]), (int )((int32_t )((uint32_t )1 << (t + 1 )) * k ))) != MP_OKAY ) goto LTM_ERR ;
214-
215- /* Compute reciprocal */
216- /* R[t] = floor(2^(2^t * k) / P[t] */
217- if ((err = mp_div (& R [t ], & P [t ], & R [t ], NULL )) != MP_OKAY ) goto LTM_ERR ;
218- /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
219- if ((err = mp_incr (& R [t ])) != MP_OKAY ) goto LTM_ERR ;
227+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
228+ if (use_newton_raphson && (radix == 10 )) {
229+ /* Use a round of Newton-Raphson to compute the next reciprocal */
230+ /* s = 2^t*k */
231+ s = (int )((int32_t )((uint32_t )1 << (t )) * k );
232+ /* M = R[t-1] * 2^g */
233+ if ((err = mp_mul_2d (& R [t - 1 ], (mp_digit )3 , & M2 )) != MP_OKAY ) goto LTM_ERR ;
234+ if ((err = mp_sub_d (& M2 , (mp_digit )2 , & M2 )) != MP_OKAY ) goto LTM_ERR ;
235+ /* Do the M-R round: M = floor( ( 2*M^2 - floor((n^2*M^4) / (2^(2*(s+g)))) ) / 2^(2*g)) + 1 */
236+ /* M2 = (M * 2^g)^2*/
237+ if ((err = mp_sqr (& M2 , & M2 )) != MP_OKAY ) goto LTM_ERR ;
238+ /* M4 = M2^2 */
239+ if ((err = mp_sqr (& M2 , & M4 )) != MP_OKAY ) goto LTM_ERR ;
240+ /* Compute numerator: n^2*M^4 = (P[t]) * M4*/
241+ if ((err = mp_mul (& P [t ], & M4 , & M4 )) != MP_OKAY ) goto LTM_ERR ;
242+ /* Compute fraction by shifting right: M4>>2*(s+g) where 2*(s+g) < MAX_INT */
243+ if ((err = mp_div_2d (& M4 , 2 * (s + g ), & M4 , NULL )) != MP_OKAY ) goto LTM_ERR ;
244+ if ((err = mp_mul_2 (& M2 ,& M2 )) != MP_OKAY ) goto LTM_ERR ;
245+ if ((err = mp_sub (& M2 ,& M4 ,& M4 )) != MP_OKAY ) goto LTM_ERR ;
246+ /* R[t] = M / 2^(2*g) remove extra bits before storage */
247+ if ((err = mp_div_2d (& M4 , 2 * g , & (R [t ]), & M4 )) != MP_OKAY ) goto LTM_ERR ;
248+ if ((err = mp_incr (& R [t ])) != MP_OKAY ) goto LTM_ERR ;
249+ } else {
250+ #endif
251+ /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */
252+ /* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */
253+ if ((err = mp_2expt (& (R [t ]), (int )((int32_t )((uint32_t )1 << (t + 1 )) * k ))) != MP_OKAY ) goto LTM_ERR ;
254+ /* Compute reciprocal */
255+ /* R[t] = floor(2^(2^t * k) / P[t] */
256+ if ((err = mp_div (& R [t ], & P [t ], & R [t ], NULL )) != MP_OKAY ) goto LTM_ERR ;
257+ /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
258+ if ((err = mp_incr (& R [t ])) != MP_OKAY ) goto LTM_ERR ;
259+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
260+ }
261+ #endif
220262 }
221263
222264 /* And finally: start the recursion. */
@@ -233,6 +275,9 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
233275 } while (t -- );
234276 MP_FREE_BUF (P , (size_t ) steps * sizeof (mp_int ));
235277 MP_FREE_BUF (R , (size_t ) steps * sizeof (mp_int ));
278+ #ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
279+ mp_clear_multi (& M2 , & M4 , NULL );
280+ #endif
236281 return err ;
237282}
238283
0 commit comments