Skip to content

Commit 5c3d362

Browse files
committed
remove W array from s_mp_mul_comba and s_mp_sqr_comba
remove calls to comba from s_mp_mul and s_mp_mul_high
1 parent cc77fad commit 5c3d362

File tree

11 files changed

+141
-91
lines changed

11 files changed

+141
-91
lines changed

etc/tune.c

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,15 @@ static int s_number_of_test_loops;
5858
static int s_stabilization_extra;
5959
static int s_offset = 1;
6060

61-
#define s_mp_mul_full(a, b, c) s_mp_mul(a, b, c, (a)->used + (b)->used + 1)
61+
/* Multiply a by b into c, requesting a->used + b->used + 1 output digits.
 *
 * Dispatches to the comba multiplier when it is compiled in and the
 * smaller operand has fewer than MP_MAX_COMBA digits; otherwise uses the
 * baseline s_mp_mul. Returns the mp_err reported by whichever routine ran.
 */
static mp_err s_mul_full(const mp_int *a, const mp_int *b, mp_int *c)
{
   /* digit count passed unchanged to either multiplier */
   const int digs = a->used + b->used + 1;

   /* Guard on the routine that is actually called (s_mp_mul_comba), not on
    * the unrelated high-digit variant, so the compile-time MP_HAS check
    * matches the symbol this branch references.
    */
   if (MP_HAS(S_MP_MUL_COMBA)
       && (MP_MIN(a->used, b->used) < MP_MAX_COMBA)) {
      return s_mp_mul_comba(a, b, c, digs);
   }
   return s_mp_mul(a, b, c, digs);
}
69+
6270
static uint64_t s_time_mul(int size)
6371
{
6472
int x;
@@ -87,7 +95,7 @@ static uint64_t s_time_mul(int size)
8795
goto LBL_ERR;
8896
}
8997
if (s_check_result == 1) {
90-
if ((e = s_mp_mul_full(&a,&b,&d)) != MP_OKAY) {
98+
if ((e = s_mul_full(&a,&b,&d)) != MP_OKAY) {
9199
t1 = UINT64_MAX;
92100
goto LBL_ERR;
93101
}

mp_mul.c

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
3131
} else if (MP_HAS(S_MP_MUL_KARATSUBA) &&
3232
(min >= MP_MUL_KARATSUBA_CUTOFF)) {
3333
err = s_mp_mul_karatsuba(a, b, c);
34-
} else if (MP_HAS(S_MP_MUL_COMBA) &&
35-
/* can we use the fast multiplier?
36-
*
37-
* The fast multiplier can be used if the output will
38-
* have less than MP_WARRAY digits and the number of
39-
* digits won't affect carry propagation
40-
*/
41-
(digs < MP_WARRAY) &&
34+
} else if (MP_HAS(S_MP_MUL_COMBA) && /* can we use the fast multiplier? */
4235
(min <= MP_MAX_COMBA)) {
4336
err = s_mp_mul_comba(a, b, c, digs);
4437
} else if (MP_HAS(S_MP_MUL)) {

mp_reduce.c

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,15 @@
33
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
44
/* SPDX-License-Identifier: Unlicense */
55

6+
/* Low-digit product helper: c = a * b, limited to digs output digits.
 *
 * Picks the comba multiplier when it is compiled in and the shorter
 * operand has fewer than MP_MAX_COMBA digits; falls back to s_mp_mul
 * otherwise. The status of the selected routine is returned as-is.
 */
static mp_err s_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs)
{
   mp_err status;

   if (MP_HAS(S_MP_MUL_COMBA) && (MP_MAX_COMBA > MP_MIN(a->used, b->used))) {
      /* comba path: enabled at build time and operands are small enough */
      status = s_mp_mul_comba(a, b, c, digs);
   } else {
      /* baseline schoolbook multiplier */
      status = s_mp_mul(a, b, c, digs);
   }

   return status;
}
14+
615
/* reduces x mod m, assumes 0 < x < m**2, mu is
716
* precomputed via mp_reduce_setup.
817
* From HAC pp.604 Algorithm 14.42
@@ -26,14 +35,14 @@ mp_err mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
2635
if ((err = mp_mul(&q, mu, &q)) != MP_OKAY) {
2736
goto LBL_ERR;
2837
}
29-
} else if (MP_HAS(S_MP_MUL_HIGH)) {
30-
if ((err = s_mp_mul_high(&q, mu, &q, um)) != MP_OKAY) {
31-
goto LBL_ERR;
32-
}
3338
} else if (MP_HAS(S_MP_MUL_HIGH_COMBA)) {
3439
if ((err = s_mp_mul_high_comba(&q, mu, &q, um)) != MP_OKAY) {
3540
goto LBL_ERR;
3641
}
42+
} else if (MP_HAS(S_MP_MUL_HIGH)) {
43+
if ((err = s_mp_mul_high(&q, mu, &q, um)) != MP_OKAY) {
44+
goto LBL_ERR;
45+
}
3746
} else {
3847
err = MP_VAL;
3948
goto LBL_ERR;
@@ -48,7 +57,7 @@ mp_err mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4857
}
4958

5059
/* q = q * m mod b**(k+1), quick (no division) */
51-
if ((err = s_mp_mul(&q, m, &q, um + 1)) != MP_OKAY) {
60+
if ((err = s_mul(&q, m, &q, um + 1)) != MP_OKAY) {
5261
goto LBL_ERR;
5362
}
5463

mp_sqr.c

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ mp_err mp_sqr(const mp_int *a, mp_int *b)
1414
(a->used >= MP_SQR_KARATSUBA_CUTOFF)) {
1515
err = s_mp_sqr_karatsuba(a, b);
1616
} else if (MP_HAS(S_MP_SQR_COMBA) && /* can we use the fast comba multiplier? */
17-
(((a->used * 2) + 1) < MP_WARRAY) &&
1817
(a->used < (MP_MAX_COMBA / 2))) {
1918
err = s_mp_sqr_comba(a, b);
2019
} else if (MP_HAS(S_MP_SQR)) {

s_mp_mul.c

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,24 @@
99
*/
1010
mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs)
1111
{
12-
mp_int t;
12+
mp_int tmp, *c_;
1313
mp_err err;
1414
int pa, ix;
1515

16-
/* can we use the fast multiplier? */
17-
if ((digs < MP_WARRAY) &&
18-
(MP_MIN(a->used, b->used) < MP_MAX_COMBA)) {
19-
return s_mp_mul_comba(a, b, c, digs);
16+
if (MP_ALIAS(a, c) || MP_ALIAS(b, c)) {
17+
c_ = &tmp;
18+
if ((err = mp_init_size(c_, digs)) != MP_OKAY) {
19+
return err;
20+
}
21+
} else {
22+
c_ = c;
23+
if ((err = mp_grow(c_, digs)) != MP_OKAY) {
24+
return err;
25+
}
26+
s_mp_zero_digs(c_->dp, c_->used);
2027
}
2128

22-
if ((err = mp_init_size(&t, digs)) != MP_OKAY) {
23-
return err;
24-
}
25-
t.used = digs;
29+
c_->used = digs;
2630

2731
/* compute the digits of the product directly */
2832
pa = a->used;
@@ -36,26 +40,29 @@ mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs)
3640
/* compute the columns of the output and propagate the carry */
3741
for (iy = 0; iy < pb; iy++) {
3842
/* compute the column as a mp_word */
39-
mp_word r = (mp_word)t.dp[ix + iy] +
43+
mp_word r = (mp_word)c_->dp[ix + iy] +
4044
((mp_word)a->dp[ix] * (mp_word)b->dp[iy]) +
4145
(mp_word)u;
4246

4347
/* the new column is the lower part of the result */
44-
t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
48+
c_->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
4549

4650
/* get the carry word from the result */
4751
u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
4852
}
4953
/* set carry if it is placed below digs */
5054
if ((ix + iy) < digs) {
51-
t.dp[ix + pb] = u;
55+
c_->dp[ix + pb] = u;
5256
}
5357
}
5458

55-
mp_clamp(&t);
56-
mp_exch(&t, c);
59+
mp_clamp(c_);
60+
61+
if (c != c_) {
62+
mp_exch(c_, c);
63+
mp_clear(c_);
64+
}
5765

58-
mp_clear(&t);
5966
return MP_OKAY;
6067
}
6168
#endif

s_mp_mul_comba.c

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,19 +23,27 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs)
2323
{
2424
int oldused, pa, ix;
2525
mp_err err;
26-
mp_digit W[MP_WARRAY];
27-
mp_word _W;
26+
mp_word W;
27+
mp_int tmp, *c_;
2828

2929
/* grow the destination as required */
30-
if ((err = mp_grow(c, digs)) != MP_OKAY) {
31-
return err;
30+
if (MP_ALIAS(a, c) || MP_ALIAS(b, c)) {
31+
c_ = &tmp;
32+
if ((err = mp_init_size(c_, digs)) != MP_OKAY) {
33+
return err;
34+
}
35+
} else {
36+
c_ = c;
37+
if ((err = mp_grow(c_, digs)) != MP_OKAY) {
38+
return err;
39+
}
3240
}
3341

3442
/* number of output digits to produce */
3543
pa = MP_MIN(digs, a->used + b->used);
3644

3745
/* clear the carry */
38-
_W = 0;
46+
W = 0;
3947
for (ix = 0; ix < pa; ix++) {
4048
int tx, ty, iy, iz;
4149

@@ -50,29 +58,30 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs)
5058

5159
/* execute loop */
5260
for (iz = 0; iz < iy; ++iz) {
53-
_W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz];
61+
W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz];
5462
}
5563

5664
/* store term */
57-
W[ix] = (mp_digit)_W & MP_MASK;
65+
c_->dp[ix] = (mp_digit)W & MP_MASK;
5866

5967
/* make next carry */
60-
_W = _W >> (mp_word)MP_DIGIT_BIT;
68+
W = W >> (mp_word)MP_DIGIT_BIT;
6169
}
6270

6371
/* setup dest */
64-
oldused = c->used;
65-
c->used = pa;
66-
67-
for (ix = 0; ix < pa; ix++) {
68-
/* now extract the previous digit [below the carry] */
69-
c->dp[ix] = W[ix];
70-
}
72+
oldused = c_->used;
73+
c_->used = pa;
7174

7275
/* clear unused digits [that existed in the old copy of c] */
73-
s_mp_zero_digs(c->dp + c->used, oldused - c->used);
76+
s_mp_zero_digs(c_->dp + c_->used, oldused - c_->used);
77+
78+
mp_clamp(c_);
79+
80+
if (c != c_) {
81+
mp_exch(c_, c);
82+
mp_clear(c_);
83+
}
7484

75-
mp_clamp(c);
7685
return MP_OKAY;
7786
}
7887
#endif

s_mp_mul_high.c

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,6 @@ mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs)
1212
int pa, pb, ix;
1313
mp_err err;
1414

15-
/* can we use the fast multiplier? */
16-
if (MP_HAS(S_MP_MUL_HIGH_COMBA)
17-
&& ((a->used + b->used + 1) < MP_WARRAY)
18-
&& (MP_MIN(a->used, b->used) < MP_MAX_COMBA)) {
19-
return s_mp_mul_high_comba(a, b, c, digs);
20-
}
21-
2215
if ((err = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
2316
return err;
2417
}

s_mp_sqr.c

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -6,29 +6,39 @@
66
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
77
mp_err s_mp_sqr(const mp_int *a, mp_int *b)
88
{
9-
mp_int t;
9+
mp_int tmp, *b_;
1010
int ix, pa;
1111
mp_err err;
1212

1313
pa = a->used;
14-
if ((err = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {
15-
return err;
14+
15+
if (MP_ALIAS(a, b)) {
16+
b_ = &tmp;
17+
if ((err = mp_init_size(b_, (2 * pa) + 1)) != MP_OKAY) {
18+
return err;
19+
}
20+
} else {
21+
b_ = b;
22+
if ((err = mp_grow(b_, (2 * pa) + 1)) != MP_OKAY) {
23+
return err;
24+
}
25+
s_mp_zero_digs(b_->dp, b_->used);
1626
}
1727

1828
/* default used is maximum possible size */
19-
t.used = (2 * pa) + 1;
29+
b_->used = (2 * pa) + 1;
2030

2131
for (ix = 0; ix < pa; ix++) {
2232
mp_digit u;
2333
int iy;
2434

2535
/* first calculate the digit at 2*ix */
2636
/* calculate double precision result */
27-
mp_word r = (mp_word)t.dp[2*ix] +
37+
mp_word r = (mp_word)b_->dp[2*ix] +
2838
((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);
2939

3040
/* store lower part in result */
31-
t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
41+
b_->dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
3242

3343
/* get the carry */
3444
u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
@@ -40,26 +50,30 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b)
4050
/* now calculate the double precision result, note we use
4151
* addition instead of *2 since it's easier to optimize
4252
*/
43-
r = (mp_word)t.dp[ix + iy] + r + r + (mp_word)u;
53+
r = (mp_word)b_->dp[ix + iy] + r + r + (mp_word)u;
4454

4555
/* store lower part */
46-
t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
56+
b_->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
4757

4858
/* get carry */
4959
u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
5060
}
5161
/* propagate upwards */
5262
while (u != 0uL) {
53-
r = (mp_word)t.dp[ix + iy] + (mp_word)u;
54-
t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
63+
r = (mp_word)b_->dp[ix + iy] + (mp_word)u;
64+
b_->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
5565
u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
5666
++iy;
5767
}
5868
}
5969

60-
mp_clamp(&t);
61-
mp_exch(&t, b);
62-
mp_clear(&t);
70+
mp_clamp(b_);
71+
72+
if (b != b_) {
73+
mp_exch(b_, b);
74+
mp_clear(b_);
75+
}
76+
6377
return MP_OKAY;
6478
}
6579
#endif

0 commit comments

Comments
 (0)