Skip to content

Commit

Permalink
ec: make P384 code a little bit more generic
Browse files Browse the repository at this point in the history
This change makes it easier to reuse the P384 code, which is already quite
generic. No algorithmic changes are made; only some code is shuffled
around. This prepares the ground for a P521 implementation.
  • Loading branch information
vkrasnov committed Jan 12, 2024
1 parent c72a5aa commit fcf5cd6
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 330 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ include = [
"crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl",
"crypto/fipsmodule/ec/ecp_nistz.c",
"crypto/fipsmodule/ec/ecp_nistz.h",
"crypto/fipsmodule/ec/ecp_nistz384.h",
"crypto/fipsmodule/ec/ecp_nistz384.inl",
"crypto/fipsmodule/ec/ecp_nistz.inl",
"crypto/fipsmodule/ec/gfp.h",
"crypto/fipsmodule/ec/gfp_p256.c",
"crypto/fipsmodule/ec/gfp_p384.c",
"crypto/fipsmodule/ec/p256.c",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,24 @@
* Shay Gueron and Vlad Krasnov
* "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes"
* http://eprint.iacr.org/2013/816 */

#include "ecp_nistz.h"
#include "gfp.h"

#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
#endif

#define point_add(prefix, bits) RENAME_FUNC(prefix, bits, point_add)
#define point_double(prefix, bits) RENAME_FUNC(prefix, bits, point_double)
#define point_mul(prefix, bits) RENAME_FUNC(prefix, bits, point_mul)

/* Point double: r = 2*a */
static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
BN_ULONG S[P384_LIMBS];
BN_ULONG M[P384_LIMBS];
BN_ULONG Zsqr[P384_LIMBS];
BN_ULONG tmp0[P384_LIMBS];
static void point_double(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a) {
BN_ULONG S[FE_LIMBS];
BN_ULONG M[FE_LIMBS];
BN_ULONG Zsqr[FE_LIMBS];
BN_ULONG tmp0[FE_LIMBS];

const BN_ULONG *in_x = a->X;
const BN_ULONG *in_y = a->Y;
Expand Down Expand Up @@ -74,20 +78,20 @@ static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
}

/* Point addition: r = a+b */
static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
const P384_POINT *b) {
BN_ULONG U2[P384_LIMBS], S2[P384_LIMBS];
BN_ULONG U1[P384_LIMBS], S1[P384_LIMBS];
BN_ULONG Z1sqr[P384_LIMBS];
BN_ULONG Z2sqr[P384_LIMBS];
BN_ULONG H[P384_LIMBS], R[P384_LIMBS];
BN_ULONG Hsqr[P384_LIMBS];
BN_ULONG Rsqr[P384_LIMBS];
BN_ULONG Hcub[P384_LIMBS];

BN_ULONG res_x[P384_LIMBS];
BN_ULONG res_y[P384_LIMBS];
BN_ULONG res_z[P384_LIMBS];
static void point_add(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a,
const NIST_POINT *b) {
BN_ULONG U2[FE_LIMBS], S2[FE_LIMBS];
BN_ULONG U1[FE_LIMBS], S1[FE_LIMBS];
BN_ULONG Z1sqr[FE_LIMBS];
BN_ULONG Z2sqr[FE_LIMBS];
BN_ULONG H[FE_LIMBS], R[FE_LIMBS];
BN_ULONG Hsqr[FE_LIMBS];
BN_ULONG Rsqr[FE_LIMBS];
BN_ULONG Hcub[FE_LIMBS];

BN_ULONG res_x[FE_LIMBS];
BN_ULONG res_y[FE_LIMBS];
BN_ULONG res_z[FE_LIMBS];

const BN_ULONG *in1_x = a->X;
const BN_ULONG *in1_y = a->Y;
Expand Down Expand Up @@ -117,11 +121,11 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
BN_ULONG is_exceptional = is_equal(U1, U2) & ~in1infty & ~in2infty;
if (is_exceptional) {
if (is_equal(S1, S2)) {
nistz384_point_double(r, a);
point_double(nistz, BITS)(r, a);
} else {
limbs_zero(r->X, P384_LIMBS);
limbs_zero(r->Y, P384_LIMBS);
limbs_zero(r->Z, P384_LIMBS);
limbs_zero(r->X, FE_LIMBS);
limbs_zero(r->Y, FE_LIMBS);
limbs_zero(r->Z, FE_LIMBS);
}
return;
}
Expand Down Expand Up @@ -152,147 +156,136 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
copy_conditional(res_y, in1_y, in2infty);
copy_conditional(res_z, in1_z, in2infty);

limbs_copy(r->X, res_x, P384_LIMBS);
limbs_copy(r->Y, res_y, P384_LIMBS);
limbs_copy(r->Z, res_z, P384_LIMBS);
limbs_copy(r->X, res_x, FE_LIMBS);
limbs_copy(r->Y, res_y, FE_LIMBS);
limbs_copy(r->Z, res_z, FE_LIMBS);
}

static void add_precomputed_w5(P384_POINT *r, crypto_word_t wvalue,
const P384_POINT table[16]) {
static void add_precomputed_w(NIST_POINT *r, crypto_word_t wvalue,
const NIST_POINT table[TBL_SZ]) {
crypto_word_t recoded_is_negative;
crypto_word_t recoded;
booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);

alignas(64) P384_POINT h;
p384_point_select_w5(&h, table, recoded);
alignas(64) NIST_POINT h;
NIST_POINT_select_w(&h, table, recoded);

alignas(64) BN_ULONG tmp[P384_LIMBS];
p384_elem_neg(tmp, h.Y);
alignas(64) BN_ULONG tmp[FE_LIMBS];
elem_neg(tmp, h.Y);
copy_conditional(h.Y, tmp, recoded_is_negative);

nistz384_point_add(r, r, &h);
point_add(nistz, BITS)(r, r, &h);
}

/* r = p * p_scalar */
static void nistz384_point_mul(P384_POINT *r,
const BN_ULONG p_scalar[P384_LIMBS],
const Limb p_x[P384_LIMBS],
const Limb p_y[P384_LIMBS]) {
static const size_t kWindowSize = 5;
static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;

uint8_t p_str[(P384_LIMBS * sizeof(Limb)) + 1];
static void point_mul(nistz, BITS)(NIST_POINT *r, const BN_ULONG p_scalar[FE_LIMBS],
const BN_ULONG p_x[FE_LIMBS],
const BN_ULONG p_y[FE_LIMBS]) {
uint8_t p_str[(FE_LIMBS * sizeof(Limb)) + 1];
little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
p_scalar, P384_LIMBS);
p_scalar, FE_LIMBS);

/* A |P384_POINT| is (3 * 48) = 144 bytes, and the 64-byte alignment should
/* For P-384, a |NIST_POINT| is (3 * 48) = 144 bytes, and the 64-byte
* alignment should add no more than 63 bytes of overhead. Thus, |table|
* should require ~2367 ((144 * 16) + 63) bytes of stack space in that case;
* in general it needs about (sizeof(NIST_POINT) * TBL_SZ) + 63 bytes. */
alignas(64) P384_POINT table[16];
alignas(64) NIST_POINT table[TBL_SZ];

/* The entry for 0*P is implicitly (0,0,0) (the point at infinity), therefore
* it is not stored. All other values are stored with an offset of -1 in
* table, i.e. row[i] holds (i + 1)*P. */
P384_POINT *row = table;

limbs_copy(row[1 - 1].X, p_x, P384_LIMBS);
limbs_copy(row[1 - 1].Y, p_y, P384_LIMBS);
limbs_copy(row[1 - 1].Z, ONE, P384_LIMBS);

nistz384_point_double(&row[2 - 1], &row[1 - 1]);
nistz384_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
nistz384_point_double(&row[4 - 1], &row[2 - 1]);
nistz384_point_double(&row[6 - 1], &row[3 - 1]);
nistz384_point_double(&row[8 - 1], &row[4 - 1]);
nistz384_point_double(&row[12 - 1], &row[6 - 1]);
nistz384_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
nistz384_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
nistz384_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
nistz384_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
nistz384_point_double(&row[14 - 1], &row[7 - 1]);
nistz384_point_double(&row[10 - 1], &row[5 - 1]);
nistz384_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
nistz384_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
nistz384_point_double(&row[16 - 1], &row[8 - 1]);

static const size_t START_INDEX = 384 - 4;
NIST_POINT *row = table;

limbs_copy(row[0].X, p_x, FE_LIMBS);
limbs_copy(row[0].Y, p_y, FE_LIMBS);
limbs_copy(row[0].Z, ONE, FE_LIMBS);

point_double(nistz, BITS)(&row[1], &row[0]);

for (int i = 2; i < TBL_SZ; i += 2) {
point_add(nistz, BITS)(&row[i], &row[i - 1], &row[0]);
point_double(nistz, BITS)(&row[i + 1], &row[i / 2]);
}

static const size_t ROUND_SIZE = (BITS + W_BITS - 1) / W_BITS * W_BITS;
size_t START_INDEX = ROUND_SIZE == BITS + 1 ? ROUND_SIZE - W_BITS: ROUND_SIZE;
size_t index = START_INDEX;

BN_ULONG recoded_is_negative;
crypto_word_t recoded;

crypto_word_t wvalue = p_str[(index - 1) / 8];
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;

booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);
dev_assert_secret(!recoded_is_negative);

p384_point_select_w5(r, table, recoded);
NIST_POINT_select_w(r, table, recoded);

while (index >= kWindowSize) {
while (index >= W_BITS) {
if (index != START_INDEX) {
size_t off = (index - 1) / 8;

wvalue = p_str[off] | p_str[off + 1] << 8;
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
add_precomputed_w5(r, wvalue, table);
wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;
add_precomputed_w(r, wvalue, table);
}

index -= kWindowSize;
index -= W_BITS;

nistz384_point_double(r, r);
nistz384_point_double(r, r);
nistz384_point_double(r, r);
nistz384_point_double(r, r);
nistz384_point_double(r, r);
for (int i = 0; i < W_BITS; i++) {
point_double(nistz, BITS)(r, r);
}
}

/* Final window */
wvalue = p_str[0];
wvalue = (wvalue << 1) & kMask;
add_precomputed_w5(r, wvalue, table);
wvalue = (wvalue << 1) & W_MASK;
add_precomputed_w(r, wvalue, table);
}

void p384_point_double(Limb r[3][P384_LIMBS], const Limb a[3][P384_LIMBS])
void point_double(p, BITS)(Limb r[3][FE_LIMBS], const Limb a[3][FE_LIMBS])
{
P384_POINT t;
limbs_copy(t.X, a[0], P384_LIMBS);
limbs_copy(t.Y, a[1], P384_LIMBS);
limbs_copy(t.Z, a[2], P384_LIMBS);
nistz384_point_double(&t, &t);
limbs_copy(r[0], t.X, P384_LIMBS);
limbs_copy(r[1], t.Y, P384_LIMBS);
limbs_copy(r[2], t.Z, P384_LIMBS);
NIST_POINT t;
limbs_copy(t.X, a[0], FE_LIMBS);
limbs_copy(t.Y, a[1], FE_LIMBS);
limbs_copy(t.Z, a[2], FE_LIMBS);
point_double(nistz, BITS)(&t, &t);
limbs_copy(r[0], t.X, FE_LIMBS);
limbs_copy(r[1], t.Y, FE_LIMBS);
limbs_copy(r[2], t.Z, FE_LIMBS);
}

void p384_point_add(Limb r[3][P384_LIMBS],
const Limb a[3][P384_LIMBS],
const Limb b[3][P384_LIMBS])
void point_add(p, BITS)(Limb r[3][FE_LIMBS],
const Limb a[3][FE_LIMBS],
const Limb b[3][FE_LIMBS])
{
P384_POINT t1;
limbs_copy(t1.X, a[0], P384_LIMBS);
limbs_copy(t1.Y, a[1], P384_LIMBS);
limbs_copy(t1.Z, a[2], P384_LIMBS);
NIST_POINT t1;
limbs_copy(t1.X, a[0], FE_LIMBS);
limbs_copy(t1.Y, a[1], FE_LIMBS);
limbs_copy(t1.Z, a[2], FE_LIMBS);

P384_POINT t2;
limbs_copy(t2.X, b[0], P384_LIMBS);
limbs_copy(t2.Y, b[1], P384_LIMBS);
limbs_copy(t2.Z, b[2], P384_LIMBS);
NIST_POINT t2;
limbs_copy(t2.X, b[0], FE_LIMBS);
limbs_copy(t2.Y, b[1], FE_LIMBS);
limbs_copy(t2.Z, b[2], FE_LIMBS);

nistz384_point_add(&t1, &t1, &t2);
point_add(nistz, BITS)(&t1, &t1, &t2);

limbs_copy(r[0], t1.X, P384_LIMBS);
limbs_copy(r[1], t1.Y, P384_LIMBS);
limbs_copy(r[2], t1.Z, P384_LIMBS);
limbs_copy(r[0], t1.X, FE_LIMBS);
limbs_copy(r[1], t1.Y, FE_LIMBS);
limbs_copy(r[2], t1.Z, FE_LIMBS);
}

void p384_point_mul(Limb r[3][P384_LIMBS], const BN_ULONG p_scalar[P384_LIMBS],
const Limb p_x[P384_LIMBS], const Limb p_y[P384_LIMBS]) {
alignas(64) P384_POINT acc;
nistz384_point_mul(&acc, p_scalar, p_x, p_y);
limbs_copy(r[0], acc.X, P384_LIMBS);
limbs_copy(r[1], acc.Y, P384_LIMBS);
limbs_copy(r[2], acc.Z, P384_LIMBS);
void point_mul(p, BITS)(Limb r[3][FE_LIMBS],
const BN_ULONG p_scalar[FE_LIMBS],
const Limb p_x[FE_LIMBS],
const Limb p_y[FE_LIMBS])
{
alignas(64) NIST_POINT acc;
point_mul(nistz, BITS)(&acc, p_scalar, p_x, p_y);
limbs_copy(r[0], acc.X, FE_LIMBS);
limbs_copy(r[1], acc.Y, FE_LIMBS);
limbs_copy(r[2], acc.Z, FE_LIMBS);
}

#if defined(__GNUC__) || defined(__clang__)
Expand Down
34 changes: 0 additions & 34 deletions crypto/fipsmodule/ec/ecp_nistz384.h

This file was deleted.

Loading

0 comments on commit fcf5cd6

Please sign in to comment.