From 27aa5386a098d9540ab385533b41d1a5c0c46bd6 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 16:28:14 -0700 Subject: [PATCH] EC P-384: Use array types for crossing Rust<->C boundary. Avoid using the P384_POINT type in the signatures of the C functions that are called from Rust. Sharing P384_POINT across the boundary seems to work for all the targets we support, for P-384, but this pattern probably doesn't work in general. Especially due to alignment issues for 32-bit targets, it is doubtful it would work for P-521. --- build.rs | 6 +-- crypto/fipsmodule/ec/ecp_nistz384.inl | 55 ++++++++++++++++++++++++--- src/ec/suite_b/ops.rs | 4 +- src/ec/suite_b/ops/p384.rs | 8 ++-- 4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/build.rs b/build.rs index 3cbae9d926..f7b94108b7 100644 --- a/build.rs +++ b/build.rs @@ -949,9 +949,6 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "ecp_nistz256_neg", "ecp_nistz256_select_w5", "ecp_nistz256_select_w7", - "nistz384_point_add", - "nistz384_point_double", - "nistz384_point_mul", "p256_mul_mont", "p256_point_add", "p256_point_add_affine", @@ -966,6 +963,9 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p384_elem_mul_mont", "p384_elem_neg", "p384_elem_sub", + "p384_point_add", + "p384_point_double", + "p384_point_mul", "p384_scalar_mul_mont", "openssl_poly1305_neon2_addmulmod", "openssl_poly1305_neon2_blocks", diff --git a/crypto/fipsmodule/ec/ecp_nistz384.inl b/crypto/fipsmodule/ec/ecp_nistz384.inl index 7555cb22a9..ae28f97ae5 100644 --- a/crypto/fipsmodule/ec/ecp_nistz384.inl +++ b/crypto/fipsmodule/ec/ecp_nistz384.inl @@ -29,7 +29,7 @@ #endif /* Point double: r = 2*a */ -void nistz384_point_double(P384_POINT *r, const P384_POINT *a) { +static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) { BN_ULONG S[P384_LIMBS]; BN_ULONG M[P384_LIMBS]; BN_ULONG Zsqr[P384_LIMBS]; @@ -74,8 +74,8 @@ void nistz384_point_double(P384_POINT *r, const P384_POINT *a) { } /* Point addition: r = a+b */ -void 
nistz384_point_add(P384_POINT *r, const P384_POINT *a, - const P384_POINT *b) { +static void nistz384_point_add(P384_POINT *r, const P384_POINT *a, + const P384_POINT *b) { BN_ULONG U2[P384_LIMBS], S2[P384_LIMBS]; BN_ULONG U1[P384_LIMBS], S1[P384_LIMBS]; BN_ULONG Z1sqr[P384_LIMBS]; @@ -174,9 +174,10 @@ static void add_precomputed_w5(P384_POINT *r, crypto_word_t wvalue, } /* r = p * p_scalar */ -void nistz384_point_mul(P384_POINT *r, const BN_ULONG p_scalar[P384_LIMBS], - const BN_ULONG p_x[P384_LIMBS], - const BN_ULONG p_y[P384_LIMBS]) { +static void nistz384_point_mul(P384_POINT *r, + const BN_ULONG p_scalar[P384_LIMBS], + const Limb p_x[P384_LIMBS], + const Limb p_y[P384_LIMBS]) { static const size_t kWindowSize = 5; static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1; @@ -252,6 +253,48 @@ void nistz384_point_mul(P384_POINT *r, const BN_ULONG p_scalar[P384_LIMBS], add_precomputed_w5(r, wvalue, table); } +void p384_point_double(Limb r[3][P384_LIMBS], const Limb a[3][P384_LIMBS]) +{ + P384_POINT t; + limbs_copy(t.X, a[0], P384_LIMBS); + limbs_copy(t.Y, a[1], P384_LIMBS); + limbs_copy(t.Z, a[2], P384_LIMBS); + nistz384_point_double(&t, &t); + limbs_copy(r[0], t.X, P384_LIMBS); + limbs_copy(r[1], t.Y, P384_LIMBS); + limbs_copy(r[2], t.Z, P384_LIMBS); +} + +void p384_point_add(Limb r[3][P384_LIMBS], + const Limb a[3][P384_LIMBS], + const Limb b[3][P384_LIMBS]) +{ + P384_POINT t1; + limbs_copy(t1.X, a[0], P384_LIMBS); + limbs_copy(t1.Y, a[1], P384_LIMBS); + limbs_copy(t1.Z, a[2], P384_LIMBS); + + P384_POINT t2; + limbs_copy(t2.X, b[0], P384_LIMBS); + limbs_copy(t2.Y, b[1], P384_LIMBS); + limbs_copy(t2.Z, b[2], P384_LIMBS); + + nistz384_point_add(&t1, &t1, &t2); + + limbs_copy(r[0], t1.X, P384_LIMBS); + limbs_copy(r[1], t1.Y, P384_LIMBS); + limbs_copy(r[2], t1.Z, P384_LIMBS); +} + +void p384_point_mul(Limb r[3][P384_LIMBS], const BN_ULONG p_scalar[P384_LIMBS], + const Limb p_x[P384_LIMBS], const Limb p_y[P384_LIMBS]) { + alignas(64) P384_POINT acc; + 
nistz384_point_mul(&acc, p_scalar, p_x, p_y); + limbs_copy(r[0], acc.X, P384_LIMBS); + limbs_copy(r[1], acc.Y, P384_LIMBS); + limbs_copy(r[2], acc.Z, P384_LIMBS); +} + #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs index 5aa241390e..e49c3ef165 100644 --- a/src/ec/suite_b/ops.rs +++ b/src/ec/suite_b/ops.rs @@ -856,14 +856,14 @@ mod tests { #[test] fn p384_point_double_test() { prefixed_extern! { - fn nistz384_point_double( + fn p384_point_double( r: *mut Limb, // [p384::COMMON_OPS.num_limbs*3] a: *const Limb, // [p384::COMMON_OPS.num_limbs*3] ); } point_double_test( &p384::PRIVATE_KEY_OPS, - nistz384_point_double, + p384_point_double, test_file!("ops/p384_point_double_tests.txt"), ); } diff --git a/src/ec/suite_b/ops/p384.rs b/src/ec/suite_b/ops/p384.rs index f424c520d0..54ec00aa8f 100644 --- a/src/ec/suite_b/ops/p384.rs +++ b/src/ec/suite_b/ops/p384.rs @@ -33,14 +33,14 @@ pub static COMMON_OPS: CommonOps = CommonOps { elem_mul_mont: p384_elem_mul_mont, elem_sqr_mont: p384_elem_sqr_mont, - point_add_jacobian_impl: nistz384_point_add, + point_add_jacobian_impl: p384_point_add, }; pub static PRIVATE_KEY_OPS: PrivateKeyOps = PrivateKeyOps { common: &COMMON_OPS, elem_inv_squared: p384_elem_inv_squared, point_mul_base_impl: p384_point_mul_base_impl, - point_mul_impl: nistz384_point_mul, + point_mul_impl: p384_point_mul, }; fn p384_elem_inv_squared(a: &Elem) -> Elem { @@ -286,12 +286,12 @@ prefixed_extern! { b: *const Limb, // [COMMON_OPS.num_limbs] ); - fn nistz384_point_add( + fn p384_point_add( r: *mut Limb, // [3][COMMON_OPS.num_limbs] a: *const Limb, // [3][COMMON_OPS.num_limbs] b: *const Limb, // [3][COMMON_OPS.num_limbs] ); - fn nistz384_point_mul( + fn p384_point_mul( r: *mut Limb, // [3][COMMON_OPS.num_limbs] p_scalar: *const Limb, // [COMMON_OPS.num_limbs] p_x: *const Limb, // [COMMON_OPS.num_limbs]