Safegcd inverses, drop Jacobi symbols, remove libgmp#831
Safegcd inverses, drop Jacobi symbols, remove libgmp#831 — sipa merged 16 commits into bitcoin-core:master from
Conversation
|
Ping @peterdettman I made the following change to avoid signed overflow:

diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index d5ff75c..a2c16a6 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1362,7 +1362,7 @@ static void secp256k1_fe_update_de_30(int32_t *d, int32_t *e, const int32_t *t)
/* P == 2^256 - 2^32 - C30 */
const int64_t C30 = 0x3D1L;
/* I30 == -P^-1 mod 2^30 */
- const int32_t I30 = 0x12253531L;
+ const uint32_t I30 = 0x12253531L;
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
const int32_t u = t[0], v = t[1], q = t[2], r = t[3];
int32_t di, ei, md, me;
@@ -1377,8 +1377,8 @@ static void secp256k1_fe_update_de_30(int32_t *d, int32_t *e, const int32_t *t)
/* Calculate the multiples of P to add, to zero the 30 bottom bits. We choose md, me
* from the centred range [-2^29, 2^29) to keep d, e within [-2^256, 2^256). */
- md = (I30 * 4 * (int32_t)cd) >> 2;
- me = (I30 * 4 * (int32_t)ce) >> 2;
+ md = ((int32_t)(I30 * 4 * (uint32_t)cd)) >> 2;
+ me = ((int32_t)(I30 * 4 * (uint32_t)ce)) >> 2;
cd -= (int64_t)C30 * md;
ce -= (int64_t)C30 * me;
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 2e81fc2..8f7d290 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -663,7 +663,7 @@ static void secp256k1_fe_update_de_62(int64_t *d, int64_t *e, const int64_t *t)
/* P == 2^256 - C62 */
const int64_t C62 = 0x1000003D1LL;
/* I62 == -P^-1 mod 2^62 */
- const int64_t I62 = 0x1838091DD2253531LL;
+ const uint64_t I62 = 0x1838091DD2253531LL;
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
const int64_t d0 = d[0], d1 = d[1], d2 = d[2], d3 = d[3], d4 = d[4];
const int64_t e0 = e[0], e1 = e[1], e2 = e[2], e3 = e[3], e4 = e[4];
@@ -676,8 +676,8 @@ static void secp256k1_fe_update_de_62(int64_t *d, int64_t *e, const int64_t *t)
/* Calculate the multiples of P to add, to zero the 62 bottom bits. We choose md, me
* from the centred range [-2^61, 2^61) to keep d, e within [-2^256, 2^256). */
- md = (I62 * 4 * (int64_t)cd) >> 2;
- me = (I62 * 4 * (int64_t)ce) >> 2;
+ md = ((int64_t)(I62 * 4 * (uint64_t)cd)) >> 2;
+ me = ((int64_t)(I62 * 4 * (uint64_t)ce)) >> 2;
cd -= (int128_t)C62 * md;
ce -= (int128_t)C62 * me;
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 5dfa742..b649928 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -1113,7 +1113,7 @@ static uint64_t secp256k1_scalar_divsteps_62_var(uint64_t eta, uint64_t f0, uint
static void secp256k1_scalar_update_de_62(int64_t *d, int64_t *e, const int64_t *t) {
/* I62 == -P^-1 mod 2^62 */
- const int64_t I62 = 0x0B0DFF665588B13FLL;
+ const uint64_t I62 = 0x0B0DFF665588B13FLL;
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
const int64_t P[5] = { 0x3FD25E8CD0364141LL, 0x2ABB739ABD2280EELL, -0x15LL, 0, 256 };
const int64_t d0 = d[0], d1 = d[1], d2 = d[2], d3 = d[3], d4 = d[4];
@@ -1127,8 +1127,8 @@ static void secp256k1_scalar_update_de_62(int64_t *d, int64_t *e, const int64_t
/* Calculate the multiples of P to add, to zero the 62 bottom bits. We choose md, me
* from the centred range [-2^61, 2^61) to keep d, e within [-2^256, 2^256). */
- md = (I62 * 4 * (int64_t)cd) >> 2;
- me = (I62 * 4 * (int64_t)ce) >> 2;
+ md = ((int64_t)(I62 * 4 * (uint64_t)cd)) >> 2;
+ me = ((int64_t)(I62 * 4 * (uint64_t)ce)) >> 2;
cd += (int128_t)P[0] * md;
ce += (int128_t)P[0] * me;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 40fcaf7..66c4494 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -914,7 +914,7 @@ static uint32_t secp256k1_scalar_divsteps_30_var(uint32_t eta, uint32_t f0, uint
static void secp256k1_scalar_update_de_30(int32_t *d, int32_t *e, const int32_t *t) {
/* I30 == -P^-1 mod 2^30 */
- const int32_t I30 = 0x1588B13FL;
+ const uint32_t I30 = 0x1588B13FL;
const int32_t P[9] = { 0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L,
0, 0, 0, 65536 };
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
@@ -930,8 +930,8 @@ static void secp256k1_scalar_update_de_30(int32_t *d, int32_t *e, const int32_t
/* Calculate the multiples of P to add, to zero the 30 bottom bits. We choose md, me
* from the centred range [-2^29, 2^29) to keep d, e within [-2^256, 2^256). */
- md = (I30 * 4 * (int32_t)cd) >> 2;
- me = (I30 * 4 * (int32_t)ce) >> 2;
+ md = ((int32_t)(I30 * 4 * (uint32_t)cd)) >> 2;
+ me = ((int32_t)(I30 * 4 * (uint32_t)ce)) >> 2;
cd += (int64_t)P[0] * md;
ce += (int64_t)P[0] * me;

With that, all tests pass when compiled with |
59d69a3 to
4ad49c8
Compare
|
I want to rework this a bit still, introducing structs for the signed-limb and transformation-matrix representations for clarity, and adding comments to explain my own understanding. Marking as draft for now, but it can be built/benchmarked/tested already. |
|
I added a commit that introduces shared modinv* files with the bulk of the algorithm, which is then used by both scalar and field code. I measure around a 1% slowdown for the variable-time versions, but a 2% speedup for the constant-time version with this (?!). I'd be interested to hear what benchmarks others see with/without the last commit. |
|
(only testing 64bit) I see very little change apart from field_inverse getting noticeably slower (4%), which is presumably down to no longer having the nice prime shape baked in to _update_de_62. |
|
@peterdettman Oh, I see - you're saying that these aren't invariants that hold for all signed62 values; of course. This _verify function is a bit of a leftover - it should just be inlined back into the _from_signedXX functions, as that's where this strict condition is required. An internal _verify function may be helpful too, but would need to be less strict. |
|
Rebased on top of the endomorphism changes, included the recent fixes from sipa#6, squashed the implementation commits, and removed the […]. I'd like to make a few more changes, adding checks for the ranges of variables and additional comments, but if someone is interested I think this is pretty close to done otherwise. |
|
@peterdettman I had to make these additional changes to satisfy ubsan (they were actually invoking UB, I believe):

diff --git a/src/modinv32.h b/src/modinv32.h
index 74fc3fd8..d46447b1 100644
--- a/src/modinv32.h
+++ b/src/modinv32.h
@@ -22,7 +22,7 @@ typedef struct {
secp256k1_modinv32_signed30 modulus;
/* modulus^{-1} mod 2^30 */
- int32_t modulus_inv30;
+ uint32_t modulus_inv30;
} secp256k1_modinv32_modinfo;
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 2cd65557..a15bd2f9 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -201,8 +201,8 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv30 * (int32_t)cd + md) & M30;
- me -= (modinfo->modulus_inv30 * (int32_t)ce + me) & M30;
+ md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+ me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int64_t)modinfo->modulus.v[0] * md;
@@ -380,8 +380,8 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
cond |= gn ^ (gn >> 31);
if (cond == 0) {
- f.v[len - 2] |= fn << 30;
- g.v[len - 2] |= gn << 30;
+ f.v[len - 2] |= (uint32_t)fn << 30;
+ g.v[len - 2] |= (uint32_t)gn << 30;
--len;
}
}
diff --git a/src/modinv64.h b/src/modinv64.h
index caff6f5a..6fea9651 100644
--- a/src/modinv64.h
+++ b/src/modinv64.h
@@ -26,7 +26,7 @@ typedef struct {
secp256k1_modinv64_signed62 modulus;
/* modulus^{-1} mod 2^62 */
- int64_t modulus_inv62;
+ uint64_t modulus_inv62;
} secp256k1_modinv64_modinfo;
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index ebf1fe7f..7ce6c909 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -177,8 +177,8 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
* the range (-2.P, P), consistent with the input constraint.
...skipping...
index 2cd65557..a15bd2f9 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -201,8 +201,8 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv30 * (int32_t)cd + md) & M30;
- me -= (modinfo->modulus_inv30 * (int32_t)ce + me) & M30;
+ md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+ me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int64_t)modinfo->modulus.v[0] * md;
@@ -380,8 +380,8 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
cond |= gn ^ (gn >> 31);
if (cond == 0) {
- f.v[len - 2] |= fn << 30;
- g.v[len - 2] |= gn << 30;
+ f.v[len - 2] |= (uint32_t)fn << 30;
+ g.v[len - 2] |= (uint32_t)gn << 30;
--len;
}
}
diff --git a/src/modinv64.h b/src/modinv64.h
index caff6f5a..6fea9651 100644
--- a/src/modinv64.h
+++ b/src/modinv64.h
@@ -26,7 +26,7 @@ typedef struct {
secp256k1_modinv64_signed62 modulus;
/* modulus^{-1} mod 2^62 */
- int64_t modulus_inv62;
+ uint64_t modulus_inv62;
} secp256k1_modinv64_modinfo;
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index ebf1fe7f..7ce6c909 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -177,8 +177,8 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv62 * (int64_t)cd + md) & M62;
- me -= (modinfo->modulus_inv62 * (int64_t)ce + me) & M62;
+ md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62;
+ me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int128_t)modinfo->modulus.v[0] * md;
@@ -388,8 +388,8 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
cond |= gn ^ (gn >> 63);
if (cond == 0) {
- f.v[len - 2] |= fn << 62;
- g.v[len - 2] |= gn << 62;
+ f.v[len - 2] |= (uint64_t)fn << 62;
+ g.v[len - 2] |= (uint64_t)gn << 62;
--len;
}
} |
7a5c193 to
b4721dc
Compare
|
@peterdettman It seems that the bounds on (u,v,q,r) after N divsteps are:
If we'd negate those coefficients, 31/63 fit in a single int{32,64}_t transformation matrix. Is there any reason why 30/62 are preferable? For the 32-bit constant time version this gains us one big step. For the variable-time version it may mean doing a group less in some cases. Some additional bounds that may help reasoning about ranges:
|
This temporarily duplicates the inversion code across the 5x52 and 10x26 implementations. Those implementations will be replaced in a next commit.
|
Addressed @real-or-random's comments:

diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 1a33cd81..c2802514 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1230,43 +1230,27 @@ static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = {
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize_var(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index b08a087f..56e71bc9 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -548,43 +548,27 @@ static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_fe = {
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv64_signed62 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed62(&s, &tmp);
secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv64_signed62 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize_var(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed62(&s, &tmp);
secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 3eba04fc..aa7988c4 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -37,18 +37,18 @@ static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp
r->v[8] = (int32_t)c;
}
-/* Compare a with b*factor. */
+/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
int i;
secp256k1_modinv32_signed30 am, bm;
secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
secp256k1_modinv32_mul_30(&bm, b, 9, factor);
+ for (i = 0; i < 8; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 30 == 0);
+ VERIFY_CHECK(bm.v[i] >> 30 == 0);
+ }
for (i = 8; i >= 0; --i) {
- if (i != 8) {
- /* Verify that all but the top limb of a and b are normalized. */
- VERIFY_CHECK(am.v[i] >> 30 == 0);
- VERIFY_CHECK(bm.v[i] >> 30 == 0);
- }
if (am.v[i] < bm.v[i]) return -1;
if (am.v[i] > bm.v[i]) return 1;
}
@@ -155,7 +155,7 @@ static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int3
VERIFY_CHECK(r7 >> 30 == 0);
VERIFY_CHECK(r8 >> 30 == 0);
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
- VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < P */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
#endif
}
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index e1ba8b63..78505fa1 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -43,18 +43,18 @@ static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp
r->v[4] = (int64_t)c;
}
-/* Compare a with b*bf. */
+/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A has alen limbs; b has 5. */
static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, int alen, const secp256k1_modinv64_signed62 *b, int64_t factor) {
int i;
secp256k1_modinv64_signed62 am, bm;
secp256k1_modinv64_mul_62(&am, a, alen, 1); /* Normalize all but the top limb of a. */
secp256k1_modinv64_mul_62(&bm, b, 5, factor);
+ for (i = 0; i < 4; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 62 == 0);
+ VERIFY_CHECK(bm.v[i] >> 62 == 0);
+ }
for (i = 4; i >= 0; --i) {
- if (i != 4) {
- /* Verify that all but the top limb of a and b are normalized. */
- VERIFY_CHECK(am.v[i] >> 62 == 0);
- VERIFY_CHECK(bm.v[i] >> 62 == 0);
- }
if (am.v[i] < bm.v[i]) return -1;
if (am.v[i] > bm.v[i]) return 1;
}
@@ -132,7 +132,7 @@ static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int6
VERIFY_CHECK(r3 >> 62 == 0);
VERIFY_CHECK(r4 >> 62 == 0);
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 0) >= 0); /* r >= 0 */
- VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < P */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
#endif
}
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index a1def26f..f8f37ff8 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -808,18 +808,14 @@ static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_
r->d[2] = a2 >> 4 | a3 << 58;
r->d[3] = a3 >> 6 | a4 << 56;
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
-#endif
}
static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
const uint64_t M62 = UINT64_MAX >> 2;
const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
-#endif
r->v[0] = a0 & M62;
r->v[1] = (a0 >> 62 | a1 << 2) & M62;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 62c7ae71..3282c68d 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -670,9 +670,7 @@ static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_
r->d[6] = a6 >> 12 | a7 << 18;
r->d[7] = a7 >> 14 | a8 << 16;
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
-#endif
}
static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
@@ -680,9 +678,7 @@ static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const s
const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
-#endif
r->v[0] = a0 & M30;
r->v[1] = (a0 >> 30 | a1 << 2) & M30; |
|
FWIW, ACK the parts of the code by @peterdettman here. I assume that's implied by it being my PR, but still - most of the non-test code isn't mine here. |
Add a new run_inverse_tests that replaces all existing field/scalar inverse tests, and tests a few identities for fixed inputs, small numbers (-999...999), random inputs (structured and unstructured), as well as comparing with the output of secp256k1_fe_inv_all_var.
No exposed functions rely on Jacobi symbol computation anymore. Remove it; it can always be brought back later if needed.
|
To make sure the unused evaluations of expressions inside `VERIFY_CHECK` are compiled out:

diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 56e71bc9..b73cfea2 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -555,7 +555,9 @@ static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
@@ -568,7 +570,9 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index f8f37ff8..a1def26f 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -808,14 +808,18 @@ static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_
r->d[2] = a2 >> 4 | a3 << 58;
r->d[3] = a3 >> 6 | a4 << 56;
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
}
static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
const uint64_t M62 = UINT64_MAX >> 2;
const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
r->v[0] = a0 & M62;
r->v[1] = (a0 >> 62 | a1 << 2) & M62;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 3282c68d..62c7ae71 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -670,7 +670,9 @@ static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_
r->d[6] = a6 >> 12 | a7 << 18;
r->d[7] = a7 >> 14 | a8 << 16;
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
}
static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
@@ -678,7 +680,9 @@ static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const s
const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
r->v[0] = a0 & M30;
r->v[1] = (a0 >> 30 | a1 << 2) & M30;

See also #902. |
|
@real-or-random, @sanket1729, maybe @peterdettman: feel like reviewing the final state things are in now? |
|
Can you remove |
The whole "num" API and its libgmp-based implementation are now unused. Remove them.
Both the field and scalar modulus can be written in signed{30,62} notation
with one or more zero limbs. Make use of this in the update_de function to
avoid a few wide multiplications when that is the case.
This doesn't appear to be a win in the 32-bit implementation, so only
do it for the 64-bit one.
…bits This only seems to be a win on 64-bit platforms, so only do it there. Refactored by: Pieter Wuille <pieter@wuille.net>
…te_fg_var The magnitude of the f and g variables generally goes down as the algorithm progresses. Make use of this by keeping tracking how many limbs are used, and when the number becomes small enough, make use of this to reduce the complexity of arithmetic on them. Refactored by: Pieter Wuille <pieter@wuille.net>
Done. |
real-or-random
left a comment
There was a problem hiding this comment.
ACK 24ad04f careful code review, some testing
|
ACK 24ad04f |
1 similar comment
|
ACK 24ad04f |

This is a rebased and squashed version of #767, adding safegcd-based implementations of constant-time and variable-time modular inverses for scalars and field elements, by Peter Dettman. The PR is organized as follows:
__builtin_ctzon recent GCC and Clang, but fall back to using a software emulation using de Bruijn on other platforms). This isn't used anywhere in this commit, but does include tests.doc/safegcd_implementation.md, as well as additional comments to the code itself. It is probably best to review this together with the previous commit (they're separated to keep authorship).