Safegcd inverses, drop Jacobi symbols, remove libgmp#831
Safegcd inverses, drop Jacobi symbols, remove libgmp#831 — sipa merged 16 commits into bitcoin-core:master from
Conversation
|
Ping @peterdettman I made the following change to avoid signed overflow:

diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index d5ff75c..a2c16a6 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1362,7 +1362,7 @@ static void secp256k1_fe_update_de_30(int32_t *d, int32_t *e, const int32_t *t)
/* P == 2^256 - 2^32 - C30 */
const int64_t C30 = 0x3D1L;
/* I30 == -P^-1 mod 2^30 */
- const int32_t I30 = 0x12253531L;
+ const uint32_t I30 = 0x12253531L;
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
const int32_t u = t[0], v = t[1], q = t[2], r = t[3];
int32_t di, ei, md, me;
@@ -1377,8 +1377,8 @@ static void secp256k1_fe_update_de_30(int32_t *d, int32_t *e, const int32_t *t)
/* Calculate the multiples of P to add, to zero the 30 bottom bits. We choose md, me
* from the centred range [-2^29, 2^29) to keep d, e within [-2^256, 2^256). */
- md = (I30 * 4 * (int32_t)cd) >> 2;
- me = (I30 * 4 * (int32_t)ce) >> 2;
+ md = ((int32_t)(I30 * 4 * (uint32_t)cd)) >> 2;
+ me = ((int32_t)(I30 * 4 * (uint32_t)ce)) >> 2;
cd -= (int64_t)C30 * md;
ce -= (int64_t)C30 * me;
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 2e81fc2..8f7d290 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -663,7 +663,7 @@ static void secp256k1_fe_update_de_62(int64_t *d, int64_t *e, const int64_t *t)
/* P == 2^256 - C62 */
const int64_t C62 = 0x1000003D1LL;
/* I62 == -P^-1 mod 2^62 */
- const int64_t I62 = 0x1838091DD2253531LL;
+ const uint64_t I62 = 0x1838091DD2253531LL;
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
const int64_t d0 = d[0], d1 = d[1], d2 = d[2], d3 = d[3], d4 = d[4];
const int64_t e0 = e[0], e1 = e[1], e2 = e[2], e3 = e[3], e4 = e[4];
@@ -676,8 +676,8 @@ static void secp256k1_fe_update_de_62(int64_t *d, int64_t *e, const int64_t *t)
/* Calculate the multiples of P to add, to zero the 62 bottom bits. We choose md, me
* from the centred range [-2^61, 2^61) to keep d, e within [-2^256, 2^256). */
- md = (I62 * 4 * (int64_t)cd) >> 2;
- me = (I62 * 4 * (int64_t)ce) >> 2;
+ md = ((int64_t)(I62 * 4 * (uint64_t)cd)) >> 2;
+ me = ((int64_t)(I62 * 4 * (uint64_t)ce)) >> 2;
cd -= (int128_t)C62 * md;
ce -= (int128_t)C62 * me;
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 5dfa742..b649928 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -1113,7 +1113,7 @@ static uint64_t secp256k1_scalar_divsteps_62_var(uint64_t eta, uint64_t f0, uint
static void secp256k1_scalar_update_de_62(int64_t *d, int64_t *e, const int64_t *t) {
/* I62 == -P^-1 mod 2^62 */
- const int64_t I62 = 0x0B0DFF665588B13FLL;
+ const uint64_t I62 = 0x0B0DFF665588B13FLL;
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
const int64_t P[5] = { 0x3FD25E8CD0364141LL, 0x2ABB739ABD2280EELL, -0x15LL, 0, 256 };
const int64_t d0 = d[0], d1 = d[1], d2 = d[2], d3 = d[3], d4 = d[4];
@@ -1127,8 +1127,8 @@ static void secp256k1_scalar_update_de_62(int64_t *d, int64_t *e, const int64_t
/* Calculate the multiples of P to add, to zero the 62 bottom bits. We choose md, me
* from the centred range [-2^61, 2^61) to keep d, e within [-2^256, 2^256). */
- md = (I62 * 4 * (int64_t)cd) >> 2;
- me = (I62 * 4 * (int64_t)ce) >> 2;
+ md = ((int64_t)(I62 * 4 * (uint64_t)cd)) >> 2;
+ me = ((int64_t)(I62 * 4 * (uint64_t)ce)) >> 2;
cd += (int128_t)P[0] * md;
ce += (int128_t)P[0] * me;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 40fcaf7..66c4494 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -914,7 +914,7 @@ static uint32_t secp256k1_scalar_divsteps_30_var(uint32_t eta, uint32_t f0, uint
static void secp256k1_scalar_update_de_30(int32_t *d, int32_t *e, const int32_t *t) {
/* I30 == -P^-1 mod 2^30 */
- const int32_t I30 = 0x1588B13FL;
+ const uint32_t I30 = 0x1588B13FL;
const int32_t P[9] = { 0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L,
0, 0, 0, 65536 };
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
@@ -930,8 +930,8 @@ static void secp256k1_scalar_update_de_30(int32_t *d, int32_t *e, const int32_t
/* Calculate the multiples of P to add, to zero the 30 bottom bits. We choose md, me
* from the centred range [-2^29, 2^29) to keep d, e within [-2^256, 2^256). */
- md = (I30 * 4 * (int32_t)cd) >> 2;
- me = (I30 * 4 * (int32_t)ce) >> 2;
+ md = ((int32_t)(I30 * 4 * (uint32_t)cd)) >> 2;
+ me = ((int32_t)(I30 * 4 * (uint32_t)ce)) >> 2;
cd += (int64_t)P[0] * md;
ce += (int64_t)P[0] * me;

With that, all tests pass when compiled with |
59d69a3 to
4ad49c8
Compare
|
I want to rework this a bit still, introducing structs for the signed-limb and transformation-matrix representations for clarity, and adding comments to explain my own understanding. Marking as draft for now, but it can be built/benchmarked/tested already. |
|
I added a commit that introduces shared modinv* files with the bulk of the algorithm, which is then used by both scalar and field code. I measure around a 1% slowdown for the variable-time versions, but a 2% speedup for the constant-time version with this (?!). I'd be interested to hear what benchmarks others see with/without the last commit. |
|
(only testing 64bit) I see very little change apart from field_inverse getting noticeably slower (4%), which is presumably down to no longer having the nice prime shape baked in to _update_de_62. |
|
@peterdettman Oh, I see - you're saying that these aren't invariants that hold for all signed62 values; of course. This _verify function is a bit of a leftover - it should just be inlined back into the _from_signedXX functions, as that's where this strict condition is required. An internal _verify function may be helpful too, but would need to be less strict. |
|
Rebased on top of the endomorphism changes, included the recent fixes from sipa#6, squashed the implementation commits, and removed the […]. I'd like to make a few more changes, adding checks for the ranges of variables and additional comments, but if someone is interested I think this is pretty close to done otherwise. |
|
@peterdettman I had to make these additional changes to satisfy ubsan (they were actually invoking UB, I believe):

diff --git a/src/modinv32.h b/src/modinv32.h
index 74fc3fd8..d46447b1 100644
--- a/src/modinv32.h
+++ b/src/modinv32.h
@@ -22,7 +22,7 @@ typedef struct {
secp256k1_modinv32_signed30 modulus;
/* modulus^{-1} mod 2^30 */
- int32_t modulus_inv30;
+ uint32_t modulus_inv30;
} secp256k1_modinv32_modinfo;
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 2cd65557..a15bd2f9 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -201,8 +201,8 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv30 * (int32_t)cd + md) & M30;
- me -= (modinfo->modulus_inv30 * (int32_t)ce + me) & M30;
+ md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+ me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int64_t)modinfo->modulus.v[0] * md;
@@ -380,8 +380,8 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
cond |= gn ^ (gn >> 31);
if (cond == 0) {
- f.v[len - 2] |= fn << 30;
- g.v[len - 2] |= gn << 30;
+ f.v[len - 2] |= (uint32_t)fn << 30;
+ g.v[len - 2] |= (uint32_t)gn << 30;
--len;
}
}
diff --git a/src/modinv64.h b/src/modinv64.h
index caff6f5a..6fea9651 100644
--- a/src/modinv64.h
+++ b/src/modinv64.h
@@ -26,7 +26,7 @@ typedef struct {
secp256k1_modinv64_signed62 modulus;
/* modulus^{-1} mod 2^62 */
- int64_t modulus_inv62;
+ uint64_t modulus_inv62;
} secp256k1_modinv64_modinfo;
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index ebf1fe7f..7ce6c909 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -177,8 +177,8 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
* the range (-2.P, P), consistent with the input constraint.
...skipping...
index 2cd65557..a15bd2f9 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -201,8 +201,8 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv30 * (int32_t)cd + md) & M30;
- me -= (modinfo->modulus_inv30 * (int32_t)ce + me) & M30;
+ md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+ me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int64_t)modinfo->modulus.v[0] * md;
@@ -380,8 +380,8 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
cond |= gn ^ (gn >> 31);
if (cond == 0) {
- f.v[len - 2] |= fn << 30;
- g.v[len - 2] |= gn << 30;
+ f.v[len - 2] |= (uint32_t)fn << 30;
+ g.v[len - 2] |= (uint32_t)gn << 30;
--len;
}
}
diff --git a/src/modinv64.h b/src/modinv64.h
index caff6f5a..6fea9651 100644
--- a/src/modinv64.h
+++ b/src/modinv64.h
@@ -26,7 +26,7 @@ typedef struct {
secp256k1_modinv64_signed62 modulus;
/* modulus^{-1} mod 2^62 */
- int64_t modulus_inv62;
+ uint64_t modulus_inv62;
} secp256k1_modinv64_modinfo;
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index ebf1fe7f..7ce6c909 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -177,8 +177,8 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
* the range (-2.P, P), consistent with the input constraint.
*/
- md -= (modinfo->modulus_inv62 * (int64_t)cd + md) & M62;
- me -= (modinfo->modulus_inv62 * (int64_t)ce + me) & M62;
+ md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62;
+ me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62;
/* The modulus has to be odd, so we can assume it is nonzero. */
cd += (int128_t)modinfo->modulus.v[0] * md;
@@ -388,8 +388,8 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
cond |= gn ^ (gn >> 63);
if (cond == 0) {
- f.v[len - 2] |= fn << 62;
- g.v[len - 2] |= gn << 62;
+ f.v[len - 2] |= (uint64_t)fn << 62;
+ g.v[len - 2] |= (uint64_t)gn << 62;
--len;
}
} |
7a5c193 to
b4721dc
Compare
|
@peterdettman It seems that the bounds on (u,v,q,r) after N divsteps are:
If we'd negate those coefficients, 31/63 fit in a single int{32,64}_t transformation matrix. Is there any reason why 30/62 are preferable? For the 32-bit constant time version this gains us one big step. For the variable-time version it may mean doing a group less in some cases. Some additional bounds that may help reasoning about ranges:
|
This temporarily duplicates the inversion code across the 5x52 and 10x26 implementations. Those implementations will be replaced in a next commit.
|
Addressed @real-or-random's comments:

diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 1a33cd81..c2802514 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1230,43 +1230,27 @@ static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = {
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize_var(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index b08a087f..56e71bc9 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -548,43 +548,27 @@ static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_fe = {
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv64_signed62 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed62(&s, &tmp);
secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv64_signed62 s;
-#ifdef VERIFY
- int zero_in;
-#endif
tmp = *x;
secp256k1_fe_normalize_var(&tmp);
-#ifdef VERIFY
- zero_in = secp256k1_fe_normalizes_to_zero(&tmp);
-#endif
secp256k1_fe_to_signed62(&s, &tmp);
secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
-#ifdef VERIFY
- VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == zero_in);
-#endif
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 3eba04fc..aa7988c4 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -37,18 +37,18 @@ static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp
r->v[8] = (int32_t)c;
}
-/* Compare a with b*factor. */
+/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
int i;
secp256k1_modinv32_signed30 am, bm;
secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
secp256k1_modinv32_mul_30(&bm, b, 9, factor);
+ for (i = 0; i < 8; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 30 == 0);
+ VERIFY_CHECK(bm.v[i] >> 30 == 0);
+ }
for (i = 8; i >= 0; --i) {
- if (i != 8) {
- /* Verify that all but the top limb of a and b are normalized. */
- VERIFY_CHECK(am.v[i] >> 30 == 0);
- VERIFY_CHECK(bm.v[i] >> 30 == 0);
- }
if (am.v[i] < bm.v[i]) return -1;
if (am.v[i] > bm.v[i]) return 1;
}
@@ -155,7 +155,7 @@ static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int3
VERIFY_CHECK(r7 >> 30 == 0);
VERIFY_CHECK(r8 >> 30 == 0);
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
- VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < P */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
#endif
}
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index e1ba8b63..78505fa1 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -43,18 +43,18 @@ static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp
r->v[4] = (int64_t)c;
}
-/* Compare a with b*bf. */
+/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A has alen limbs; b has 5. */
static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, int alen, const secp256k1_modinv64_signed62 *b, int64_t factor) {
int i;
secp256k1_modinv64_signed62 am, bm;
secp256k1_modinv64_mul_62(&am, a, alen, 1); /* Normalize all but the top limb of a. */
secp256k1_modinv64_mul_62(&bm, b, 5, factor);
+ for (i = 0; i < 4; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 62 == 0);
+ VERIFY_CHECK(bm.v[i] >> 62 == 0);
+ }
for (i = 4; i >= 0; --i) {
- if (i != 4) {
- /* Verify that all but the top limb of a and b are normalized. */
- VERIFY_CHECK(am.v[i] >> 62 == 0);
- VERIFY_CHECK(bm.v[i] >> 62 == 0);
- }
if (am.v[i] < bm.v[i]) return -1;
if (am.v[i] > bm.v[i]) return 1;
}
@@ -132,7 +132,7 @@ static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int6
VERIFY_CHECK(r3 >> 62 == 0);
VERIFY_CHECK(r4 >> 62 == 0);
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 0) >= 0); /* r >= 0 */
- VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < P */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
#endif
}
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index a1def26f..f8f37ff8 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -808,18 +808,14 @@ static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_
r->d[2] = a2 >> 4 | a3 << 58;
r->d[3] = a3 >> 6 | a4 << 56;
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
-#endif
}
static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
const uint64_t M62 = UINT64_MAX >> 2;
const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
-#endif
r->v[0] = a0 & M62;
r->v[1] = (a0 >> 62 | a1 << 2) & M62;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 62c7ae71..3282c68d 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -670,9 +670,7 @@ static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_
r->d[6] = a6 >> 12 | a7 << 18;
r->d[7] = a7 >> 14 | a8 << 16;
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
-#endif
}
static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
@@ -680,9 +678,7 @@ static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const s
const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
-#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
-#endif
r->v[0] = a0 & M30;
r->v[1] = (a0 >> 30 | a1 << 2) & M30; |
|
FWIW, ACK the parts of the code by @peterdettman here. I assume that's implied by it being my PR, but still - most of the non-test code isn't mine here. |
Add a new run_inverse_tests that replaces all existing field/scalar inverse tests, and tests a few identities for fixed inputs, small numbers (-999...999), random inputs (structured and unstructured), as well as comparing with the output of secp256k1_fe_inv_all_var.
No exposed functions rely on Jacobi symbol computation anymore. Remove it; it can always be brought back later if needed.
|
To make sure the unused evaluations of expressions inside `VERIFY_CHECK` are compiled out:

diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 56e71bc9..b73cfea2 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -555,7 +555,9 @@ static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
}
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
@@ -568,7 +570,9 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed62(r, &s);
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
}
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index f8f37ff8..a1def26f 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -808,14 +808,18 @@ static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_
r->d[2] = a2 >> 4 | a3 << 58;
r->d[3] = a3 >> 6 | a4 << 56;
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
}
static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
const uint64_t M62 = UINT64_MAX >> 2;
const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
r->v[0] = a0 & M62;
r->v[1] = (a0 >> 62 | a1 << 2) & M62;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 3282c68d..62c7ae71 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -670,7 +670,9 @@ static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_
r->d[6] = a6 >> 12 | a7 << 18;
r->d[7] = a7 >> 14 | a8 << 16;
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
}
static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
@@ -678,7 +680,9 @@ static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const s
const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
+#ifdef VERIFY
VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
r->v[0] = a0 & M30;
r->v[1] = (a0 >> 30 | a1 << 2) & M30;

See also #902. |
|
@real-or-random, @sanket1729, maybe @peterdettman: feel like reviewing the final state things are in now? |
|
Can you remove |
The whole "num" API and its libgmp-based implementation are now unused. Remove them.
Both the field and scalar modulus can be written in signed{30,62} notation
with one or more zero limbs. Make use of this in the update_de function to
avoid a few wide multiplications when that is the case.
This doesn't appear to be a win in the 32-bit implementation, so only
do it for the 64-bit one.
…bits This only seems to be a win on 64-bit platforms, so only do it there. Refactored by: Pieter Wuille <pieter@wuille.net>
…te_fg_var The magnitude of the f and g variables generally goes down as the algorithm progresses. Make use of this by keeping tracking how many limbs are used, and when the number becomes small enough, make use of this to reduce the complexity of arithmetic on them. Refactored by: Pieter Wuille <pieter@wuille.net>
Done. |
real-or-random
left a comment
There was a problem hiding this comment.
ACK 24ad04f careful code review, some testing
|
ACK 24ad04f |
1 similar comment
|
ACK 24ad04f |

This is a rebased and squashed version of #767, adding safegcd-based implementations of constant-time and variable-time modular inverses for scalars and field elements, by Peter Dettman. The PR is organized as follows:
__builtin_ctzon recent GCC and Clang, but fall back to using a software emulation using de Bruijn on other platforms). This isn't used anywhere in this commit, but does include tests.doc/safegcd_implementation.md, as well as additional comments to the code itself. It is probably best to review this together with the previous commit (they're separated to keep authorship).