/** @file
 *****************************************************************************
 Implementation of arithmetic in the finite field F[p], for prime p of fixed
 length.
 *****************************************************************************
 * @author     This file is part of libff, developed by SCIPR Lab
 *             and contributors (see AUTHORS).
 * @copyright  MIT license (see LICENSE file)
 *****************************************************************************/
#include <libff/algebra/fields/field_serialization.hpp>
#include <libff/algebra/fields/field_utils.hpp>
#include <libff/algebra/fields/fp_aux.tcc>
template<mp_size_t n, const bigint<n> &modulus>
bool Fp_model<n, modulus>::s_initialized = false;

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::s_zero;

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::s_one;

template<mp_size_t n, const bigint<n> &modulus>
void Fp_model<n, modulus>::static_init()
    // This function may be entered several times, in particular where an
    // individual field is shared between curves.
    if (s_initialized) {
        return;
    }

    // Initialize s_zero and s_one
    s_zero.mont_repr.clear();
    s_one.mont_repr.clear();
    s_one.mont_repr.data[0] = 1;
    s_one.mul_reduce(Rsquared);

    s_initialized = true;
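    // Note on the values above: mul_reduce(other) computes
    // this->mont_repr * other * R^{-1} mod m (Montgomery reduction), so
    // seeding s_one with plain 1 and reducing against Rsquared = R^2 mod m
    // yields 1 * R^2 * R^{-1} = R mod m, i.e. 1 in Montgomery form.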
template<mp_size_t n, const bigint<n> &modulus>
void Fp_model<n, modulus>::mul_reduce(const bigint<n> &other)
    // stupid pre-processor tricks; beware
#if defined(__x86_64__) && defined(USE_ASM)
    if (n == 3) { // Use asm-optimized Comba multiplication and reduction
        mp_limb_t res[2 * n];
        mp_limb_t c0, c1, c2;
        COMBA_3_BY_3_MUL(c0, c1, c2, res, this->mont_repr.data, other.data);

        mp_limb_t k;
        mp_limb_t tmp1, tmp2, tmp3;
        REDUCE_6_LIMB_PRODUCT(k, tmp1, tmp2, tmp3, inv, res, modulus.data);

        // res[n..2n-1] now holds the reduced product; conditionally
        // subtract the modulus once.
        __asm__( // Preserve alignment
            "/* check for overflow */ \n\t" //
            "/* subtract mod if overflow */ \n\t" //
            : [tmp] "r"(res + n), [M] "r"(modulus.data)
            : "cc", "memory", "%rax");
        mpn_copyi(this->mont_repr.data, res + n, n);
    } else if (n == 4) { // use asm-optimized "CIOS method"
        mp_limb_t tmp[n + 1];
        mp_limb_t T0 = 0, T1 = 1, cy = 2, u = 3; // TODO: fix this
        __asm__( // Preserve alignment
            MONT_ITERITER(1, 1) //
            MONT_ITERITER(1, 2) //
            MONT_ITERITER(1, 3) //
            MONT_ITERITER(2, 1) //
            MONT_ITERITER(2, 2) //
            MONT_ITERITER(2, 3) //
            MONT_ITERITER(3, 1) //
            MONT_ITERITER(3, 2) //
            MONT_ITERITER(3, 3) //
            "/* check for overflow */ " //
            "/* subtract mod if overflow */ " //
            : [A] "r"(this->mont_repr.data),
              [M] "r"(modulus.data),
            : "cc", "memory", "%rax", "%rdx");
        mpn_copyi(this->mont_repr.data, tmp, n);
    } else if (n == 5) {
        // use asm-optimized "CIOS method"
        mp_limb_t tmp[n + 1];
        mp_limb_t T0 = 0, T1 = 1, cy = 2, u = 3; // TODO: fix this
        __asm__( // Preserve alignment
            MONT_ITERITER(1, 1) //
            MONT_ITERITER(1, 2) //
            MONT_ITERITER(1, 3) //
            MONT_ITERITER(1, 4) //
            MONT_ITERITER(2, 1) //
            MONT_ITERITER(2, 2) //
            MONT_ITERITER(2, 3) //
            MONT_ITERITER(2, 4) //
            MONT_ITERITER(3, 1) //
            MONT_ITERITER(3, 2) //
            MONT_ITERITER(3, 3) //
            MONT_ITERITER(3, 4) //
            MONT_ITERITER(4, 1) //
            MONT_ITERITER(4, 2) //
            MONT_ITERITER(4, 3) //
            MONT_ITERITER(4, 4) //
            "/* check for overflow */ " //
            "/* subtract mod if overflow */ " //
            : [A] "r"(this->mont_repr.data),
              [M] "r"(modulus.data),
            : "cc", "memory", "%rax", "%rdx");
        mpn_copyi(this->mont_repr.data, tmp, n);
        mp_limb_t res[2 * n];
        mpn_mul_n(res, this->mont_repr.data, other.data, n);

        // The Montgomery reduction here is based on Algorithm 14.32 in
        // Handbook of Applied Cryptography
        // <http://cacr.uwaterloo.ca/hac/about/chap14.pdf>.
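        // Worked toy example (base-10 "limbs", for illustration only):
        // take m = 7, limb radix b = 10, inv = -m^{-1} mod b = 7 (since
        // 7 * 3 = 21, which is 1 mod 10). To reduce res = 26:
        // k = inv * res[0] mod b = 7 * 6 mod 10 = 2, and res + k*m =
        // 26 + 14 = 40 now has low digit 0. Dropping that digit leaves 4,
        // matching 26 * 10^{-1} mod 7 = 26 * 5 mod 7 = 4.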
        for (size_t i = 0; i < n; ++i) {
            mp_limb_t k = inv * res[i];
            // calculate res = res + k * mod * b^i
            mp_limb_t carryout = mpn_addmul_1(res + i, modulus.data, n, k);
            carryout = mpn_add_1(res + n + i, res + n + i, n - i, carryout);
            assert(carryout == 0);
        }

        if (mpn_cmp(res + n, modulus.data, n) >= 0) {
            const mp_limb_t borrow =
                mpn_sub(res + n, res + n, n, modulus.data, n);
            assert(borrow == 0);
        }

        mpn_copyi(this->mont_repr.data, res + n, n);
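        // Calling convention sketch: mul_reduce implements
        // mont(u, v) = u * v * R^{-1} mod m, where R = 2^(GMP_NUMB_BITS * n).
        // Hence mont(x, Rsquared) = x * R maps x into Montgomery form (see
        // set_ulong below), mont(y, 1) = y * R^{-1} maps back out (see
        // as_bigint and print), and mont(xR, yR) = (x * y) * R keeps
        // products in Montgomery form.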
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus>::Fp_model(const bigint<n> &b)
    mpn_copyi(this->mont_repr.data, b.data, n);
    mul_reduce(Rsquared);
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus>::Fp_model(const long x, const bool is_unsigned)
    static_assert(
        std::numeric_limits<mp_limb_t>::max() >=
            static_cast<unsigned long>(std::numeric_limits<long>::max()),
        "long won't fit in mp_limb_t");
    if (is_unsigned || x >= 0) {
        this->mont_repr.clear();
        this->mont_repr.data[0] = (mp_limb_t)x;
    } else {
        const mp_limb_t borrow =
            mpn_sub_1(this->mont_repr.data, modulus.data, n, (mp_limb_t)-x);
        assert(borrow == 0);
    }
    mul_reduce(Rsquared);
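    // Example (illustration): for x = -5 the else branch stores m - 5, i.e.
    // -5 mod m (mpn_sub_1 writes all n limbs); for x >= 0 or is_unsigned
    // only the low limb is set, which is why the representation is cleared
    // first. Either way, mul_reduce(Rsquared) then converts the plain value
    // into Montgomery form.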
template<mp_size_t n, const bigint<n> &modulus>
void Fp_model<n, modulus>::set_ulong(const unsigned long x)
    this->mont_repr.clear();
    this->mont_repr.data[0] = x;
    mul_reduce(Rsquared);
template<mp_size_t n, const bigint<n> &modulus>
void Fp_model<n, modulus>::clear()
    this->mont_repr.clear();
template<mp_size_t n, const bigint<n> &modulus>
bigint<n> Fp_model<n, modulus>::as_bigint() const
    bigint<n> one;
    one.clear();
    one.data[0] = 1;

    Fp_model<n, modulus> res(*this);
    res.mul_reduce(one);
    return (res.mont_repr);
template<mp_size_t n, const bigint<n> &modulus>
unsigned long Fp_model<n, modulus>::as_ulong() const
    return this->as_bigint().as_ulong();
template<mp_size_t n, const bigint<n> &modulus>
bool Fp_model<n, modulus>::operator==(const Fp_model &other) const
    return (this->mont_repr == other.mont_repr);

template<mp_size_t n, const bigint<n> &modulus>
bool Fp_model<n, modulus>::operator!=(const Fp_model &other) const
    return (this->mont_repr != other.mont_repr);

template<mp_size_t n, const bigint<n> &modulus>
bool Fp_model<n, modulus>::is_zero() const
    return (this->mont_repr.is_zero());
template<mp_size_t n, const bigint<n> &modulus>
void Fp_model<n, modulus>::print() const
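    // Leaves Montgomery form by multiplying by a plain (non-Montgomery) 1:
    // mul_reduce(this->mont_repr) computes 1 * (xR) * R^{-1} = x mod m.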
    Fp_model<n, modulus> tmp;
    tmp.mont_repr.clear();
    tmp.mont_repr.data[0] = 1;
    tmp.mul_reduce(this->mont_repr);
    tmp.mont_repr.print();
template<mp_size_t n, const bigint<n> &modulus>
const Fp_model<n, modulus> &Fp_model<n, modulus>::zero()
    assert(s_initialized);
    return s_zero;

template<mp_size_t n, const bigint<n> &modulus>
const Fp_model<n, modulus> &Fp_model<n, modulus>::one()
    assert(s_initialized);
    return s_one;
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::geometric_generator()
    Fp_model<n, modulus> res;
    res.mont_repr.clear();
    res.mont_repr.data[0] = 2;
    res.mul_reduce(Rsquared);
    return res;

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::arithmetic_generator()
    Fp_model<n, modulus> res;
    res.mont_repr.clear();
    res.mont_repr.data[0] = 1;
    res.mul_reduce(Rsquared);
    return res;
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> &Fp_model<n, modulus>::operator+=(
    const Fp_model<n, modulus> &other)
#ifdef PROFILE_OP_COUNTS
    this->add_cnt++;
#endif
#if defined(__x86_64__) && defined(USE_ASM)
    if (n == 3) {
        __asm__( // Preserve alignment
            "/* perform bignum addition */ \n\t" //
            "/* if overflow: subtract */ \n\t" //
            "/* (tricky point: if A and B are in the range we do " //
            "not need to do anything special for the possible " //
            "carry flag) */ \n\t" //
            "jc subtract%= \n\t" //
            "/* check for overflow */ \n\t" //
            "/* subtract mod if overflow */ \n\t" //
            "subtract%=: \n\t" //
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
    } else if (n == 4) {
        __asm__( // Preserve alignment
            "/* perform bignum addition */ \n\t" //
            "/* if overflow: subtract */ \n\t" //
            "/* (tricky point: if A and B are in the range we do " //
            "not need to do anything special for the possible " //
            "carry flag) */ \n\t" //
            "jc subtract%= \n\t" //
            "/* check for overflow */ \n\t" //
            "/* subtract mod if overflow */ \n\t" //
            "subtract%=: \n\t" //
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
    } else if (n == 5) {
        __asm__( // Preserve alignment
            "/* perform bignum addition */ \n\t" //
            "/* if overflow: subtract */ \n\t" //
            "/* (tricky point: if A and B are in the range we do " //
            "not need to do anything special for the possible " //
            "carry flag) */ \n\t" //
            "jc subtract%= \n\t" //
            "/* check for overflow */ \n\t" //
            "/* subtract mod if overflow */ \n\t" //
            "subtract%=: \n\t" //
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
        mp_limb_t scratch[n + 1];
        const mp_limb_t carry =
            mpn_add_n(scratch, this->mont_repr.data, other.mont_repr.data, n);
        scratch[n] = carry;

        if (carry || mpn_cmp(scratch, modulus.data, n) >= 0) {
            const mp_limb_t borrow =
                mpn_sub(scratch, scratch, n + 1, modulus.data, n);
            assert(borrow == 0);
        }

        mpn_copyi(this->mont_repr.data, scratch, n);
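        // Single-limb illustration: for m = 7, adding 5 + 4 gives
        // scratch = 9 with no carry; since 9 >= 7, one subtraction of the
        // modulus yields 2 = (5 + 4) mod 7. A single conditional
        // subtraction suffices because both operands start out reduced
        // below m.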
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> &Fp_model<n, modulus>::operator-=(
    const Fp_model<n, modulus> &other)
#ifdef PROFILE_OP_COUNTS
    this->sub_cnt++;
#endif
#if defined(__x86_64__) && defined(USE_ASM)
    if (n == 3) {
        __asm__( // Preserve alignment
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
    } else if (n == 4) {
        __asm__( // Preserve alignment
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
    } else if (n == 5) {
        __asm__( // Preserve alignment
            : [A] "r"(this->mont_repr.data),
              [B] "r"(other.mont_repr.data),
              [mod] "r"(modulus.data)
            : "cc", "memory", "%rax");
        mp_limb_t scratch[n + 1];
        if (mpn_cmp(this->mont_repr.data, other.mont_repr.data, n) < 0) {
            const mp_limb_t carry =
                mpn_add_n(scratch, this->mont_repr.data, modulus.data, n);
            scratch[n] = carry;
        } else {
            mpn_copyi(scratch, this->mont_repr.data, n);
            scratch[n] = 0;
        }

        const mp_limb_t borrow =
            mpn_sub(scratch, scratch, n + 1, other.mont_repr.data, n);
        assert(borrow == 0);

        mpn_copyi(this->mont_repr.data, scratch, n);
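        // Illustration: for 2 - 5 (mod 7) the comparison sees 2 < 5, so the
        // minuend is first lifted to 2 + 7 = 9 and the subtraction yields 4;
        // for 5 - 2 no lift is needed and the result is 3 directly.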
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> &Fp_model<n, modulus>::operator*=(
    const Fp_model<n, modulus> &other)
#ifdef PROFILE_OP_COUNTS
    this->mul_cnt++;
#endif
    mul_reduce(other.mont_repr);
    return *this;
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> &Fp_model<n, modulus>::operator^=(const unsigned long pow)
    (*this) = power<Fp_model<n, modulus>>(*this, pow);
    return *this;

template<mp_size_t n, const bigint<n> &modulus>
template<mp_size_t m>
Fp_model<n, modulus> &Fp_model<n, modulus>::operator^=(const bigint<m> &pow)
    (*this) = power<Fp_model<n, modulus>, m>(*this, pow);
    return *this;
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::operator+(
    const Fp_model<n, modulus> &other) const
    Fp_model<n, modulus> r(*this);
    return (r += other);

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::operator-(
    const Fp_model<n, modulus> &other) const
    Fp_model<n, modulus> r(*this);
    return (r -= other);

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::operator*(
    const Fp_model<n, modulus> &other) const
    Fp_model<n, modulus> r(*this);
    return (r *= other);

template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::operator^(
    const unsigned long pow) const
    Fp_model<n, modulus> r(*this);
    return (r ^= pow);

template<mp_size_t n, const bigint<n> &modulus>
template<mp_size_t m>
Fp_model<n, modulus> Fp_model<n, modulus>::operator^(const bigint<m> &pow) const
    Fp_model<n, modulus> r(*this);
    return (r ^= pow);
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::operator-() const
#ifdef PROFILE_OP_COUNTS
    this->sub_cnt++;
#endif
    if (this->is_zero()) {
        return (*this);
    }

    Fp_model<n, modulus> r;
    mpn_sub_n(r.mont_repr.data, modulus.data, this->mont_repr.data, n);
    return r;
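    // Negation is m - a on the representation: since the representation
    // stays reduced, m - (aR mod m) is congruent to (-a)R mod m, i.e. again
    // the Montgomery form of -a. Zero is special-cased above because
    // m - 0 = m is not a reduced representative.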
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::squared() const
#ifdef PROFILE_OP_COUNTS
    this->sqr_cnt++;
    this->mul_cnt--; // zero out the upcoming mul
#endif
    // stupid pre-processor tricks; beware
#if defined(__x86_64__) && defined(USE_ASM)
    // use asm-optimized Comba squaring
    if (n == 3) {
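        // Squaring is cheaper than a general product because each cross
        // term a_i * a_j (i != j) appears twice: Comba squaring computes
        // roughly n(n+1)/2 limb products and doubles the off-diagonal
        // terms, instead of computing all n^2 products.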
        mp_limb_t res[2 * n];
        mp_limb_t c0, c1, c2;
        COMBA_3_BY_3_SQR(c0, c1, c2, res, this->mont_repr.data);

        mp_limb_t k;
        mp_limb_t tmp1, tmp2, tmp3;
        REDUCE_6_LIMB_PRODUCT(k, tmp1, tmp2, tmp3, inv, res, modulus.data);

        __asm__ volatile( // Preserve alignment
            "/* check for overflow */ \n\t" //
            "/* subtract mod if overflow */ \n\t" //
            "subtract%=: \n\t" //
            : [tmp] "r"(res + n), [M] "r"(modulus.data)
            : "cc", "memory", "%rax");

        Fp_model<n, modulus> r;
        mpn_copyi(r.mont_repr.data, res + n, n);
        return r;
    Fp_model<n, modulus> r(*this);
    return (r *= r);
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> &Fp_model<n, modulus>::invert()
#ifdef PROFILE_OP_COUNTS
    this->inv_cnt++;
#endif
    assert(!this->is_zero());

    // gp should have room for vn = n limbs
    bigint<n> g;
    mp_limb_t s[n + 1]; /* sp should have room for vn+1 limbs */
    mp_size_t sn;

    // both source operands are destroyed by mpn_gcdext
    bigint<n> v = modulus;

    // computes gcd(u, v) = g = u*s + v*t, so s*u will be 1 (mod v)
    const mp_size_t gn =
        mpn_gcdext(g.data, s, &sn, this->mont_repr.data, n, v.data, n);
    assert(gn == 1 && g.data[0] == 1); // inverse exists
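    // Worked illustration: for u = 3 and v = m = 7, mpn_gcdext finds
    // 3 * (-2) + 7 * 1 = 1, i.e. s = {2} with sn < 0; the fix-up below
    // turns that into 7 - 2 = 5, and indeed 3 * 5 = 15, which is 1 (mod 7).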
    // division result fits into q, as sn <= n+1
    // sn < 0 indicates negative sn; will fix up later
    mp_limb_t q;
    if (std::abs(sn) >= n) {
        // if sn could require modulus reduction, do it here
        mpn_tdiv_qr(
            &q, this->mont_repr.data, 0, s, std::abs(sn), modulus.data, n);
    } else {
        // otherwise just copy it over
        mpn_zero(this->mont_repr.data, n);
        mpn_copyi(this->mont_repr.data, s, std::abs(sn));
    }

    // fix up the negative sn
    if (sn < 0) {
        const mp_limb_t borrow = mpn_sub_n(
            this->mont_repr.data, modulus.data, this->mont_repr.data, n);
        assert(borrow == 0);
    }

    // back to Montgomery form: (aR)^{-1} * R^3 * R^{-1} = a^{-1} * R mod m
    mul_reduce(Rcubed);
    return *this;
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::inverse() const
    Fp_model<n, modulus> r(*this);
    return (r.invert());
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::random_element()
    // note that since the Montgomery representation is a bijection,
    // selecting a random element of {xR} is the same as selecting a
    // random element of {x}
    Fp_model<n, modulus> r;
    do {
        r.mont_repr.randomize();

        // clear all bits higher than MSB of modulus
        size_t bitno = GMP_NUMB_BITS * n - 1;
        while (modulus.test_bit(bitno) == false) {
            const std::size_t part = bitno / GMP_NUMB_BITS;
            const std::size_t bit = bitno - (GMP_NUMB_BITS * part);
            r.mont_repr.data[part] &= ~(1ul << bit);
            bitno--;
        }
    }
    // if r.data is still >= modulus -- repeat (rejection sampling)
    while (mpn_cmp(r.mont_repr.data, modulus.data, n) >= 0);
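    // Rejection-sampling cost: after the masking above, candidates are
    // uniform in [0, 2^k) with k the bit length of the modulus, so each
    // round is accepted with probability m / 2^k > 1/2 and fewer than two
    // iterations are needed on average.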
template<mp_size_t n, const bigint<n> &modulus>
Fp_model<n, modulus> Fp_model<n, modulus>::sqrt() const
    Fp_model<n, modulus> one = Fp_model<n, modulus>::one();

    size_t v = Fp_model<n, modulus>::s;
    Fp_model<n, modulus> z = Fp_model<n, modulus>::nqr_to_t;
    Fp_model<n, modulus> w = (*this) ^ Fp_model<n, modulus>::t_minus_1_over_2;
    Fp_model<n, modulus> x = (*this) * w;
    Fp_model<n, modulus> b = x * w; // b = (*this)^t

    // check if square with Euler's criterion
    Fp_model<n, modulus> check = b;
    for (size_t i = 0; i < v - 1; ++i) {
        check = check.squared();
    }
    if (check != one) {
        assert(0);
    }

    // compute square root with Tonelli--Shanks
    // (does not terminate if not a square!)
    while (b != one) {
        size_t m = 0;
        Fp_model<n, modulus> b2m = b;
        while (b2m != one) {
            // invariant: b2m = b^(2^m) after entering this loop
            b2m = b2m.squared();
            m += 1;
        }
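        // Worked toy example (p = 13, p - 1 = 2^2 * 3, so s = 2, t = 3):
        // for a = 10, w = a^((t-1)/2) = 10, x = a * w = 9, b = a^t = 12.
        // Euler check: b^(2^(s-1)) = 12^2 = 1 (mod 13), so a is a square.
        // One pass of this loop finds m = 1, and with z = 2^t = 8 (2 is a
        // non-residue mod 13) updates x to 9 * 8 = 7 and b to 12 * 8^2 = 1
        // (mod 13), so the root returned is x = 7; indeed 7^2 = 49 = 10
        // (mod 13).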
template<mp_size_t n, const bigint<n> &modulus>
std::ostream &operator<<(std::ostream &out, const Fp_model<n, modulus> &p)
    field_write<DEFAULT_ENCODING, DEFAULT_FORM>(p, out);
    return out;

template<mp_size_t n, const bigint<n> &modulus>
std::istream &operator>>(std::istream &in, Fp_model<n, modulus> &p)
    field_read<DEFAULT_ENCODING, DEFAULT_FORM>(p, in);
    return in;
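// Usage sketch (illustration only; `Fp` stands for any concrete
// instantiation of Fp_model, e.g. a curve's base field, with its static
// parameters already initialized by the usual curve init routine):
//
//   const Fp a = Fp::random_element();
//   assert(a * a.inverse() == Fp::one()); // inverse round-trip
//   assert((a ^ 2ul) == a.squared());     // operator^ vs squared()
//
//   std::stringstream ss;
//   ss << a;        // field_write with DEFAULT_ENCODING / DEFAULT_FORM
//   Fp b;
//   ss >> b;
//   assert(a == b); // serialization round-trip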