Clearmatics Libff  0.1
C++ library for Finite Fields and Elliptic Curves
multiexp.tcc
Go to the documentation of this file.
1 /** @file
2  *****************************************************************************
3 
4  Implementation of interfaces for multi-exponentiation routines.
5 
6  See multiexp.hpp .
7 
8  *****************************************************************************
9  * @author This file is part of libff, developed by SCIPR Lab
10  * and contributors (see AUTHORS).
11  * @copyright MIT license (see LICENSE file)
12  *****************************************************************************/
13 
14 #ifndef MULTIEXP_TCC_
15 #define MULTIEXP_TCC_
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <libff/algebra/curves/curve_serialization.hpp>
20 #include <libff/algebra/fields/bigint.hpp>
21 #include <libff/algebra/fields/fp_aux.tcc>
22 #include <libff/algebra/scalar_multiplication/multiexp.hpp>
23 #include <libff/algebra/scalar_multiplication/wnaf.hpp>
24 #include <libff/common/concurrent_fifo.hpp>
25 #include <libff/common/profiling.hpp>
26 #include <libff/common/utils.hpp>
27 #include <type_traits>
28 
29 namespace libff
30 {
31 
32 namespace internal
33 {
34 
35 inline size_t pippenger_optimal_c(const size_t num_elements)
36 {
37  // empirically, this seems to be a decent estimate of the optimal value of c
38  const size_t log2_num_elements = log2(num_elements);
39  return log2_num_elements - (log2_num_elements / 3 - 2);
40 }
41 
42 /// Add/subtract base_element to/from the correct bucket, based on a signed
43 /// digit, using and updating the bucket_hit flags. Supports regular / mixed
44 /// addition, based on base element form.
45 template<typename GroupT, multi_exp_base_form BaseForm>
46 void multi_exp_add_element_to_bucket_with_signed_digit(
47  std::vector<GroupT> &buckets,
48  std::vector<bool> &bucket_hit,
49  const GroupT &base_element,
50  ssize_t digit)
51 {
52  if (digit < 0) {
53  const size_t bucket_idx = (-digit) - 1;
54  assert(bucket_idx < buckets.size());
55  if (bucket_hit[bucket_idx]) {
56  if (BaseForm == multi_exp_base_form_special) {
57  buckets[bucket_idx] =
58  buckets[bucket_idx].mixed_add(-base_element);
59  } else {
60  buckets[bucket_idx] = buckets[bucket_idx].add(-base_element);
61  }
62  } else {
63  buckets[bucket_idx] = -base_element;
64  bucket_hit[bucket_idx] = true;
65  }
66  } else if (digit > 0) {
67  const size_t bucket_idx = digit - 1;
68  assert(bucket_idx < buckets.size());
69  if (bucket_hit[bucket_idx]) {
70  if (BaseForm == multi_exp_base_form_special) {
71  buckets[bucket_idx] =
72  buckets[bucket_idx].mixed_add(base_element);
73  } else {
74  buckets[bucket_idx] = buckets[bucket_idx].add(base_element);
75  }
76  } else {
77  buckets[bucket_idx] = base_element;
78  bucket_hit[bucket_idx] = true;
79  }
80  }
81 }
82 
83 /// Compute:
84 /// \sum_{i=1}^{num_buckets} [i] B_i
85 /// where
86 /// B_i is the i-th bucket value (stored in buckets[i - 1])
87 ///
88 /// Caller must ensure that at least one bucket is not empty (i.e.
89 /// bucket_hit[i] == true for at least one i).
90 template<typename GroupT, multi_exp_base_form Form>
91 GroupT multiexp_accumulate_buckets(
92  std::vector<GroupT> &buckets,
93  std::vector<bool> &bucket_hit,
94  const size_t num_buckets)
95 {
96  // Find first set bucket and initialize the accumulator. For each remaining
97  // buckets (set or unset), add the bucket value to the accumulator (if
98  // set), and add the accumulator to the sum. In this way, the i-th bucket
99  // will be added to sum exactly i times.
100 
101  size_t i = num_buckets - 1;
102  while (!bucket_hit[i]) {
103  --i;
104 
105  // Ensure that at least one bucket is initialized.
106  assert(i < num_buckets);
107  }
108 
109  GroupT sum;
110  GroupT accumulator;
111  sum = accumulator = buckets[i];
112  while (i > 0) {
113  --i;
114  if (bucket_hit[i]) {
115  if (Form == multi_exp_base_form_special) {
116  accumulator = accumulator.mixed_add(buckets[i]);
117  } else {
118  accumulator = accumulator + buckets[i];
119  }
120  }
121  sum = sum + accumulator;
122  }
123 
124  return sum;
125 }
126 
127 template<mp_size_t n> class ordered_exponent
128 {
129  // to use std::push_heap and friends later
130 public:
131  size_t idx;
132  bigint<n> r;
133 
134  ordered_exponent(const size_t idx, const bigint<n> &r) : idx(idx), r(r){};
135 
136  bool operator<(const ordered_exponent<n> &other) const
137  {
138 #if defined(__x86_64__) && defined(USE_ASM)
139  if (n == 3) {
140  long res;
141  __asm__( // Preserve alignment
142  "// check for overflow \n\t" //
143  "mov $0, %[res] \n\t" //
144  ADD_CMP(16) //
145  ADD_CMP(8) //
146  ADD_CMP(0) //
147  "jmp done%= \n\t" //
148  "subtract%=: \n\t" //
149  "mov $1, %[res] \n\t" //
150  "done%=: \n\t" //
151  : [res] "=&r"(res)
152  : [A] "r"(other.r.data), [mod] "r"(this->r.data)
153  : "cc", "%rax");
154  return res;
155  } else if (n == 4) {
156  long res;
157  __asm__( // Preserve alignment
158  "// check for overflow \n\t" //
159  "mov $0, %[res] \n\t" //
160  ADD_CMP(24) //
161  ADD_CMP(16) //
162  ADD_CMP(8) //
163  ADD_CMP(0) //
164  "jmp done%= \n\t" //
165  "subtract%=: \n\t" //
166  "mov $1, %[res] \n\t" //
167  "done%=: \n\t" //
168  : [res] "=&r"(res)
169  : [A] "r"(other.r.data), [mod] "r"(this->r.data)
170  : "cc", "%rax");
171  return res;
172  } else if (n == 5) {
173  long res;
174  __asm__( // Preserve alignment
175  "// check for overflow \n\t" //
176  "mov $0, %[res] \n\t" //
177  ADD_CMP(32) //
178  ADD_CMP(24) //
179  ADD_CMP(16) //
180  ADD_CMP(8) //
181  ADD_CMP(0) //
182  "jmp done%= \n\t" //
183  "subtract%=: \n\t" //
184  "mov $1, %[res] \n\t" //
185  "done%=: \n\t" //
186  : [res] "=&r"(res)
187  : [A] "r"(other.r.data), [mod] "r"(this->r.data)
188  : "cc", "%rax");
189  return res;
190  } else
191 #endif
192  {
193  return (mpn_cmp(this->r.data, other.r.data, n) < 0);
194  }
195  }
196 };
197 
198 // Class holding the specialized multi exp implementations. Must implement a
199 // public static method of the form:
200 // static GroupT multi_exp_inner(
201 // typename std::vector<GroupT>::const_iterator bases,
202 // typename std::vector<GroupT>::const_iterator bases_end,
203 // typename std::vector<FieldT>::const_iterator exponents,
204 // typename std::vector<FieldT>::const_iterator exponents_end);
205 template<
206  typename GroupT,
207  typename FieldT,
208  multi_exp_method Method,
209  multi_exp_base_form BaseForm>
210 class multi_exp_implementation
211 {
212 };
213 
214 /// Naive multi_exp_implementation
215 template<typename GroupT, typename FieldT, multi_exp_base_form BaseForm>
216 class multi_exp_implementation<GroupT, FieldT, multi_exp_method_naive, BaseForm>
217 {
218 public:
219  static GroupT multi_exp_inner(
220  typename std::vector<GroupT>::const_iterator vec_start,
221  typename std::vector<GroupT>::const_iterator vec_end,
222  typename std::vector<FieldT>::const_iterator scalar_start,
223  typename std::vector<FieldT>::const_iterator scalar_end)
224  {
225  GroupT result(GroupT::zero());
226 
227  typename std::vector<GroupT>::const_iterator vec_it;
228  typename std::vector<FieldT>::const_iterator scalar_it;
229 
230  for (vec_it = vec_start, scalar_it = scalar_start; vec_it != vec_end;
231  ++vec_it, ++scalar_it) {
232  bigint<FieldT::num_limbs> scalar_bigint = scalar_it->as_bigint();
233  result =
234  result + opt_window_wnaf_exp(
235  *vec_it, scalar_bigint, scalar_bigint.num_bits());
236  }
237  assert(scalar_it == scalar_end);
238  UNUSED(scalar_end);
239 
240  return result;
241  }
242 };
243 
244 // Naive plain multi_exp_implementation
245 template<typename GroupT, typename FieldT, multi_exp_base_form BaseForm>
246 class multi_exp_implementation<
247  GroupT,
248  FieldT,
249  multi_exp_method_naive_plain,
250  BaseForm>
251 {
252 public:
253  static GroupT multi_exp_inner(
254  typename std::vector<GroupT>::const_iterator vec_start,
255  typename std::vector<GroupT>::const_iterator vec_end,
256  typename std::vector<FieldT>::const_iterator scalar_start,
257  typename std::vector<FieldT>::const_iterator scalar_end)
258  {
259  GroupT result(GroupT::zero());
260 
261  typename std::vector<GroupT>::const_iterator vec_it;
262  typename std::vector<FieldT>::const_iterator scalar_it;
263 
264  for (vec_it = vec_start, scalar_it = scalar_start; vec_it != vec_end;
265  ++vec_it, ++scalar_it) {
266  result = result + (*scalar_it) * (*vec_it);
267  }
268  assert(scalar_it == scalar_end);
269  UNUSED(scalar_end);
270 
271  return result;
272  }
273 };
274 
275 // multi_exp_implementation for BDLO12
276 template<typename GroupT, typename FieldT, multi_exp_base_form BaseForm>
277 class multi_exp_implementation<
278  GroupT,
279  FieldT,
280  multi_exp_method_BDLO12,
281  BaseForm>
282 {
283 public:
284  static GroupT multi_exp_inner(
285  typename std::vector<GroupT>::const_iterator bases,
286  typename std::vector<GroupT>::const_iterator bases_end,
287  typename std::vector<FieldT>::const_iterator exponents,
288  typename std::vector<FieldT>::const_iterator exponents_end)
289  {
290  UNUSED(exponents_end);
291  const size_t length = bases_end - bases;
292  const size_t c = internal::pippenger_optimal_c(length);
293 
294  const mp_size_t exp_num_limbs =
295  std::remove_reference<decltype(*exponents)>::type::num_limbs;
296  std::vector<bigint<exp_num_limbs>> bi_exponents(length);
297  size_t num_bits = 0;
298 
299  for (size_t i = 0; i < length; i++) {
300  bi_exponents[i] = exponents[i].as_bigint();
301  num_bits = std::max(num_bits, bi_exponents[i].num_bits());
302  }
303 
304  const size_t num_groups = (num_bits + c - 1) / c;
305 
306  GroupT result;
307  bool result_nonzero = false;
308 
309  for (size_t k = num_groups - 1; k <= num_groups; k--) {
310  if (result_nonzero) {
311  for (size_t i = 0; i < c; i++) {
312  result = result.dbl();
313  }
314  }
315 
316  std::vector<GroupT> buckets(1 << c);
317  std::vector<bool> bucket_nonzero(1 << c);
318 
319  for (size_t i = 0; i < length; i++) {
320  // id = k-th "digit" of bi_exponents[i], radix 2^c
321  // = (bi_exponents[i] >> (c*k)) & (2^c - 1)
322  size_t id = 0;
323  for (size_t j = 0; j < c; j++) {
324  if (bi_exponents[i].test_bit(k * c + j)) {
325  id |= 1 << j;
326  }
327  }
328 
329  // Skip 0 digits.
330  if (id == 0) {
331  continue;
332  }
333 
334  // Add (or write) the group element into the appropriate bucket.
335  if (bucket_nonzero[id]) {
336  if (BaseForm == multi_exp_base_form_special) {
337  buckets[id] = buckets[id].mixed_add(bases[i]);
338  } else {
339  buckets[id] = buckets[id] + bases[i];
340  }
341  } else {
342  buckets[id] = bases[i];
343  bucket_nonzero[id] = true;
344  }
345  }
346 
347 #ifdef USE_MIXED_ADDITION
348  batch_to_special(buckets);
349 #endif
350 
351  GroupT running_sum;
352  bool running_sum_nonzero = false;
353 
354  for (size_t i = (1u << c) - 1; i > 0; i--) {
355  if (bucket_nonzero[i]) {
356  if (running_sum_nonzero) {
357 #ifdef USE_MIXED_ADDITION
358  running_sum = running_sum.mixed_add(buckets[i]);
359 #else
360  running_sum = running_sum + buckets[i];
361 #endif
362  } else {
363  running_sum = buckets[i];
364  running_sum_nonzero = true;
365  }
366  }
367 
368  if (running_sum_nonzero) {
369  if (result_nonzero) {
370  result = result + running_sum;
371  } else {
372  result = running_sum;
373  result_nonzero = true;
374  }
375  }
376  }
377  }
378 
379  return result;
380  }
381 };
382 
383 template<typename GroupT, typename FieldT, multi_exp_base_form BaseForm>
384 class multi_exp_implementation<
385  GroupT,
386  FieldT,
387  multi_exp_method_bos_coster,
388  BaseForm>
389 {
390 public:
391  static GroupT multi_exp_inner(
392  typename std::vector<GroupT>::const_iterator vec_start,
393  typename std::vector<GroupT>::const_iterator vec_end,
394  typename std::vector<FieldT>::const_iterator scalar_start,
395  typename std::vector<FieldT>::const_iterator scalar_end)
396  {
397  const mp_size_t n =
398  std::remove_reference<decltype(*scalar_start)>::type::num_limbs;
399 
400  if (vec_start == vec_end) {
401  return GroupT::zero();
402  }
403 
404  if (vec_start + 1 == vec_end) {
405  return (*scalar_start) * (*vec_start);
406  }
407 
408  std::vector<ordered_exponent<n>> opt_q;
409  const size_t vec_len = scalar_end - scalar_start;
410  const size_t odd_vec_len = (vec_len % 2 == 1 ? vec_len : vec_len + 1);
411  opt_q.reserve(odd_vec_len);
412  std::vector<GroupT> g;
413  g.reserve(odd_vec_len);
414 
415  typename std::vector<GroupT>::const_iterator vec_it;
416  typename std::vector<FieldT>::const_iterator scalar_it;
417  size_t i;
418  for (i = 0, vec_it = vec_start, scalar_it = scalar_start;
419  vec_it != vec_end;
420  ++vec_it, ++scalar_it, ++i) {
421  g.emplace_back(*vec_it);
422 
423  opt_q.emplace_back(ordered_exponent<n>(i, scalar_it->as_bigint()));
424  }
425  std::make_heap(opt_q.begin(), opt_q.end());
426  assert(scalar_it == scalar_end);
427 
428  if (vec_len != odd_vec_len) {
429  g.emplace_back(GroupT::zero());
430  opt_q.emplace_back(
431  ordered_exponent<n>(odd_vec_len - 1, bigint<n>(0ul)));
432  }
433  assert(g.size() % 2 == 1);
434  assert(opt_q.size() == g.size());
435 
436  GroupT opt_result = GroupT::zero();
437 
438  while (true) {
439  ordered_exponent<n> &a = opt_q[0];
440  ordered_exponent<n> &b =
441  (opt_q[1] < opt_q[2] ? opt_q[2] : opt_q[1]);
442 
443  const size_t abits = a.r.num_bits();
444 
445  if (b.r.is_zero()) {
446  // opt_result = opt_result + (a.r * g[a.idx]);
447  opt_result =
448  opt_result + opt_window_wnaf_exp(g[a.idx], a.r, abits);
449  break;
450  }
451 
452  const size_t bbits = b.r.num_bits();
453  const size_t limit = (abits - bbits >= 20 ? 20 : abits - bbits);
454 
455  if (bbits < 1ul << limit) {
456  /*
457  In this case, exponentiating to the power of a is cheaper than
458  subtracting b from a multiple times, so let's do it directly
459  */
460  // opt_result = opt_result + (a.r * g[a.idx]);
461  opt_result =
462  opt_result + opt_window_wnaf_exp(g[a.idx], a.r, abits);
463 #ifdef DEBUG
464  printf(
465  "Skipping the following pair (%zu bit number vs %zu "
466  "bit):\n",
467  abits,
468  bbits);
469  a.r.print();
470  b.r.print();
471 #endif
472  a.r.clear();
473  } else {
474  // x A + y B => (x-y) A + y (B+A)
475  mpn_sub_n(a.r.data, a.r.data, b.r.data, n);
476  g[b.idx] = g[b.idx] + g[a.idx];
477  }
478 
479  // regardless of whether a was cleared or subtracted from we push it
480  // down, then take back up
481 
482  /* heapify A down */
483  size_t a_pos = 0;
484  while (2 * a_pos + 2 < odd_vec_len) {
485  // this is a max-heap so to maintain a heap property we swap
486  // with the largest of the two
487  if (opt_q[2 * a_pos + 1] < opt_q[2 * a_pos + 2]) {
488  std::swap(opt_q[a_pos], opt_q[2 * a_pos + 2]);
489  a_pos = 2 * a_pos + 2;
490  } else {
491  std::swap(opt_q[a_pos], opt_q[2 * a_pos + 1]);
492  a_pos = 2 * a_pos + 1;
493  }
494  }
495 
496  /* now heapify A up appropriate amount of times */
497  while (a_pos > 0 && opt_q[(a_pos - 1) / 2] < opt_q[a_pos]) {
498  std::swap(opt_q[a_pos], opt_q[(a_pos - 1) / 2]);
499  a_pos = (a_pos - 1) / 2;
500  }
501  }
502 
503  return opt_result;
504  }
505 };
506 
507 template<typename GroupT, typename FieldT, multi_exp_base_form BaseForm>
508 class multi_exp_implementation<
509  GroupT,
510  FieldT,
511  multi_exp_method_BDLO12_signed,
512  BaseForm>
513 {
514 public:
515  using BigInt =
516  typename std::decay<decltype(((FieldT *)nullptr)->mont_repr)>::type;
517 
518  /// buckets and bucket_hit should have at least 2^{c-1} entries.
519  static GroupT signed_digits_round(
520  typename std::vector<GroupT>::const_iterator bases,
521  typename std::vector<GroupT>::const_iterator bases_end,
522  typename std::vector<BigInt>::const_iterator exponents,
523  std::vector<GroupT> &buckets,
524  std::vector<bool> &bucket_hit,
525  const size_t num_entries,
526  const size_t num_buckets,
527  const size_t c,
528  const size_t digit_idx)
529  {
530  UNUSED(bases_end);
531 
532  assert(buckets.size() >= num_buckets);
533  assert(bucket_hit.size() >= num_buckets);
534 
535  // Zero bucket_hit array.
536  bucket_hit.assign(num_buckets, false);
537 
538  // For each scalar, element pair ...
539  size_t non_zero = 0;
540  for (size_t i = 0; i < num_entries; ++i) {
541  const ssize_t digit =
542  field_get_signed_digit(exponents[i], c, digit_idx);
543  if (digit == 0) {
544  continue;
545  }
546 
547  multi_exp_add_element_to_bucket_with_signed_digit<GroupT, BaseForm>(
548  buckets, bucket_hit, bases[i], digit);
549  ++non_zero;
550  }
551 
552  // Check up-front for the edge-case where no buckets have been touched.
553  if (non_zero == 0) {
554  return GroupT::zero();
555  }
556 
557  // TODO: consider converting buckets to special form
558 
559  return multiexp_accumulate_buckets<GroupT, multi_exp_base_form_normal>(
560  buckets, bucket_hit, num_buckets);
561  }
562 
563  static GroupT multi_exp_inner(
564  typename std::vector<GroupT>::const_iterator bases,
565  typename std::vector<GroupT>::const_iterator bases_end,
566  typename std::vector<FieldT>::const_iterator exponents,
567  typename std::vector<FieldT>::const_iterator exponents_end)
568  {
569  UNUSED(exponents_end);
570 
571  const size_t num_entries = bases_end - bases;
572  assert(exponents_end - exponents == (ssize_t)num_entries);
573  const size_t c = bdlo12_signed_optimal_c(num_entries);
574  assert(c > 0);
575 
576  // Pre-compute the bigint values
577  size_t num_bits = 0;
578  std::vector<BigInt> bi_exponents(num_entries);
579  for (size_t i = 0; i < num_entries; ++i) {
580  bi_exponents[i] = exponents[i].as_bigint();
581  num_bits = std::max(num_bits, bi_exponents[i].num_bits());
582  }
583 
584  // Allow sufficient rounds for num_bits + 2, to accomodate overflow +
585  // negative final digit.
586  const size_t num_rounds = (num_bits + 2 + c - 1) / c;
587 
588  // Digits have values between -(2^{c-1}) and 2^{c-1} - 1. Negative
589  // values are negated (to make them positive since we have cheap
590  // inversion of base elements), and 0 values are ignored. Hence we
591  // require up to 2^{c-1} buckets
592  const size_t num_buckets = 1 << (c - 1);
593 
594  // Allocate the round state once, and reuse it.
595  std::vector<GroupT> buckets(num_buckets);
596  std::vector<bool> bucket_hit(num_buckets);
597  assert(buckets.size() == num_buckets);
598  assert(bucket_hit.size() == num_buckets);
599 
600  // Compute from highest-order to lowest-order digits, accumulating at
601  // the same time.
602  GroupT result = signed_digits_round(
603  bases,
604  bases_end,
605  bi_exponents.begin(),
606  buckets,
607  bucket_hit,
608  num_entries,
609  num_buckets,
610  c,
611  num_rounds - 1);
612  for (size_t round_idx = 1; round_idx < num_rounds; ++round_idx) {
613  const size_t digit_idx = num_rounds - 1 - round_idx;
614  for (size_t i = 0; i < c; ++i) {
615  result = result.dbl();
616  }
617 
618  const GroupT round_result = signed_digits_round(
619  bases,
620  bases_end,
621  bi_exponents.begin(),
622  buckets,
623  bucket_hit,
624  num_entries,
625  num_buckets,
626  c,
627  digit_idx);
628  result = result + round_result;
629  }
630 
631  return result;
632  }
633 };
634 
635 } // namespace internal
636 
637 static inline size_t bdlo12_signed_optimal_c(size_t num_entries)
638 {
639  // For now, this seems like a good estimate in most cases.
640  return internal::pippenger_optimal_c(num_entries) + 1;
641 }
642 
643 template<
644  typename GroupT,
645  typename FieldT,
646  multi_exp_method Method,
647  multi_exp_base_form BaseForm>
648 GroupT multi_exp(
649  typename std::vector<GroupT>::const_iterator vec_start,
650  typename std::vector<GroupT>::const_iterator vec_end,
651  typename std::vector<FieldT>::const_iterator scalar_start,
652  typename std::vector<FieldT>::const_iterator scalar_end,
653  const size_t chunks)
654 {
655  const size_t total = vec_end - vec_start;
656  if ((total < chunks) || (chunks == 1)) {
657  // no need to split into "chunks", can call implementation directly
658  return internal::
659  multi_exp_implementation<GroupT, FieldT, Method, BaseForm>::
660  multi_exp_inner(vec_start, vec_end, scalar_start, scalar_end);
661  }
662 
663  const size_t one = total / chunks;
664 
665  std::vector<GroupT> partial(chunks, GroupT::zero());
666 
667 #ifdef MULTICORE
668 #pragma omp parallel for
669 #endif
670  for (size_t i = 0; i < chunks; ++i) {
671  partial[i] = internal::
672  multi_exp_implementation<GroupT, FieldT, Method, BaseForm>::
673  multi_exp_inner(
674  vec_start + i * one,
675  (i == chunks - 1) ? vec_end : (vec_start + (i + 1) * one),
676  scalar_start + i * one,
677  (i == chunks - 1) ? scalar_end
678  : (scalar_start + (i + 1) * one));
679  }
680 
681  GroupT final = GroupT::zero();
682 
683  for (size_t i = 0; i < chunks; ++i) {
684  final = final + partial[i];
685  }
686 
687  return final;
688 }
689 
690 template<
691  typename GroupT,
692  typename FieldT,
693  multi_exp_method Method,
694  multi_exp_base_form BaseForm>
695 GroupT multi_exp_filter_one_zero(
696  typename std::vector<GroupT>::const_iterator vec_start,
697  typename std::vector<GroupT>::const_iterator vec_end,
698  typename std::vector<FieldT>::const_iterator scalar_start,
699  typename std::vector<FieldT>::const_iterator scalar_end,
700  const size_t chunks)
701 {
702  assert(
703  std::distance(vec_start, vec_end) ==
704  std::distance(scalar_start, scalar_end));
705  UNUSED(vec_end);
706  enter_block("Process scalar vector");
707  auto value_it = vec_start;
708  auto scalar_it = scalar_start;
709 
710  std::vector<FieldT> p;
711  std::vector<GroupT> g;
712 
713  GroupT acc = GroupT::zero();
714 
715  size_t num_skip = 0;
716  size_t num_add = 0;
717  size_t num_other = 0;
718 
719  for (; scalar_it != scalar_end; ++scalar_it, ++value_it) {
720  if (scalar_it->is_zero()) {
721  // do nothing
722  ++num_skip;
723  } else if (*scalar_it == FieldT::one()) {
724  if (BaseForm == multi_exp_base_form_special) {
725  acc = acc.mixed_add(*value_it);
726  } else {
727  acc = acc + (*value_it);
728  }
729  ++num_add;
730  } else {
731  p.emplace_back(*scalar_it);
732  g.emplace_back(*value_it);
733  ++num_other;
734  }
735  }
736 
737  print_indent();
738  printf(
739  "* Elements of w skipped: %zu (%0.2f%%)\n",
740  num_skip,
741  100. * num_skip / (num_skip + num_add + num_other));
742  print_indent();
743  printf(
744  "* Elements of w processed with special addition: %zu (%0.2f%%)\n",
745  num_add,
746  100. * num_add / (num_skip + num_add + num_other));
747  print_indent();
748  printf(
749  "* Elements of w remaining: %zu (%0.2f%%)\n",
750  num_other,
751  100. * num_other / (num_skip + num_add + num_other));
752 
753  leave_block("Process scalar vector");
754 
755  return acc + multi_exp<GroupT, FieldT, Method, BaseForm>(
756  g.begin(), g.end(), p.begin(), p.end(), chunks);
757 }
758 
759 template<typename T>
760 T inner_product(
761  typename std::vector<T>::const_iterator a_start,
762  typename std::vector<T>::const_iterator a_end,
763  typename std::vector<T>::const_iterator b_start,
764  typename std::vector<T>::const_iterator b_end)
765 {
766  return multi_exp<T, T, multi_exp_method_naive_plain>(
767  a_start, a_end, b_start, b_end, 1);
768 }
769 
770 template<typename T> size_t get_exp_window_size(const size_t num_scalars)
771 {
772  if (T::fixed_base_exp_window_table.empty()) {
773 #ifdef LOWMEM
774  return 14;
775 #else
776  return 17;
777 #endif
778  }
779  size_t window = 1;
780  for (long i = T::fixed_base_exp_window_table.size() - 1; i >= 0; --i) {
781 #ifdef DEBUG
782  if (!inhibit_profiling_info) {
783  printf(
784  "%ld %zu %zu\n",
785  i,
786  num_scalars,
787  T::fixed_base_exp_window_table[i]);
788  }
789 #endif
790  if (T::fixed_base_exp_window_table[i] != 0 &&
791  num_scalars >= T::fixed_base_exp_window_table[i]) {
792  window = i + 1;
793  break;
794  }
795  }
796 
797  if (!inhibit_profiling_info) {
798  print_indent();
799  printf(
800  "Choosing window size %zu for %zu elements\n", window, num_scalars);
801  }
802 
803 #ifdef LOWMEM
804  window = std::min((size_t)14, window);
805 #endif
806  return window;
807 }
808 
809 template<typename T>
810 window_table<T> get_window_table(
811  const size_t scalar_size, const size_t window, const T &g)
812 {
813  const size_t in_window = 1ul << window;
814  const size_t outerc = (scalar_size + window - 1) / window;
815  const size_t last_in_window = 1ul << (scalar_size - (outerc - 1) * window);
816 #ifdef DEBUG
817  if (!inhibit_profiling_info) {
818  print_indent();
819  printf(
820  "* scalar_size=%zu; window=%zu; in_window=%zu; outerc=%zu\n",
821  scalar_size,
822  window,
823  in_window,
824  outerc);
825  }
826 #endif
827 
828  window_table<T> powers_of_g(outerc, std::vector<T>(in_window, T::zero()));
829 
830  T gouter = g;
831 
832  for (size_t outer = 0; outer < outerc; ++outer) {
833  T ginner = T::zero();
834  size_t cur_in_window = outer == outerc - 1 ? last_in_window : in_window;
835  for (size_t inner = 0; inner < cur_in_window; ++inner) {
836  powers_of_g[outer][inner] = ginner;
837  ginner = ginner + gouter;
838  }
839 
840  for (size_t i = 0; i < window; ++i) {
841  gouter = gouter + gouter;
842  }
843  }
844 
845  return powers_of_g;
846 }
847 
848 template<typename T, typename FieldT>
849 T windowed_exp(
850  const size_t scalar_size,
851  const size_t window,
852  const window_table<T> &powers_of_g,
853  const FieldT &pow)
854 {
855  const size_t outerc = (scalar_size + window - 1) / window;
856  const bigint<FieldT::num_limbs> pow_val = pow.as_bigint();
857 
858  /* exp */
859  T res = powers_of_g[0][0];
860 
861  for (size_t outer = 0; outer < outerc; ++outer) {
862  size_t inner = 0;
863  for (size_t i = 0; i < window; ++i) {
864  if (pow_val.test_bit(outer * window + i)) {
865  inner |= 1u << i;
866  }
867  }
868 
869  res = res + powers_of_g[outer][inner];
870  }
871 
872  return res;
873 }
874 
875 template<typename T, typename FieldT>
876 std::vector<T> batch_exp(
877  const size_t scalar_size,
878  const size_t window,
879  const window_table<T> &table,
880  const std::vector<FieldT> &v)
881 {
882  return batch_exp(scalar_size, window, table, v, v.size());
883 }
884 
885 template<typename T, typename FieldT>
886 std::vector<T> batch_exp(
887  const size_t scalar_size,
888  const size_t window,
889  const window_table<T> &table,
890  const std::vector<FieldT> &v,
891  size_t num_entries)
892 {
893  if (!inhibit_profiling_info) {
894  print_indent();
895  }
896  std::vector<T> res(num_entries, table[0][0]);
897 
898 #ifdef MULTICORE
899 #pragma omp parallel for
900 #endif
901  for (size_t i = 0; i < num_entries; ++i) {
902  res[i] = windowed_exp(scalar_size, window, table, v[i]);
903 
904  if (!inhibit_profiling_info && (i % 10000 == 0)) {
905  printf(".");
906  fflush(stdout);
907  }
908  }
909 
910  if (!inhibit_profiling_info) {
911  printf(" DONE!\n");
912  }
913 
914  return res;
915 }
916 
917 template<typename T, typename FieldT>
918 std::vector<T> batch_exp_with_coeff(
919  const size_t scalar_size,
920  const size_t window,
921  const window_table<T> &table,
922  const FieldT &coeff,
923  const std::vector<FieldT> &v)
924 {
925  if (!inhibit_profiling_info) {
926  print_indent();
927  }
928  std::vector<T> res(v.size(), table[0][0]);
929 
930 #ifdef MULTICORE
931 #pragma omp parallel for
932 #endif
933  for (size_t i = 0; i < v.size(); ++i) {
934  res[i] = windowed_exp(scalar_size, window, table, coeff * v[i]);
935 
936  if (!inhibit_profiling_info && (i % 10000 == 0)) {
937  printf(".");
938  fflush(stdout);
939  }
940  }
941 
942  if (!inhibit_profiling_info) {
943  printf(" DONE!\n");
944  }
945 
946  return res;
947 }
948 
949 template<typename T> void batch_to_special(std::vector<T> &vec)
950 {
951  enter_block("Batch-convert elements to special form");
952 
953  std::vector<T> non_zero_vec;
954  for (size_t i = 0; i < vec.size(); ++i) {
955  if (!vec[i].is_zero()) {
956  non_zero_vec.emplace_back(vec[i]);
957  }
958  }
959 
960  T::batch_to_special_all_non_zeros(non_zero_vec);
961  auto it = non_zero_vec.begin();
962  T zero_special = T::zero();
963  zero_special.to_special();
964 
965  for (size_t i = 0; i < vec.size(); ++i) {
966  if (!vec[i].is_zero()) {
967  vec[i] = *it;
968  ++it;
969  } else {
970  vec[i] = zero_special;
971  }
972  }
973  leave_block("Batch-convert elements to special form");
974 }
975 
976 } // namespace libff
977 
978 #endif // MULTIEXP_TCC_