1 /* Generate perfect square testing data.
3 Copyright 2002, 2003, 2004, 2012 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23 #include "bootstrap.c"
26 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
27 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
28 residues and non-residues modulo small factors of that modulus.
30 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
31 function exists specifically because 2^24-1 and 2^48-1 have nice sets of
32 prime factors. For other limb sizes it's considered, but if it doesn't
33 have good factors then mpn_mod_1 will be used instead.
35 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
36 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
37 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
38 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
39 calculation within a single limb.
41 In either case primes can be combined to make divisors. The table data
42 then effectively indicates remainders which are quadratic residues mod
43 all the primes. This sort of combining reduces the number of steps
44 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
45 Nothing is gained or lost in terms of detections, the same total fraction
46 of non-residues will be identified.
48 Nothing particularly sophisticated is attempted for combining factors to
49 make divisors. This is probably a kind of knapsack problem so it'd be
50 too hard to attempt anything completely general. For the usual 32 and 64
51 bit limbs we get a good enough result just pairing the biggest and
52 smallest which fit together, repeatedly.
54 Another aim is to get powerful combinations, ie. divisors which identify
55 biggest fraction of non-residues, and have those run first. Again for
56 the usual 32 and 64 bits it seems good enough just to pair for big
57 divisors then sort according to the resulting fraction of non-residues
60 Also in this program, a table sq_res_0x100 of residues modulo 256 is
61 generated. This simply fills bits into limbs of the appropriate
62 build-time GMP_LIMB_BITS each.
67 /* Normally we aren't using const in gen*.c programs, so as not to have to
68 bother figuring out if it works, but using it with f_cmp_divisor and
69 f_cmp_fraction avoids warnings from the qsort calls. */
71 /* Same tests as gmp.h. */
72 #if defined (__STDC__) \
73 || defined (__cplusplus) \
76 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
77 || defined (_MSC_VER) \
87 mpz_t *sq_res_0x100; /* table of limbs */
88 int nsq_res_0x100; /* elements in sq_res_0x100 array */
89 int sq_res_0x100_num; /* squares in sq_res_0x100 */
90 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
92 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
93 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
94 int max_divisor; /* all divisors <= max_divisor */
95 int max_divisor_bits; /* ceil(log2(max_divisor)) */
96 double total_fraction; /* of squares */
97 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
98 mpz_t pp_norm; /* pp shifted so NUMB high bit set */
99 mpz_t pp_inverted; /* invert_limb style inverse */
100 mpz_t mod_mask; /* 2^mod_bits-1 */
101 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
103 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
108 struct rawfactor_t *rawfactor;
111 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
114 mpz_t inverse; /* 1/divisor mod 2^mod_bits */
115 mpz_t mask; /* indicating squares mod divisor */
116 double fraction; /* squares/total */
118 struct factor_t *factor;
119 int nfactor; /* entries in use in factor array */
120 int factor_alloc; /* entries allocated to factor array */
124 f_cmp_divisor (const void *parg, const void *qarg)
126 const struct factor_t *p, *q;
129 if (p->divisor > q->divisor)
131 else if (p->divisor < q->divisor)
138 f_cmp_fraction (const void *parg, const void *qarg)
140 const struct factor_t *p, *q;
143 if (p->fraction > q->fraction)
145 else if (p->fraction < q->fraction)
151 /* Remove array[idx] by copying the remainder down, and adjust narray
153 #define COLLAPSE_ELEMENT(array, idx, narray) \
155 memmove (&(array)[idx], \
157 ((narray)-((idx)+1)) * sizeof (array[0])); \
162 /* return n*2^p mod m */
164 mul_2exp_mod (int n, int p, int m)
167 for (i = 0; i < p; i++)
172 /* return -n mod m */
174 neg_mod (int n, int m)
176 assert (n >= 0 && n < m);
177 return (n == 0 ? 0 : m-n);
180 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
181 "-(idx<<mod_bits)" can be a square modulo m. */
183 square_mask (mpz_t mask, int m)
187 p = mul_2exp_mod (1, mod_bits, m);
190 mpz_set_ui (mask, 0L);
191 for (i = 0; i < m; i++)
195 mpz_setbit (mask, (unsigned long) idx);
200 generate_sq_res_0x100 (int limb_bits)
204 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
205 sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
207 for (i = 0; i < nsq_res_0x100; i++)
208 mpz_init_set_ui (sq_res_0x100[i], 0L);
210 for (i = 0; i < 0x100; i++)
212 res = (i * i) % 0x100;
213 mpz_setbit (sq_res_0x100[res / limb_bits],
214 (unsigned long) (res % limb_bits));
217 sq_res_0x100_num = 0;
218 for (i = 0; i < nsq_res_0x100; i++)
219 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
220 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
224 generate_mod (int limb_bits, int nail_bits)
226 int numb_bits = limb_bits - nail_bits;
229 mpz_init_set_ui (pp, 0L);
230 mpz_init_set_ui (pp_norm, 0L);
231 mpz_init_set_ui (pp_inverted, 0L);
233 /* no more than limb_bits many factors in a one limb modulus (and of
234 course in reality nothing like that many) */
235 factor_alloc = limb_bits;
236 factor = xmalloc (factor_alloc * sizeof (*factor));
237 rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor));
239 if (numb_bits % 4 != 0)
241 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
245 max_divisor = 2*limb_bits;
246 max_divisor_bits = log2_ceil (max_divisor);
248 if (numb_bits / 4 < max_divisor_bits)
250 /* Wind back to one limb worth of max_divisor, if that will let us use
252 max_divisor = limb_bits;
253 max_divisor_bits = log2_ceil (max_divisor);
255 if (numb_bits / 4 < max_divisor_bits)
257 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
263 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
267 mod34_bits = (numb_bits / 4) * 3;
269 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
270 the mod34_bits mark, adding the two halves for a remainder of at most
271 mod34_bits+1 many bits */
272 mod_bits = mod34_bits + 1;
274 mpz_init_set_ui (m, 1L);
275 mpz_mul_2exp (m, m, mod34_bits);
276 mpz_sub_ui (m, m, 1L);
281 for (i = 3; i <= max_divisor; i++)
286 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
287 if (mpz_sgn (r) != 0)
290 /* if a repeated prime is found it's used as an i^n in one factor */
295 if (divisor > max_divisor / i)
299 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
301 while (mpz_sgn (r) == 0);
303 assert (nrawfactor < factor_alloc);
304 rawfactor[nrawfactor].divisor = i;
305 rawfactor[nrawfactor].multiplicity = multiplicity;
318 sprintf (mod34_excuse, "only %d small factor%s",
319 nrawfactor, nrawfactor == 1 ? "" : "s");
322 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
323 tried with just one */
324 max_divisor = 2*limb_bits;
325 max_divisor_bits = log2_ceil (max_divisor);
329 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
331 /* one copy of each small prime */
333 for (i = 3; i <= max_divisor; i++)
338 mpz_mul_ui (new_pp, pp, (unsigned long) i);
339 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
341 mpz_set (pp, new_pp);
343 assert (nrawfactor < factor_alloc);
344 rawfactor[nrawfactor].divisor = i;
345 rawfactor[nrawfactor].multiplicity = 1;
349 /* Plus an extra copy of one or more of the primes selected, if that
350 still fits in max_divisor and the total in mod_bits. Usually only
351 3 or 5 will be candidates */
352 for (i = nrawfactor-1; i >= 0; i--)
354 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
356 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
357 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
359 mpz_set (pp, new_pp);
361 rawfactor[i].multiplicity++;
364 mod_bits = mpz_sizeinbase (pp, 2);
366 mpz_set (pp_norm, pp);
367 while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
368 mpz_add (pp_norm, pp_norm, pp_norm);
370 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
375 /* start the factor array */
376 for (i = 0; i < nrawfactor; i++)
379 assert (nfactor < factor_alloc);
380 factor[nfactor].divisor = 1;
381 for (j = 0; j < rawfactor[i].multiplicity; j++)
382 factor[nfactor].divisor *= rawfactor[i].divisor;
387 /* Combine entries in the factor array. Combine the smallest entry with
388 the biggest one that will fit with it (ie. under max_divisor), then
389 repeat that with the new smallest entry. */
390 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
391 for (i = nfactor-1; i >= 1; i--)
393 if (factor[i].divisor <= max_divisor / factor[0].divisor)
395 factor[0].divisor *= factor[i].divisor;
396 COLLAPSE_ELEMENT (factor, i, nfactor);
401 total_fraction = 1.0;
402 for (i = 0; i < nfactor; i++)
404 mpz_init (factor[i].inverse);
405 mpz_invert_ui_2exp (factor[i].inverse,
406 (unsigned long) factor[i].divisor,
407 (unsigned long) mod_bits);
409 mpz_init (factor[i].mask);
410 square_mask (factor[i].mask, factor[i].divisor);
412 /* fraction of possible squares */
413 factor[i].fraction = (double) mpz_popcount (factor[i].mask)
416 /* total fraction of possible squares */
417 total_fraction *= factor[i].fraction;
420 /* best tests first (ie. smallest fraction) */
421 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
425 print (int limb_bits, int nail_bits)
430 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
433 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
434 limb_bits, nail_bits);
435 printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
436 limb_bits, nail_bits);
440 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
441 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
442 (1.0 - sq_res_0x100_fraction) * 100.0,
443 0x100 - sq_res_0x100_num);
444 printf ("static const mp_limb_t\n");
445 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
446 for (i = 0; i < nsq_res_0x100; i++)
448 printf (" CNST_LIMB(0x");
449 mpz_out_str (stdout, 16, sq_res_0x100[i]);
455 if (mpz_sgn (pp) != 0)
457 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
458 printf ("/* PERFSQR_PP = ");
461 printf ("/* 2^%d-1 = ", mod34_bits);
462 for (i = 0; i < nrawfactor; i++)
466 printf ("%d", rawfactor[i].divisor);
467 if (rawfactor[i].multiplicity != 1)
468 printf ("^%d", rawfactor[i].multiplicity);
470 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
472 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
473 if (mpz_sgn (pp) != 0)
475 printf ("#define PERFSQR_PP CNST_LIMB(0x");
476 mpz_out_str (stdout, 16, pp);
478 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
479 mpz_out_str (stdout, 16, pp_norm);
481 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
482 mpz_out_str (stdout, 16, pp_inverted);
490 printf ("/* This test identifies %.2f%% as non-squares. */\n",
491 (1.0 - total_fraction) * 100.0);
492 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
493 printf (" do { \\\n");
494 printf (" mp_limb_t r; \\\n");
495 if (mpz_sgn (pp) != 0)
496 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
498 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
500 for (i = 0; i < nfactor; i++)
503 printf (" /* %5.2f%% */ \\\n",
504 (1.0 - factor[i].fraction) * 100.0);
506 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
507 factor[i].divisor <= limb_bits ? 1 : 2,
509 mpz_out_str (stdout, 16, factor[i].inverse);
511 printf (" CNST_LIMB(0x");
513 if ( factor[i].divisor <= limb_bits)
515 mpz_out_str (stdout, 16, factor[i].mask);
519 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
520 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
521 mpz_out_str (stdout, 16, mhi);
522 printf ("), CNST_LIMB(0x");
523 mpz_out_str (stdout, 16, mlo);
528 printf (" } while (0)\n");
531 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
532 (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
535 printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
536 printf ("#define PERFSQR_DIVISORS { 256,");
537 for (i = 0; i < nfactor; i++)
538 printf (" %d,", factor[i].divisor);
547 main (int argc, char *argv[])
549 int limb_bits, nail_bits;
553 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
557 limb_bits = atoi (argv[1]);
558 nail_bits = atoi (argv[2]);
562 || nail_bits >= limb_bits)
564 fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
565 limb_bits, nail_bits);
569 generate_sq_res_0x100 (limb_bits);
570 generate_mod (limb_bits, nail_bits);
572 print (limb_bits, nail_bits);