1 /* Generate perfect square testing data.
3 Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 MA 02110-1301, USA. */
28 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
29 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
30 residues and non-residues modulo small factors of that modulus.
32 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
33 function exists specifically because 2^24-1 and 2^48-1 have nice sets of
34 prime factors. For other limb sizes it's considered, but if it doesn't
35 have good factors then mpn_mod_1 will be used instead.
37 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
38 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
39 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
40 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
41 calculation within a single limb.
43 In either case primes can be combined to make divisors. The table data
44 then effectively indicates remainders which are quadratic residues mod
45 all the primes. This sort of combining reduces the number of steps
46 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
47 Nothing is gained or lost in terms of detections, the same total fraction
48 of non-residues will be identified.
50 Nothing particularly sophisticated is attempted for combining factors to
51 make divisors. This is probably a kind of knapsack problem so it'd be
52 too hard to attempt anything completely general. For the usual 32 and 64
53 bit limbs we get a good enough result just pairing the biggest and
54 smallest which fit together, repeatedly.
56 Another aim is to get powerful combinations, ie. divisors which identify
57 biggest fraction of non-residues, and have those run first. Again for
58 the usual 32 and 64 bits it seems good enough just to pair for big
59 divisors then sort according to the resulting fraction of non-residues
62 Also in this program, a table sq_res_0x100 of residues modulo 256 is
63 generated. This simply fills bits into limbs of the appropriate
64 build-time GMP_LIMB_BITS each.
69 /* Normally we aren't using const in gen*.c programs, so as not to have to
70 bother figuring out if it works, but using it with f_cmp_divisor and
71 f_cmp_fraction avoids warnings from the qsort calls. */
73 /* Same tests as gmp.h. */
74 #if defined (__STDC__) \
75 || defined (__cplusplus) \
78 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
79 || defined (_MSC_VER) \
89 mpz_t *sq_res_0x100; /* table of limbs */
90 int nsq_res_0x100; /* elements in sq_res_0x100 array */
91 int sq_res_0x100_num; /* squares in sq_res_0x100 */
92 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
94 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
95 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
96 int max_divisor; /* all divisors <= max_divisor */
97 int max_divisor_bits; /* ceil(log2(max_divisor)) */
98 double total_fraction; /* of squares */
99 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
100 mpz_t pp_norm; /* pp shifted so NUMB high bit set */
101 mpz_t pp_inverted; /* invert_limb style inverse */
102 mpz_t mod_mask; /* 2^mod_bits-1 */
103 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
105 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
110 struct rawfactor_t *rawfactor;
113 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
116 mpz_t inverse; /* 1/divisor mod 2^mod_bits */
117 mpz_t mask; /* indicating squares mod divisor */
118 double fraction; /* squares/total */
120 struct factor_t *factor;
121 int nfactor; /* entries in use in factor array */
122 int factor_alloc; /* entries allocated to factor array */
126 f_cmp_divisor (const void *parg, const void *qarg)
128 const struct factor_t *p, *q;
131 if (p->divisor > q->divisor)
133 else if (p->divisor < q->divisor)
140 f_cmp_fraction (const void *parg, const void *qarg)
142 const struct factor_t *p, *q;
145 if (p->fraction > q->fraction)
147 else if (p->fraction < q->fraction)
153 /* Remove array[idx] by copying the remainder down, and adjust narray
155 #define COLLAPSE_ELEMENT(array, idx, narray) \
157 mem_copyi ((char *) &(array)[idx], \
158 (char *) &(array)[idx+1], \
159 ((narray)-((idx)+1)) * sizeof (array[0])); \
164 /* return n*2^p mod m */
166 mul_2exp_mod (int n, int p, int m)
169 for (i = 0; i < p; i++)
174 /* return -n mod m */
176 neg_mod (int n, int m)
178 ASSERT (n >= 0 && n < m);
179 return (n == 0 ? 0 : m-n);
182 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
183 "-(idx<<mod_bits)" can be a square modulo m. */
185 square_mask (mpz_t mask, int m)
189 p = mul_2exp_mod (1, mod_bits, m);
192 mpz_set_ui (mask, 0L);
193 for (i = 0; i < m; i++)
197 mpz_setbit (mask, (unsigned long) idx);
202 generate_sq_res_0x100 (int limb_bits)
206 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
207 sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
209 for (i = 0; i < nsq_res_0x100; i++)
210 mpz_init_set_ui (sq_res_0x100[i], 0L);
212 for (i = 0; i < 0x100; i++)
214 res = (i * i) % 0x100;
215 mpz_setbit (sq_res_0x100[res / limb_bits],
216 (unsigned long) (res % limb_bits));
219 sq_res_0x100_num = 0;
220 for (i = 0; i < nsq_res_0x100; i++)
221 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
222 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
226 generate_mod (int limb_bits, int nail_bits)
228 int numb_bits = limb_bits - nail_bits;
231 mpz_init_set_ui (pp, 0L);
232 mpz_init_set_ui (pp_norm, 0L);
233 mpz_init_set_ui (pp_inverted, 0L);
235 /* no more than limb_bits many factors in a one limb modulus (and of
236 course in reality nothing like that many) */
237 factor_alloc = limb_bits;
238 factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
239 rawfactor = (struct rawfactor_t *)
240 xmalloc (factor_alloc * sizeof (*rawfactor));
242 if (numb_bits % 4 != 0)
244 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
248 max_divisor = 2*limb_bits;
249 max_divisor_bits = log2_ceil (max_divisor);
251 if (numb_bits / 4 < max_divisor_bits)
253 /* Wind back to one limb worth of max_divisor, if that will let us use
255 max_divisor = limb_bits;
256 max_divisor_bits = log2_ceil (max_divisor);
258 if (numb_bits / 4 < max_divisor_bits)
260 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
266 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
270 mod34_bits = (numb_bits / 4) * 3;
272 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
273 the mod34_bits mark, adding the two halves for a remainder of at most
274 mod34_bits+1 many bits */
275 mod_bits = mod34_bits + 1;
277 mpz_init_set_ui (m, 1L);
278 mpz_mul_2exp (m, m, mod34_bits);
279 mpz_sub_ui (m, m, 1L);
284 for (i = 3; i <= max_divisor; i++)
289 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
290 if (mpz_sgn (r) != 0)
293 /* if a repeated prime is found it's used as an i^n in one factor */
298 if (divisor > max_divisor / i)
302 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
304 while (mpz_sgn (r) == 0);
306 ASSERT (nrawfactor < factor_alloc);
307 rawfactor[nrawfactor].divisor = i;
308 rawfactor[nrawfactor].multiplicity = multiplicity;
321 sprintf (mod34_excuse, "only %d small factor%s",
322 nrawfactor, nrawfactor == 1 ? "" : "s");
325 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
326 tried with just one */
327 max_divisor = 2*limb_bits;
328 max_divisor_bits = log2_ceil (max_divisor);
332 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
334 /* one copy of each small prime */
336 for (i = 3; i <= max_divisor; i++)
341 mpz_mul_ui (new_pp, pp, (unsigned long) i);
342 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
344 mpz_set (pp, new_pp);
346 ASSERT (nrawfactor < factor_alloc);
347 rawfactor[nrawfactor].divisor = i;
348 rawfactor[nrawfactor].multiplicity = 1;
352 /* Plus an extra copy of one or more of the primes selected, if that
353 still fits in max_divisor and the total in mod_bits. Usually only
354 3 or 5 will be candidates */
355 for (i = nrawfactor-1; i >= 0; i--)
357 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
359 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
360 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
362 mpz_set (pp, new_pp);
364 rawfactor[i].multiplicity++;
367 mod_bits = mpz_sizeinbase (pp, 2);
369 mpz_set (pp_norm, pp);
370 while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
371 mpz_add (pp_norm, pp_norm, pp_norm);
373 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
378 /* start the factor array */
379 for (i = 0; i < nrawfactor; i++)
382 ASSERT (nfactor < factor_alloc);
383 factor[nfactor].divisor = 1;
384 for (j = 0; j < rawfactor[i].multiplicity; j++)
385 factor[nfactor].divisor *= rawfactor[i].divisor;
390 /* Combine entries in the factor array. Combine the smallest entry with
391 the biggest one that will fit with it (ie. under max_divisor), then
392 repeat that with the new smallest entry. */
393 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
394 for (i = nfactor-1; i >= 1; i--)
396 if (factor[i].divisor <= max_divisor / factor[0].divisor)
398 factor[0].divisor *= factor[i].divisor;
399 COLLAPSE_ELEMENT (factor, i, nfactor);
404 total_fraction = 1.0;
405 for (i = 0; i < nfactor; i++)
407 mpz_init (factor[i].inverse);
408 mpz_invert_ui_2exp (factor[i].inverse,
409 (unsigned long) factor[i].divisor,
410 (unsigned long) mod_bits);
412 mpz_init (factor[i].mask);
413 square_mask (factor[i].mask, factor[i].divisor);
415 /* fraction of possible squares */
416 factor[i].fraction = (double) mpz_popcount (factor[i].mask)
419 /* total fraction of possible squares */
420 total_fraction *= factor[i].fraction;
423 /* best tests first (ie. smallest fraction) */
424 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
428 print (int limb_bits, int nail_bits)
433 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
436 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
437 limb_bits, nail_bits);
438 printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
439 limb_bits, nail_bits);
443 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
444 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
445 (1.0 - sq_res_0x100_fraction) * 100.0,
446 0x100 - sq_res_0x100_num);
447 printf ("static const mp_limb_t\n");
448 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
449 for (i = 0; i < nsq_res_0x100; i++)
451 printf (" CNST_LIMB(0x");
452 mpz_out_str (stdout, 16, sq_res_0x100[i]);
458 if (mpz_sgn (pp) != 0)
460 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
461 printf ("/* PERFSQR_PP = ");
464 printf ("/* 2^%d-1 = ", mod34_bits);
465 for (i = 0; i < nrawfactor; i++)
469 printf ("%d", rawfactor[i].divisor);
470 if (rawfactor[i].multiplicity != 1)
471 printf ("^%d", rawfactor[i].multiplicity);
473 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
475 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
476 if (mpz_sgn (pp) != 0)
478 printf ("#define PERFSQR_PP CNST_LIMB(0x");
479 mpz_out_str (stdout, 16, pp);
481 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
482 mpz_out_str (stdout, 16, pp_norm);
484 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
485 mpz_out_str (stdout, 16, pp_inverted);
493 printf ("/* This test identifies %.2f%% as non-squares. */\n",
494 (1.0 - total_fraction) * 100.0);
495 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
496 printf (" do { \\\n");
497 printf (" mp_limb_t r; \\\n");
498 if (mpz_sgn (pp) != 0)
499 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
501 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
503 for (i = 0; i < nfactor; i++)
506 printf (" /* %5.2f%% */ \\\n",
507 (1.0 - factor[i].fraction) * 100.0);
509 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
510 factor[i].divisor <= limb_bits ? 1 : 2,
512 mpz_out_str (stdout, 16, factor[i].inverse);
514 printf (" CNST_LIMB(0x");
516 if ( factor[i].divisor <= limb_bits)
518 mpz_out_str (stdout, 16, factor[i].mask);
522 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
523 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
524 mpz_out_str (stdout, 16, mhi);
525 printf ("), CNST_LIMB(0x");
526 mpz_out_str (stdout, 16, mlo);
531 printf (" } while (0)\n");
534 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
535 (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
538 printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
539 printf ("#define PERFSQR_DIVISORS { 256,");
540 for (i = 0; i < nfactor; i++)
541 printf (" %d,", factor[i].divisor);
550 main (int argc, char *argv[])
552 int limb_bits, nail_bits;
556 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
560 limb_bits = atoi (argv[1]);
561 nail_bits = atoi (argv[2]);
565 || nail_bits >= limb_bits)
567 fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
568 limb_bits, nail_bits);
572 generate_sq_res_0x100 (limb_bits);
573 generate_mod (limb_bits, nail_bits);
575 print (limb_bits, nail_bits);