1 /* Run some tests on various mpn routines.
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
6 Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 /* Usage: try [options] <function>...
27 For example, "./try mpn_add_n" to run tests of that function.
29 Combinations of alignments and overlaps are tested, with redzones above
30 or below the destinations, and with the sources write-protected.
32 The number of tests performed becomes ridiculously large with all the
33 combinations, and for that reason this can't be a part of a "make check",
34 it's meant only for development. The code isn't very pretty either.
36 During development it can help to disable the redzones, since seeing the
37 rest of the destination written can show where the wrong part is, or if
38 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
39 fill (see below) will show locations never written.
41 The -s option can be used to test only certain size operands, which is
42 useful if some new code doesn't yet support say sizes less than the
43 unrolling, or whatever.
45 When a problem occurs it'll of course be necessary to run the program
46 under gdb to find out quite where, how and why it's going wrong. Disable
47 the spinner with the -W option when doing this, or single stepping won't
48 work. Using the "-1" option to run with simple data can be useful.
50 New functions to test can be added in choice_array[]. If a new TYPE is
51 required then add it to the existing constants, set up its parameters in
52 param_init(), and add it to the call() function. Extra parameter fields
53 can be added if necessary, or further interpretations given to existing
59 This program is not designed for use on Cray vector systems under Unicos;
60 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems
61 don't really have pages or mprotect. We could arrange to run the tests
62 without the redzones, but we haven't bothered currently.
67 umul_ppmm support is not very good, lots of source data is generated
68 whereas only two limbs are needed.
70 Make a little scheme for interpreting the "SIZE" selections uniformly.
72 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
73 source limbs. Possibly increase the default repetitions in that case.
75 Automatically detect gdb and disable the spinner (use -W for now).
77 Make a way to re-run a failing case in the debugger. Have an option to
78 snapshot each test case before it's run so the data is available if a
79 segv occurs. (This should be more reliable than the current print_all()
80 in the signal handler.)
82 When alignment means a dst isn't hard against the redzone, check the
83 space in between remains unchanged.
85 When a source overlaps a destination, don't run both s[i].high 0 and 1,
86 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
88 When partial overlaps aren't done, don't loop over source alignments
91 Try to make the looping code a bit less horrible. Right now it's pretty
92 hard to see what iterations are actually done.
94 Perhaps specific setups and loops for each style of function under test
95 would be clearer than a parameterized general loop. There's lots of
96 stuff common to all functions, but the exceptions get messy.
98 When there's no overlap, run with both src>dst and src<dst. A subtle
99 calling-conventions violation occurred in a P6 copy which depended on the
100 relative location of src and dst.
102 multiplier_N is more or less a third source region for the addmul_N
103 routines, and could be done with the redzoned region scheme.
108 /* always do assertion checking */
109 #define WANT_ASSERT 1
126 #include <sys/mman.h>
130 #include "gmp-impl.h"
131 #include "longlong.h"
135 #if !HAVE_DECL_OPTARG
137 extern int optind, opterr;
140 #if ! HAVE_DECL_SYS_NERR
144 #if ! HAVE_DECL_SYS_ERRLIST
145 extern char *sys_errlist[];
152 if (n < 0 || n >= sys_nerr)
153 return "errno out of range";
155 return sys_errlist[n];
159 /* Rumour has it some systems lack a define of PROT_NONE. */
164 /* Dummy defines for when mprotect doesn't exist. */
172 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
173 _SC_PAGE_SIZE instead. */
174 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
175 #define _SC_PAGESIZE _SC_PAGE_SIZE
187 #define DEFAULT_REPETITIONS 10
189 int option_repetitions = DEFAULT_REPETITIONS;
190 int option_spinner = 1;
191 int option_redzones = 1;
192 int option_firstsize = 0;
193 int option_lastsize = 500;
194 int option_firstsize2 = 0;
198 #define CARRY_RANDOMS 5
199 #define MULTIPLIER_RANDOMS 5
200 #define DIVISOR_RANDOMS 5
201 #define FRACTION_COUNT 4
203 int option_print = 0;
210 int option_data = DATA_TRAND;
214 #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
216 /* must be a multiple of the page size */
217 #define REDZONE_BYTES (pagesize * 16)
218 #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
221 #define MAX3(x,y,z) (MAX (x, MAX (y, z))) /* largest of three values, via nested MAX; arguments may be evaluated more than once */
223 #if GMP_LIMB_BITS == 32
224 #define DEADVAL CNST_LIMB(0xDEADBEEF)
226 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
236 #define TRAP_NOWHERE 0
239 #define TRAP_SETUPS 3
240 int trap_location = TRAP_NOWHERE;
243 #define NUM_SOURCES 2
247 struct region_t region;
253 struct source_t s[NUM_SOURCES];
261 struct dest_t d[NUM_DESTS];
263 struct source_each_t {
268 struct region_t region;
277 mp_limb_t multiplier;
278 mp_limb_t multiplier_N[8];
282 struct dest_each_t d[NUM_DESTS];
283 struct source_each_t s[NUM_SOURCES];
287 struct each_t ref = { "Ref" };
288 struct each_t fun = { "Fun" };
290 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size) /* size for source index n: size2 when n is 1 and tr->size2 is non-zero, otherwise size */
292 void validate_fail __GMP_PROTO ((void));
300 typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
309 #define SIZE_ALLOW_ZERO 2
310 #define SIZE_1 3 /* 1 limb */
311 #define SIZE_2 4 /* 2 limbs */
312 #define SIZE_3 5 /* 3 limbs */
313 #define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
315 #define SIZE_PLUS_1 8
318 #define SIZE_DIFF_PLUS_1 11
319 #define SIZE_RETVAL 12
320 #define SIZE_CEIL_HALF 13
321 #define SIZE_GET_STR 14
322 #define SIZE_PLUS_MSIZE_SUB_1 15 /* size+msize-1 */
327 /* multiplier_N size in limbs */
334 #define CARRY_BIT 1 /* single bit 0 or 1 */
335 #define CARRY_3 2 /* 0, 1, 2 */
336 #define CARRY_4 3 /* 0 to 3 */
337 #define CARRY_LIMB 4 /* any limb value */
338 #define CARRY_DIVISOR 5 /* carry<divisor */
341 /* a fudge to tell the output when to print negatives */
347 #define DIVISOR_LIMB 1
348 #define DIVISOR_NORM 2
349 #define DIVISOR_ODD 3
352 #define DATA_NON_ZERO 1
354 #define DATA_SRC0_ODD 3
355 #define DATA_SRC0_HIGHBIT 4
356 #define DATA_SRC1_ODD 5
357 #define DATA_SRC1_HIGHBIT 6
358 #define DATA_MULTIPLE_DIVISOR 7
359 #define DATA_UDIV_QRNND 8
362 /* Default is allow full overlap. */
363 #define OVERLAP_NONE 1
364 #define OVERLAP_LOW_TO_HIGH 2
365 #define OVERLAP_HIGH_TO_LOW 3
366 #define OVERLAP_NOT_SRCS 4
367 #define OVERLAP_NOT_SRC2 8
371 const char *reference_name;
373 void (*validate) __GMP_PROTO ((void));
374 const char *validate_name;
381 validate_mod_34lsub1 (void)
383 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
385 mp_srcptr ptr = s[0].p;
387 mp_limb_t got, got_mod, want, want_mod;
392 got_mod = got % CNST_34LSUB1;
394 want = refmpn_mod_34lsub1 (ptr, size);
395 want_mod = want % CNST_34LSUB1;
397 if (got_mod != want_mod)
399 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
400 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
409 validate_divexact_1 (void)
411 mp_srcptr src = s[0].p;
412 mp_srcptr dst = fun.d[0].p;
418 mp_ptr tp = refmpn_malloc_limbs (size);
421 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
424 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
427 if (! refmpn_equal_anynail (tp, dst, size))
429 printf ("Quotient a/d wrong\n");
430 mpn_trace ("fun ", dst, size);
431 mpn_trace ("want", tp, size);
443 validate_modexact_1c_odd (void)
445 mp_srcptr ptr = s[0].p;
446 mp_limb_t r = fun.retval;
450 ASSERT (divisor & 1);
452 if ((r & GMP_NAIL_MASK) != 0)
453 printf ("r has non-zero nail\n");
459 printf ("Don't have r < divisor\n");
463 else /* carry >= divisor */
465 if (! (r <= divisor))
467 printf ("Don't have r <= divisor\n");
473 mp_limb_t c = carry % divisor;
474 mp_ptr tp = refmpn_malloc_limbs (size+1);
477 for (k = size-1; k <= size; k++)
479 /* set {tp,size+1} to r*b^k + a - c */
480 refmpn_copyi (tp, ptr, size);
482 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
483 if (refmpn_sub_1 (tp, tp, size+1, c))
484 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
486 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
489 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
501 validate_modexact_1_odd (void)
504 validate_modexact_1c_odd ();
509 validate_sqrtrem (void)
511 mp_srcptr orig_ptr = s[0].p;
512 mp_size_t orig_size = size;
513 mp_size_t root_size = (size+1)/2;
514 mp_srcptr root_ptr = fun.d[0].p;
515 mp_size_t rem_size = fun.retval;
516 mp_srcptr rem_ptr = fun.d[1].p;
517 mp_size_t prod_size = 2*root_size;
521 if (rem_size < 0 || rem_size > size)
523 printf ("Bad remainder size retval %ld\n", (long) rem_size);
527 p = refmpn_malloc_limbs (prod_size);
529 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
530 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
532 printf ("Remainder bigger than 2*root\n");
536 refmpn_sqr (p, root_ptr, root_size);
538 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
539 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
541 printf ("root^2+rem != original\n");
542 mpn_trace ("prod", p, prod_size);
552 /* These types are indexes into the param[] array and are arbitrary so long
553 as they're all distinct and within the size of param[]. Renumber
554 whenever necessary or desired. */
558 #define TYPE_ADD_NC 3
561 #define TYPE_SUB_NC 6
564 #define TYPE_MUL_1C 8
567 #define TYPE_MUL_3 92
568 #define TYPE_MUL_4 93
570 #define TYPE_ADDMUL_1 10
571 #define TYPE_ADDMUL_1C 11
572 #define TYPE_SUBMUL_1 12
573 #define TYPE_SUBMUL_1C 13
575 #define TYPE_ADDMUL_2 14
576 #define TYPE_ADDMUL_3 15
577 #define TYPE_ADDMUL_4 16
578 #define TYPE_ADDMUL_5 17
579 #define TYPE_ADDMUL_6 18
580 #define TYPE_ADDMUL_7 19
581 #define TYPE_ADDMUL_8 20
583 #define TYPE_ADDSUB_N 21
584 #define TYPE_ADDSUB_NC 22
586 #define TYPE_RSHIFT 23
587 #define TYPE_LSHIFT 24
588 #define TYPE_LSHIFTC 25
591 #define TYPE_COPYI 27
592 #define TYPE_COPYD 28
595 #define TYPE_ADDLSH1_N 30
596 #define TYPE_ADDLSH2_N 48
597 #define TYPE_ADDLSH_N 49
598 #define TYPE_SUBLSH1_N 31
599 #define TYPE_SUBLSH_N 130
600 #define TYPE_RSBLSH1_N 34
601 #define TYPE_RSBLSH2_N 46
602 #define TYPE_RSBLSH_N 47
603 #define TYPE_RSH1ADD_N 32
604 #define TYPE_RSH1SUB_N 33
606 #define TYPE_MOD_1 35
607 #define TYPE_MOD_1C 36
608 #define TYPE_DIVMOD_1 37
609 #define TYPE_DIVMOD_1C 38
610 #define TYPE_DIVREM_1 39
611 #define TYPE_DIVREM_1C 40
612 #define TYPE_PREINV_DIVREM_1 41
613 #define TYPE_PREINV_MOD_1 42
614 #define TYPE_MOD_34LSUB1 43
615 #define TYPE_UDIV_QRNND 44
616 #define TYPE_UDIV_QRNND_R 45
618 #define TYPE_DIVEXACT_1 50
619 #define TYPE_DIVEXACT_BY3 51
620 #define TYPE_DIVEXACT_BY3C 52
621 #define TYPE_MODEXACT_1_ODD 53
622 #define TYPE_MODEXACT_1C_ODD 54
624 #define TYPE_INVERT 55
625 #define TYPE_BINVERT 56
628 #define TYPE_GCD_1 61
629 #define TYPE_GCD_FINDA 62
630 #define TYPE_MPZ_JACOBI 63
631 #define TYPE_MPZ_KRONECKER 64
632 #define TYPE_MPZ_KRONECKER_UI 65
633 #define TYPE_MPZ_KRONECKER_SI 66
634 #define TYPE_MPZ_UI_KRONECKER 67
635 #define TYPE_MPZ_SI_KRONECKER 68
637 #define TYPE_AND_N 70
638 #define TYPE_NAND_N 71
639 #define TYPE_ANDN_N 72
640 #define TYPE_IOR_N 73
641 #define TYPE_IORN_N 74
642 #define TYPE_NIOR_N 75
643 #define TYPE_XOR_N 76
644 #define TYPE_XNOR_N 77
646 #define TYPE_MUL_MN 80
647 #define TYPE_MUL_N 81
649 #define TYPE_UMUL_PPMM 83
650 #define TYPE_UMUL_PPMM_R 84
651 #define TYPE_MULLO_N 85
653 #define TYPE_SBPI1_DIV_QR 90
654 #define TYPE_TDIV_QR 91
656 #define TYPE_SQRTREM 100
657 #define TYPE_ZERO 101
658 #define TYPE_GET_STR 102
659 #define TYPE_POPCOUNT 103
660 #define TYPE_HAMDIST 104
662 #define TYPE_EXTRA 110
664 struct try_t param[150];
672 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
675 #define REFERENCE(fun) \
676 p->reference = (tryfun_t) fun; \
677 p->reference_name = #fun
678 #define VALIDATE(fun) \
680 p->validate_name = #fun
682 #define REFERENCE(fun) \
683 p->reference = (tryfun_t) fun; \
684 p->reference_name = "fun"
685 #define VALIDATE(fun) \
687 p->validate_name = "fun"
691 p = ¶m[TYPE_ADD_N];
696 REFERENCE (refmpn_add_n);
698 p = ¶m[TYPE_ADD_NC];
700 p->carry = CARRY_BIT;
701 REFERENCE (refmpn_add_nc);
703 p = ¶m[TYPE_SUB_N];
705 REFERENCE (refmpn_sub_n);
707 p = ¶m[TYPE_SUB_NC];
709 REFERENCE (refmpn_sub_nc);
711 p = ¶m[TYPE_ADD];
713 p->size = SIZE_ALLOW_ZERO;
715 REFERENCE (refmpn_add);
717 p = ¶m[TYPE_SUB];
719 REFERENCE (refmpn_sub);
722 p = ¶m[TYPE_MUL_1];
727 p->overlap = OVERLAP_LOW_TO_HIGH;
728 REFERENCE (refmpn_mul_1);
730 p = ¶m[TYPE_MUL_1C];
732 p->carry = CARRY_LIMB;
733 REFERENCE (refmpn_mul_1c);
736 p = ¶m[TYPE_MUL_2];
739 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
743 p->overlap = OVERLAP_NOT_SRC2;
744 REFERENCE (refmpn_mul_2);
746 p = ¶m[TYPE_MUL_3];
749 REFERENCE (refmpn_mul_3);
751 p = ¶m[TYPE_MUL_4];
754 REFERENCE (refmpn_mul_4);
757 p = ¶m[TYPE_ADDMUL_1];
762 p->dst0_from_src1 = 1;
763 REFERENCE (refmpn_addmul_1);
765 p = ¶m[TYPE_ADDMUL_1C];
766 COPY (TYPE_ADDMUL_1);
767 p->carry = CARRY_LIMB;
768 REFERENCE (refmpn_addmul_1c);
770 p = ¶m[TYPE_SUBMUL_1];
771 COPY (TYPE_ADDMUL_1);
772 REFERENCE (refmpn_submul_1);
774 p = ¶m[TYPE_SUBMUL_1C];
775 COPY (TYPE_ADDMUL_1C);
776 REFERENCE (refmpn_submul_1c);
779 p = ¶m[TYPE_ADDMUL_2];
782 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
786 p->dst0_from_src1 = 1;
787 p->overlap = OVERLAP_NOT_SRC2;
788 REFERENCE (refmpn_addmul_2);
790 p = ¶m[TYPE_ADDMUL_3];
791 COPY (TYPE_ADDMUL_2);
793 REFERENCE (refmpn_addmul_3);
795 p = ¶m[TYPE_ADDMUL_4];
796 COPY (TYPE_ADDMUL_2);
798 REFERENCE (refmpn_addmul_4);
800 p = ¶m[TYPE_ADDMUL_5];
801 COPY (TYPE_ADDMUL_2);
803 REFERENCE (refmpn_addmul_5);
805 p = ¶m[TYPE_ADDMUL_6];
806 COPY (TYPE_ADDMUL_2);
808 REFERENCE (refmpn_addmul_6);
810 p = ¶m[TYPE_ADDMUL_7];
811 COPY (TYPE_ADDMUL_2);
813 REFERENCE (refmpn_addmul_7);
815 p = ¶m[TYPE_ADDMUL_8];
816 COPY (TYPE_ADDMUL_2);
818 REFERENCE (refmpn_addmul_8);
821 p = ¶m[TYPE_AND_N];
825 REFERENCE (refmpn_and_n);
827 p = ¶m[TYPE_ANDN_N];
829 REFERENCE (refmpn_andn_n);
831 p = ¶m[TYPE_NAND_N];
833 REFERENCE (refmpn_nand_n);
835 p = ¶m[TYPE_IOR_N];
837 REFERENCE (refmpn_ior_n);
839 p = ¶m[TYPE_IORN_N];
841 REFERENCE (refmpn_iorn_n);
843 p = ¶m[TYPE_NIOR_N];
845 REFERENCE (refmpn_nior_n);
847 p = ¶m[TYPE_XOR_N];
849 REFERENCE (refmpn_xor_n);
851 p = ¶m[TYPE_XNOR_N];
853 REFERENCE (refmpn_xnor_n);
856 p = ¶m[TYPE_ADDSUB_N];
862 REFERENCE (refmpn_add_n_sub_n);
864 p = ¶m[TYPE_ADDSUB_NC];
865 COPY (TYPE_ADDSUB_N);
867 REFERENCE (refmpn_add_n_sub_nc);
870 p = ¶m[TYPE_COPY];
873 p->overlap = OVERLAP_NONE;
874 p->size = SIZE_ALLOW_ZERO;
875 REFERENCE (refmpn_copy);
877 p = ¶m[TYPE_COPYI];
880 p->overlap = OVERLAP_LOW_TO_HIGH;
881 p->size = SIZE_ALLOW_ZERO;
882 REFERENCE (refmpn_copyi);
884 p = ¶m[TYPE_COPYD];
887 p->overlap = OVERLAP_HIGH_TO_LOW;
888 p->size = SIZE_ALLOW_ZERO;
889 REFERENCE (refmpn_copyd);
891 p = ¶m[TYPE_COM];
894 REFERENCE (refmpn_com);
897 p = ¶m[TYPE_ADDLSH1_N];
899 REFERENCE (refmpn_addlsh1_n);
901 p = ¶m[TYPE_ADDLSH2_N];
903 REFERENCE (refmpn_addlsh2_n);
905 p = ¶m[TYPE_ADDLSH_N];
908 REFERENCE (refmpn_addlsh_n);
910 p = ¶m[TYPE_SUBLSH1_N];
912 REFERENCE (refmpn_sublsh1_n);
914 p = ¶m[TYPE_SUBLSH_N];
915 COPY (TYPE_ADDLSH_N);
916 REFERENCE (refmpn_sublsh_n);
918 p = ¶m[TYPE_RSBLSH1_N];
920 REFERENCE (refmpn_rsblsh1_n);
922 p = ¶m[TYPE_RSBLSH2_N];
924 REFERENCE (refmpn_rsblsh2_n);
926 p = ¶m[TYPE_RSBLSH_N];
927 COPY (TYPE_ADDLSH_N);
928 REFERENCE (refmpn_rsblsh_n);
930 p = ¶m[TYPE_RSH1ADD_N];
932 REFERENCE (refmpn_rsh1add_n);
934 p = ¶m[TYPE_RSH1SUB_N];
936 REFERENCE (refmpn_rsh1sub_n);
939 p = ¶m[TYPE_MOD_1];
942 p->size = SIZE_ALLOW_ZERO;
943 p->divisor = DIVISOR_LIMB;
944 REFERENCE (refmpn_mod_1);
946 p = ¶m[TYPE_MOD_1C];
948 p->carry = CARRY_DIVISOR;
949 REFERENCE (refmpn_mod_1c);
951 p = ¶m[TYPE_DIVMOD_1];
954 REFERENCE (refmpn_divmod_1);
956 p = ¶m[TYPE_DIVMOD_1C];
957 COPY (TYPE_DIVMOD_1);
958 p->carry = CARRY_DIVISOR;
959 REFERENCE (refmpn_divmod_1c);
961 p = ¶m[TYPE_DIVREM_1];
962 COPY (TYPE_DIVMOD_1);
963 p->size2 = SIZE_FRACTION;
964 p->dst_size[0] = SIZE_SUM;
965 REFERENCE (refmpn_divrem_1);
967 p = ¶m[TYPE_DIVREM_1C];
968 COPY (TYPE_DIVREM_1);
969 p->carry = CARRY_DIVISOR;
970 REFERENCE (refmpn_divrem_1c);
972 p = ¶m[TYPE_PREINV_DIVREM_1];
973 COPY (TYPE_DIVREM_1);
974 p->size = SIZE_YES; /* ie. no size==0 */
975 REFERENCE (refmpn_preinv_divrem_1);
977 p = ¶m[TYPE_PREINV_MOD_1];
980 p->divisor = DIVISOR_NORM;
981 REFERENCE (refmpn_preinv_mod_1);
983 p = ¶m[TYPE_MOD_34LSUB1];
986 VALIDATE (validate_mod_34lsub1);
988 p = ¶m[TYPE_UDIV_QRNND];
992 p->dst_size[0] = SIZE_1;
993 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
994 p->data = DATA_UDIV_QRNND;
995 p->overlap = OVERLAP_NONE;
996 REFERENCE (refmpn_udiv_qrnnd);
998 p = ¶m[TYPE_UDIV_QRNND_R];
999 COPY (TYPE_UDIV_QRNND);
1000 REFERENCE (refmpn_udiv_qrnnd_r);
1003 p = ¶m[TYPE_DIVEXACT_1];
1006 p->divisor = DIVISOR_LIMB;
1007 p->data = DATA_MULTIPLE_DIVISOR;
1008 VALIDATE (validate_divexact_1);
1009 REFERENCE (refmpn_divmod_1);
1012 p = ¶m[TYPE_DIVEXACT_BY3];
1016 REFERENCE (refmpn_divexact_by3);
1018 p = ¶m[TYPE_DIVEXACT_BY3C];
1019 COPY (TYPE_DIVEXACT_BY3);
1021 REFERENCE (refmpn_divexact_by3c);
1024 p = ¶m[TYPE_MODEXACT_1_ODD];
1027 p->divisor = DIVISOR_ODD;
1028 VALIDATE (validate_modexact_1_odd);
1030 p = ¶m[TYPE_MODEXACT_1C_ODD];
1031 COPY (TYPE_MODEXACT_1_ODD);
1032 p->carry = CARRY_LIMB;
1033 VALIDATE (validate_modexact_1c_odd);
1036 p = ¶m[TYPE_GCD_1];
1039 p->data = DATA_NON_ZERO;
1040 p->divisor = DIVISOR_LIMB;
1041 REFERENCE (refmpn_gcd_1);
1043 p = ¶m[TYPE_GCD];
1049 p->dst_size[0] = SIZE_RETVAL;
1050 p->overlap = OVERLAP_NOT_SRCS;
1052 REFERENCE (refmpn_gcd);
1055 p = ¶m[TYPE_MPZ_JACOBI];
1058 p->size = SIZE_ALLOW_ZERO;
1060 p->data = DATA_SRC1_ODD;
1064 REFERENCE (refmpz_jacobi);
1066 p = ¶m[TYPE_MPZ_KRONECKER];
1067 COPY (TYPE_MPZ_JACOBI);
1068 p->data = 0; /* clear inherited DATA_SRC1_ODD */
1069 REFERENCE (refmpz_kronecker);
1072 p = ¶m[TYPE_MPZ_KRONECKER_UI];
1075 p->size = SIZE_ALLOW_ZERO;
1077 p->carry = CARRY_BIT;
1078 REFERENCE (refmpz_kronecker_ui);
1080 p = ¶m[TYPE_MPZ_KRONECKER_SI];
1081 COPY (TYPE_MPZ_KRONECKER_UI);
1082 REFERENCE (refmpz_kronecker_si);
1084 p = ¶m[TYPE_MPZ_UI_KRONECKER];
1085 COPY (TYPE_MPZ_KRONECKER_UI);
1086 REFERENCE (refmpz_ui_kronecker);
1088 p = ¶m[TYPE_MPZ_SI_KRONECKER];
1089 COPY (TYPE_MPZ_KRONECKER_UI);
1090 REFERENCE (refmpz_si_kronecker);
1093 p = ¶m[TYPE_SQR];
1096 p->dst_size[0] = SIZE_SUM;
1097 p->overlap = OVERLAP_NONE;
1098 REFERENCE (refmpn_sqr);
1100 p = ¶m[TYPE_MUL_N];
1103 REFERENCE (refmpn_mul_n);
1105 p = ¶m[TYPE_MULLO_N];
1108 REFERENCE (refmpn_mullo_n);
1110 p = ¶m[TYPE_MUL_MN];
1113 REFERENCE (refmpn_mul_basecase);
1115 p = ¶m[TYPE_UMUL_PPMM];
1119 p->dst_size[0] = SIZE_1;
1120 p->overlap = OVERLAP_NONE;
1121 REFERENCE (refmpn_umul_ppmm);
1123 p = ¶m[TYPE_UMUL_PPMM_R];
1124 COPY (TYPE_UMUL_PPMM);
1125 REFERENCE (refmpn_umul_ppmm_r);
1128 p = ¶m[TYPE_RSHIFT];
1133 p->overlap = OVERLAP_LOW_TO_HIGH;
1134 REFERENCE (refmpn_rshift);
1136 p = ¶m[TYPE_LSHIFT];
1138 p->overlap = OVERLAP_HIGH_TO_LOW;
1139 REFERENCE (refmpn_lshift);
1141 p = ¶m[TYPE_LSHIFTC];
1143 p->overlap = OVERLAP_HIGH_TO_LOW;
1144 REFERENCE (refmpn_lshiftc);
1147 p = ¶m[TYPE_POPCOUNT];
1150 REFERENCE (refmpn_popcount);
1152 p = ¶m[TYPE_HAMDIST];
1153 COPY (TYPE_POPCOUNT);
1155 REFERENCE (refmpn_hamdist);
1158 p = ¶m[TYPE_SBPI1_DIV_QR];
1164 p->data = DATA_SRC1_HIGHBIT;
1166 p->dst_size[0] = SIZE_DIFF;
1167 p->overlap = OVERLAP_NONE;
1168 REFERENCE (refmpn_sb_div_qr);
1170 p = ¶m[TYPE_TDIV_QR];
1176 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1177 p->dst_size[1] = SIZE_SIZE2;
1178 p->overlap = OVERLAP_NONE;
1179 REFERENCE (refmpn_tdiv_qr);
1181 p = ¶m[TYPE_SQRTREM];
1186 p->dst_size[0] = SIZE_CEIL_HALF;
1187 p->dst_size[1] = SIZE_RETVAL;
1188 p->overlap = OVERLAP_NONE;
1189 VALIDATE (validate_sqrtrem);
1190 REFERENCE (refmpn_sqrtrem);
1192 p = ¶m[TYPE_ZERO];
1194 p->size = SIZE_ALLOW_ZERO;
1195 REFERENCE (refmpn_zero);
1197 p = ¶m[TYPE_GET_STR];
1200 p->size = SIZE_ALLOW_ZERO;
1203 p->dst_size[0] = SIZE_GET_STR;
1204 p->dst_bytes[0] = 1;
1205 p->overlap = OVERLAP_NONE;
1206 REFERENCE (refmpn_get_str);
1208 p = ¶m[TYPE_BINVERT];
1211 p->data = DATA_SRC0_ODD;
1212 p->overlap = OVERLAP_NONE;
1213 REFERENCE (refmpn_binvert);
1215 p = ¶m[TYPE_INVERT];
1218 p->data = DATA_SRC0_HIGHBIT;
1219 p->overlap = OVERLAP_NONE;
1220 REFERENCE (refmpn_invert);
1222 #ifdef EXTRA_PARAM_INIT
1228 /* The following are macros if there's no native versions, so wrap them in
1229 functions that can be in choice_array[]. */
1232 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1233 { MPN_COPY (rp, sp, size); }
1236 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1237 { MPN_COPY_INCR (rp, sp, size); }
1240 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1241 { MPN_COPY_DECR (rp, sp, size); }
1244 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1245 { __GMPN_COPY (rp, sp, size); }
1247 #ifdef __GMPN_COPY_INCR
1249 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1250 { __GMPN_COPY_INCR (rp, sp, size); }
1254 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1255 { mpn_com (rp, sp, size); }
1258 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1259 { mpn_and_n (rp, s1, s2, size); }
1262 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1263 { mpn_andn_n (rp, s1, s2, size); }
1266 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1267 { mpn_nand_n (rp, s1, s2, size); }
1270 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1271 { mpn_ior_n (rp, s1, s2, size); }
1274 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1275 { mpn_iorn_n (rp, s1, s2, size); }
1278 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1279 { mpn_nior_n (rp, s1, s2, size); }
1282 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1283 { mpn_xor_n (rp, s1, s2, size); }
1286 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1287 { mpn_xnor_n (rp, s1, s2, size); }
1290 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1293 udiv_qrnnd (q, *remptr, n1, n0, d);
1298 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1300 return mpn_divexact_by3 (rp, sp, size);
1304 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1306 return mpn_modexact_1_odd (ptr, size, divisor);
1310 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1315 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1316 mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1320 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1325 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1326 mpn_toom2_sqr (dst, src, size, tspace);
1330 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1335 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1336 mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1340 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1345 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1346 mpn_toom3_sqr (dst, src, size, tspace);
1350 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1355 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1356 mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1360 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1365 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1366 mpn_toom4_sqr (dst, src, size, tspace);
1371 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1374 umul_ppmm (high, *lowptr, m1, m2);
1379 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1380 { MPN_ZERO (ptr, size); }
1391 #define TRY(fun) #fun, (tryfun_t) fun
1392 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
1394 #define TRY(fun) "fun", (tryfun_t) fun
1395 #define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
1398 const struct choice_t choice_array[] = {
1399 { TRY(mpn_add), TYPE_ADD },
1400 { TRY(mpn_sub), TYPE_SUB },
1402 { TRY(mpn_add_n), TYPE_ADD_N },
1403 { TRY(mpn_sub_n), TYPE_SUB_N },
1405 #if HAVE_NATIVE_mpn_add_nc
1406 { TRY(mpn_add_nc), TYPE_ADD_NC },
1408 #if HAVE_NATIVE_mpn_sub_nc
1409 { TRY(mpn_sub_nc), TYPE_SUB_NC },
1412 #if HAVE_NATIVE_mpn_add_n_sub_n
1413 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
1415 #if HAVE_NATIVE_mpn_add_n_sub_nc
1416 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
1419 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1420 { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1421 #if HAVE_NATIVE_mpn_addmul_1c
1422 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1424 #if HAVE_NATIVE_mpn_submul_1c
1425 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1428 #if HAVE_NATIVE_mpn_addmul_2
1429 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1431 #if HAVE_NATIVE_mpn_addmul_3
1432 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1434 #if HAVE_NATIVE_mpn_addmul_4
1435 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1437 #if HAVE_NATIVE_mpn_addmul_5
1438 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1440 #if HAVE_NATIVE_mpn_addmul_6
1441 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1443 #if HAVE_NATIVE_mpn_addmul_7
1444 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1446 #if HAVE_NATIVE_mpn_addmul_8
1447 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1450 { TRY_FUNFUN(mpn_com), TYPE_COM },
1452 { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1453 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1454 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1456 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1457 #ifdef __GMPN_COPY_INCR
1458 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1461 #if HAVE_NATIVE_mpn_copyi
1462 { TRY(mpn_copyi), TYPE_COPYI },
1464 #if HAVE_NATIVE_mpn_copyd
1465 { TRY(mpn_copyd), TYPE_COPYD },
1468 #if HAVE_NATIVE_mpn_addlsh1_n
1469 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1471 #if HAVE_NATIVE_mpn_addlsh2_n
1472 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
1474 #if HAVE_NATIVE_mpn_addlsh_n
1475 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1477 #if HAVE_NATIVE_mpn_sublsh1_n
1478 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1480 #if HAVE_NATIVE_mpn_sublsh_n
1481 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1483 #if HAVE_NATIVE_mpn_rsblsh1_n
1484 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
1486 #if HAVE_NATIVE_mpn_rsblsh2_n
1487 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
1489 #if HAVE_NATIVE_mpn_rsblsh_n
1490 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
1492 #if HAVE_NATIVE_mpn_rsh1add_n
1493 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1495 #if HAVE_NATIVE_mpn_rsh1sub_n
1496 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1499 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1500 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1501 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1502 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1503 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1504 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1505 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1506 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1508 { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1509 #if USE_PREINV_DIVREM_1
1510 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1512 { TRY(mpn_mod_1), TYPE_MOD_1 },
1513 #if USE_PREINV_MOD_1
1514 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1516 #if HAVE_NATIVE_mpn_divrem_1c
1517 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1519 #if HAVE_NATIVE_mpn_mod_1c
1520 { TRY(mpn_mod_1c), TYPE_MOD_1C },
1522 #if GMP_NUMB_BITS % 4 == 0
1523 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1526 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1527 #if HAVE_NATIVE_mpn_udiv_qrnnd
1528 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1530 #if HAVE_NATIVE_mpn_udiv_qrnnd_r
1531 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1534 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1535 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1536 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1538 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1539 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1542 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
1543 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1545 { TRY(mpn_mul_1), TYPE_MUL_1 },
1546 #if HAVE_NATIVE_mpn_mul_1c
1547 { TRY(mpn_mul_1c), TYPE_MUL_1C },
1549 #if HAVE_NATIVE_mpn_mul_2
1550 { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1552 #if HAVE_NATIVE_mpn_mul_3
1553 { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
1555 #if HAVE_NATIVE_mpn_mul_4
1556 { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
1559 { TRY(mpn_rshift), TYPE_RSHIFT },
1560 { TRY(mpn_lshift), TYPE_LSHIFT },
1561 { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1564 { TRY(mpn_mul_basecase), TYPE_MUL_MN },
1565 { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
1566 #if SQR_TOOM2_THRESHOLD > 0
1567 { TRY(mpn_sqr_basecase), TYPE_SQR },
1570 { TRY(mpn_mul), TYPE_MUL_MN },
1571 { TRY(mpn_mul_n), TYPE_MUL_N },
1572 { TRY(mpn_sqr), TYPE_SQR },
1574 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1575 #if HAVE_NATIVE_mpn_umul_ppmm
1576 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1578 #if HAVE_NATIVE_mpn_umul_ppmm_r
1579 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1582 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
1583 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
1584 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
1585 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
1586 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
1587 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
1589 { TRY(mpn_gcd_1), TYPE_GCD_1 },
1590 { TRY(mpn_gcd), TYPE_GCD },
1591 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
1592 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
1593 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
1594 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
1595 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
1597 { TRY(mpn_popcount), TYPE_POPCOUNT },
1598 { TRY(mpn_hamdist), TYPE_HAMDIST },
1600 { TRY(mpn_sqrtrem), TYPE_SQRTREM },
1602 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
1604 { TRY(mpn_get_str), TYPE_GET_STR },
1606 { TRY(mpn_binvert), TYPE_BINVERT },
1607 { TRY(mpn_invert), TYPE_INVERT },
1609 #ifdef EXTRA_ROUTINES
1614 const struct choice_t *choice = NULL;
1618 mprotect_maybe (void *addr, size_t len, int prot)
1620 if (!option_redzones)
1624 if (mprotect (addr, len, prot) != 0)
1626 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1627 addr, (unsigned) len, prot, strerror (errno));
1632 static int warned = 0;
1636 "mprotect not available, bounds testing not performed\n");
1643 /* round "a" up to a multiple of "m" */
1645 round_up_multiple (size_t a, size_t m)
1657 /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1658 for instance HP-UX 10.
1660 mmap will almost certainly return a pointer already aligned to a page
1661 boundary, but it's easy enough to share the alignment handling with the malloc case. */
/* Set up region r with room for at least n limbs.  n is rounded up to a whole
   number of pages, and the allocation also has space for two redzones of
   REDZONE_BYTES plus one page of slack so the start can be page aligned.
   Memory comes from an anonymous mmap() when that is available, otherwise
   from malloc().  An mmap() failure is reported on stderr, a malloc() failure
   trips ASSERT_ALWAYS.  The redzones are made inaccessible with PROT_NONE
   (through mprotect_maybe).  */
1665 malloc_region (struct region_t *r, mp_size_t n)
/* limbs must tile a page exactly for the limb/page arithmetic below */
1670 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1672 n = round_up_multiple (n, PAGESIZE_LIMBS);
/* data + one redzone at each end + one page of alignment slack */
1675 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
/* some systems only spell the flag MAP_ANONYMOUS */
1677 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
1678 #define MAP_ANON MAP_ANONYMOUS
1681 #if HAVE_MMAP && defined (MAP_ANON)
1682 /* note must pass fd=-1 for MAP_ANON on BSD */
1683 p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
1684 if (p == (void *) -1)
/* NOTE(review): nbytes is narrowed to unsigned for %#x, so a size that does
   not fit in unsigned would be printed truncated */
1686 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
1687 (unsigned) nbytes, strerror (errno));
/* presumably the branch taken without a usable mmap -- TODO confirm */
1691 p = (mp_ptr) malloc (nbytes);
1692 ASSERT_ALWAYS (p != NULL);
/* shared by both allocators: move up to a page boundary */
1695 p = align_pointer (p, pagesize);
/* redzone in front of the data */
1697 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
/* redzone behind the n data limbs; p has presumably been stepped past the
   first redzone by this point -- TODO confirm */
1701 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
1705 mprotect_region (const struct region_t *r, int prot)
1707 mprotect_maybe (r->ptr, r->size, prot);
1711 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3, and CARRY_4. */
1713 mp_limb_t carry_array[] = {
/* Number of carry values to try for the current routine, chosen by
   tr->carry: 2 for CARRY_BIT, 3 for CARRY_3, 4 for CARRY_4, and every
   carry_array entry plus CARRY_RANDOMS random values for CARRY_LIMB and
   CARRY_DIVISOR.  */
1722 #define CARRY_COUNT \
1723 ((tr->carry == CARRY_BIT) ? 2 \
1724 : tr->carry == CARRY_3 ? 3 \
1725 : tr->carry == CARRY_4 ? 4 \
1726 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
1727 ? numberof(carry_array) + CARRY_RANDOMS \
/* Fill {dst,size} with random limbs, alternating between the two generators:
   refmpn_random when "index" is odd, refmpn_random2 when it is even.  */
1730 #define MPN_RANDOM_ALT(index,dst,size) \
1731 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
1733 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have the same type. */
/* Loop header stepping carry_index over CARRY_COUNT carry values.  Indexes
   within carry_array take the table entry; later indexes get one random limb
   from MPN_RANDOM_ALT.  For CARRY_DIVISOR the value is then reduced modulo
   "divisor", so divisor has to be established before this loop is entered.  */
1735 #define CARRY_ITERATION \
1736 for (carry_index = 0; \
1737 (carry_index < numberof (carry_array) \
1738 ? (carry = carry_array[carry_index]) \
1739 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
1740 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
1741 carry_index < CARRY_COUNT; \
/* Fixed multiplier values tried before any random ones (see
   MULTIPLIER_ITERATION).  */
1745 mp_limb_t multiplier_array[] = {
/* position of the multiplier currently in use */
1753 int multiplier_index;
/* Fixed divisor values tried before any random ones (see DIVISOR_ITERATION).
   The entries visible here sit around the half-limb boundary and just above
   the high bit.  */
1755 mp_limb_t divisor_array[] = {
1759 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
1760 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
1762 GMP_NUMB_HIGHBIT + 1,
1770 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have the same type. */
/* Generic loop header shared by the multiplier and divisor loops: "var" takes
   array[index] while index is inside "array", and a single random limb from
   MPN_RANDOM_ALT after that.  */
1772 #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \
1774 (index < numberof (array) \
1775 ? (var = array[index]) \
1776 : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \
/* MULTIPLIER_COUNT is the number of multipliers to try; where multipliers
   are in use that is all of multiplier_array plus MULTIPLIER_RANDOMS random
   values.  MULTIPLIER_ITERATION is the matching loop header, built from
   ARRAY_ITERATION over "multiplier" and multiplier_index.  */
1780 #define MULTIPLIER_COUNT \
1782 ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \
1785 #define MULTIPLIER_ITERATION \
1786 ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \
1787 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
/* DIVISOR_COUNT is the number of divisors to try; where divisors are in use
   that is all of divisor_array plus DIVISOR_RANDOMS random values.
   DIVISOR_ITERATION is the matching loop header, built from ARRAY_ITERATION
   over "divisor" and divisor_index.  */
1789 #define DIVISOR_COUNT \
1791 ? numberof (divisor_array) + DIVISOR_RANDOMS \
1794 #define DIVISOR_ITERATION \
1795 ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
1796 DIVISOR_RANDOMS, TRY_DIVISOR)
1799 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
1800 d[0] or d[1] respectively, -1 means a separate (write-protected) location. */
1805 } overlap_array[] = {
/* "overlap" is the overlap_array entry currently in use and overlap_limit the
   end of the range being walked.  OVERLAP_COUNT limits how many leading
   entries are used according to the tr->overlap flags: 1 for OVERLAP_NONE,
   3 for OVERLAP_NOT_SRCS, 2 for OVERLAP_NOT_SRC2.  OVERLAP_ITERATION is the
   loop header walking overlap from the first entry up to that limit.  */
1817 struct overlap_t *overlap, *overlap_limit;
1819 #define OVERLAP_COUNT \
1820 (tr->overlap & OVERLAP_NONE ? 1 \
1821 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
1822 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
1828 #define OVERLAP_ITERATION \
1829 for (overlap = &overlap_array[0], \
1830 overlap_limit = &overlap_array[OVERLAP_COUNT]; \
1831 overlap < overlap_limit; \
/* Number of random data styles stepped through by T_RAND_ITERATION.  */
1837 #define T_RAND_COUNT 2
/* Fill {ptr,n} with test data of the kind selected by option_data: random
   limbs from refmpn_random or refmpn_random2, a running counter, all zeros,
   all GMP_NUMB_MAX, or the special 0x2FFF...FFFD value described below.  */
1841 t_random (mp_ptr ptr, mp_size_t n)
1846 switch (option_data) {
/* the two random generators; presumably picked by t_rand -- TODO confirm */
1849 case 0: refmpn_random (ptr, n); break;
1850 case 1: refmpn_random2 (ptr, n); break;
/* sequential data: the counter is static, so it carries on across calls */
1856 static mp_limb_t counter = 0;
1858 for (i = 0; i < n; i++)
1863 refmpn_zero (ptr, n);
1866 refmpn_fill (ptr, n, GMP_NUMB_MAX);
1869 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
1870 inducing the q1_ff special case in the mul-by-inverse part of some
1871 versions of divrem_1 and mod_1. */
1872 refmpn_fill (ptr, n, (mp_limb_t) -1);
/* Loop header running t_rand over 0 .. T_RAND_COUNT-1.  */
1881 #define T_RAND_ITERATION \
1882 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
/* Print the state of one side of a test, "e" being either the reference
   (&ref) or the function under test: its label and routine name, the return
   value, the contents and address of each destination, and the address of
   each source.  */
1886 print_each (const struct each_t *e)
/* the reference side is named from tr, the other side from choice */
1890 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
1892 mpn_trace (" retval", &e->retval, 1);
1894 for (i = 0; i < NUM_DESTS; i++)
/* byte-sized destinations are traced as bytes, all others as limbs */
1898 if (tr->dst_bytes[i])
1899 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1901 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1902 printf (" located %p\n", (void *) (e->d[i].p));
1906 for (i = 0; i < NUM_SOURCES; i++)
1908 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
/* Dump the parameters of the current test case to stdout: sizes, multiplier,
   divisor, shift and carry, the placement of each destination and source,
   and the source data.  */
1918 printf ("size %ld\n", (long) size);
1920 printf ("size2 %ld\n", (long) size2);
/* destination sizes are only shown where they differ from "size" */
1922 for (i = 0; i < NUM_DESTS; i++)
1923 if (d[i].size != size)
1924 printf ("d[%d].size %ld\n", i, (long) d[i].size);
1927 mpn_trace (" multiplier", &multiplier, 1);
1929 mpn_trace (" divisor", &divisor, 1);
1931 printf (" shift %lu\n", shift);
1933 mpn_trace (" carry", &carry, 1);
1935 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
/* placement of each destination: end of the region used, alignment, size */
1937 for (i = 0; i < NUM_DESTS; i++)
1939 printf (" d[%d] %s, align %ld, size %ld\n",
1940 i, d[i].high ? "high" : "low",
1941 (long) d[i].align, (long) d[i].size);
/* placement of each source, including which destination it overlaps */
1943 for (i = 0; i < NUM_SOURCES; i++)
1947 printf (" s[%d] %s, align %ld, ",
1948 i, s[i].high ? "high" : "low", (long) s[i].align);
1949 switch (overlap->s[i]) {
1951 printf ("no overlap\n");
1954 printf ("==d[%d]%s\n",
1956 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
1957 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
1961 printf (" s[%d]=", i);
/* bit i of carry is tested when the routine takes signs from the carry */
1962 if (tr->carry_sign && (carry & (1 << i)))
1964 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
/* the initial d[0] contents came from the start of the s[1] region */
1968 if (tr->dst0_from_src1)
1969 mpn_trace (" d[0]", s[1].region.ptr, size);
/* Compare what the reference and the function under test produced: first the
   return values (when the routine has one), then each destination, reporting
   the lowest and highest positions that differ.  */
1982 if (tr->retval && ref.retval != fun.retval)
1984 gmp_printf ("Different return values (%Mu, %Mu)\n",
1985 ref.retval, fun.retval);
/* some destination sizes are only known from the return value */
1989 for (i = 0; i < NUM_DESTS; i++)
1991 switch (tr->dst_size[i]) {
1994 d[i].size = ref.retval;
1999 for (i = 0; i < NUM_DESTS; i++)
/* byte destinations are compared with memcmp and diffed per byte */
2004 if (tr->dst_bytes[i])
2006 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2008 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2010 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2011 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
/* limb destinations are compared with refmpn_equal_anynail and diffed per
   limb */
2018 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2020 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2022 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2023 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2037 /* The functions are cast if the return value should be a long rather than
2038 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2039 might not be enough if some actual calling conventions checking is
2040 implemented on a long long limb system. */
/* Invoke "function" on the operands held in "e", passing the argument list
   that choice->type calls for, and store any result in e->retval.  The same
   routine is used for the reference and for the function under test, so both
   see identical argument handling.  An unrecognised type is reported.  */
2043 call (struct each_t *e, tryfun_t function)
2045 switch (choice->type) {
2048 e->retval = CALLING_CONVENTIONS (function)
2049 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2054 case TYPE_ADDLSH1_N:
2055 case TYPE_ADDLSH2_N:
2056 case TYPE_SUBLSH1_N:
2057 case TYPE_RSBLSH1_N:
2058 case TYPE_RSBLSH2_N:
2059 case TYPE_RSH1ADD_N:
2060 case TYPE_RSH1SUB_N:
2061 e->retval = CALLING_CONVENTIONS (function)
2062 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2067 e->retval = CALLING_CONVENTIONS (function)
2068 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2072 e->retval = CALLING_CONVENTIONS (function)
2073 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2079 e->retval = CALLING_CONVENTIONS (function)
2080 (e->d[0].p, e->s[0].p, size, multiplier);
2083 case TYPE_ADDMUL_1C:
2084 case TYPE_SUBMUL_1C:
2085 e->retval = CALLING_CONVENTIONS (function)
2086 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2094 e->retval = CALLING_CONVENTIONS (function)
2095 (e->d[0].p, e->s[0].p, size, multiplier_N);
2107 e->retval = CALLING_CONVENTIONS (function)
2108 (e->d[0].p, e->s[0].p, size, multiplier_N);
2119 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2123 e->retval = CALLING_CONVENTIONS (function)
2124 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2126 case TYPE_ADDSUB_NC:
2127 e->retval = CALLING_CONVENTIONS (function)
2128 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2135 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2139 case TYPE_DIVEXACT_BY3:
2140 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2142 case TYPE_DIVEXACT_BY3C:
2143 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2149 case TYPE_DIVEXACT_1:
2150 e->retval = CALLING_CONVENTIONS (function)
2151 (e->d[0].p, e->s[0].p, size, divisor);
2153 case TYPE_DIVMOD_1C:
2154 e->retval = CALLING_CONVENTIONS (function)
2155 (e->d[0].p, e->s[0].p, size, divisor, carry);
2158 e->retval = CALLING_CONVENTIONS (function)
2159 (e->d[0].p, size2, e->s[0].p, size, divisor);
2161 case TYPE_DIVREM_1C:
2162 e->retval = CALLING_CONVENTIONS (function)
2163 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2165 case TYPE_PREINV_DIVREM_1:
/* the inverse handed over is that of the divisor shifted up by its count of
   leading zeros */
2169 shift = refmpn_count_leading_zeros (divisor);
2170 dinv = refmpn_invert_limb (divisor << shift);
2171 e->retval = CALLING_CONVENTIONS (function)
2172 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2176 case TYPE_MODEXACT_1_ODD:
2177 e->retval = CALLING_CONVENTIONS (function)
2178 (e->s[0].p, size, divisor);
2181 case TYPE_MODEXACT_1C_ODD:
2182 e->retval = CALLING_CONVENTIONS (function)
2183 (e->s[0].p, size, divisor, carry);
2185 case TYPE_PREINV_MOD_1:
/* no shift is applied here, so divisor is presumably already normalized for
   this type -- TODO confirm against the parameter setup */
2186 e->retval = CALLING_CONVENTIONS (function)
2187 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2189 case TYPE_MOD_34LSUB1:
2190 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2193 case TYPE_UDIV_QRNND:
/* the two-limb dividend is s[0].p[1] (high) and s[0].p[0] (low) */
2194 e->retval = CALLING_CONVENTIONS (function)
2195 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2197 case TYPE_UDIV_QRNND_R:
2198 e->retval = CALLING_CONVENTIONS (function)
2199 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2202 case TYPE_SBPI1_DIV_QR:
2205 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2206 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2207 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2208 e->retval = CALLING_CONVENTIONS (function)
2209 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2210 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2215 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2216 e->s[0].p, size, e->s[1].p, size2);
2220 /* Must have a non-zero src, but this probably isn't the best way to do
2222 if (refmpn_zero_p (e->s[0].p, size))
2225 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2229 /* Sources are destroyed, so they're saved and replaced, but a general
2230 approach to this might be better. Note that it's still e->s[0].p and
2231 e->s[1].p that are passed, to get the desired alignments. */
/* scratch copies of both sources, written back after the call */
2233 mp_ptr s0 = refmpn_malloc_limbs (size);
2234 mp_ptr s1 = refmpn_malloc_limbs (size2);
2235 refmpn_copyi (s0, e->s[0].p, size);
2236 refmpn_copyi (s1, e->s[1].p, size2);
/* the source regions have to be writable for this routine */
2238 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2239 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2240 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2243 refmpn_copyi (e->s[0].p, s0, size);
2244 refmpn_copyi (e->s[1].p, s1, size2);
2250 case TYPE_GCD_FINDA:
2252 /* FIXME: do this with a flag */
/* both limbs are forced non-zero before the call */
2254 c[0] = e->s[0].p[0];
2255 c[0] += (c[0] == 0);
/* NOTE(review): c[1] is loaded from p[0] just like c[0]; p[1] may have been
   intended -- confirm before changing */
2256 c[1] = e->s[0].p[0];
2257 c[1] += (c[1] == 0);
2258 e->retval = CALLING_CONVENTIONS (function) (c);
2262 case TYPE_MPZ_JACOBI:
2263 case TYPE_MPZ_KRONECKER:
/* carry bit 0 selects the sign of a, carry bit 1 the sign of b */
2266 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2267 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2268 e->retval = CALLING_CONVENTIONS (function) (a, b);
2271 case TYPE_MPZ_KRONECKER_UI:
/* in the four mixed cases below a non-zero carry makes the mpz operand
   negative, and the single-limb operand comes from "multiplier" */
2274 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2275 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2278 case TYPE_MPZ_KRONECKER_SI:
2281 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2282 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2285 case TYPE_MPZ_UI_KRONECKER:
2288 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2289 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2292 case TYPE_MPZ_SI_KRONECKER:
2295 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2296 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2301 CALLING_CONVENTIONS (function)
2302 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2306 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2309 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2312 case TYPE_UMUL_PPMM:
/* the two factors are the first two limbs of s[0] */
2313 e->retval = CALLING_CONVENTIONS (function)
2314 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2316 case TYPE_UMUL_PPMM_R:
2317 e->retval = CALLING_CONVENTIONS (function)
2318 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2324 e->retval = CALLING_CONVENTIONS (function)
2325 (e->d[0].p, e->s[0].p, size, shift);
/* these return unsigned long or long rather than mp_limb_t, hence the casts
   of the function pointer */
2329 e->retval = (* (unsigned long (*)(ANYARGS))
2330 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2333 e->retval = (* (unsigned long (*)(ANYARGS))
2334 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2338 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2339 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2343 CALLING_CONVENTIONS (function) (e->d[0].p, size);
/* string conversion: d[0] is a byte buffer; the part of it not needed for
   the digits is filled with 0xBA, either below or above the digits */
2348 size_t sizeinbase, fill;
2350 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2351 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2352 fill = d[0].size - sizeinbase;
2355 memset (e->d[0].p, 0xBA, fill);
2356 dst = (char *) e->d[0].p + fill;
2360 dst = (char *) e->d[0].p;
2361 memset (dst + sizeinbase, 0xBA, fill);
2365 e->retval = CALLING_CONVENTIONS (function) (dst, base,
/* the source is copied into d[1] before this call, and d[1] is zeroed again
   afterwards */
2370 refmpn_copy (e->d[1].p, e->s[0].p, size);
2371 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2374 refmpn_zero (e->d[1].p, size); /* clobbered or unused */
/* the two inversion routines take a scratch area sized by their itch
   functions */
2383 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2384 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2393 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2394 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2404 printf ("Unknown routine type %d\n", choice->type);
/* Work out, for one side of the test ("e" is the reference or the function
   under test), the size of each destination and where the destination and
   source pointers sit inside their regions, honouring the current high/low
   placement, alignment and overlap choices.  */
2412 pointer_setup (struct each_t *e)
/* destination sizes, as a function of size, size2 and tr->msize */
2416 for (i = 0; i < NUM_DESTS; i++)
2418 switch (tr->dst_size[i]) {
2420 case SIZE_RETVAL: /* will be adjusted later */
2437 case SIZE_PLUS_MSIZE_SUB_1:
2438 d[i].size = size + tr->msize - 1;
2443 d[i].size = size + size2;
2453 d[i].size = size - size2;
2456 case SIZE_DIFF_PLUS_1:
2457 d[i].size = size - size2 + 1;
2460 case SIZE_CEIL_HALF:
2461 d[i].size = (size+1)/2;
/* digit count for the largest "size"-limb value: the pointer is arranged so
   that element size-1 is the single limb "ff" */
2466 mp_limb_t ff = GMP_NUMB_MAX;
2467 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2472 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2477 /* establish e->d[].p destinations */
2478 for (i = 0; i < NUM_DESTS; i++)
2480 mp_size_t offset = 0;
2482 /* possible room for overlapping sources */
2483 for (j = 0; j < numberof (overlap->s); j++)
2484 if (overlap->s[j] == i)
2485 offset = MAX (offset, s[j].align);
/* placement at the top of the region: byte destinations are positioned with
   char arithmetic, limb destinations with limb arithmetic */
2489 if (tr->dst_bytes[i])
2491 e->d[i].p = (mp_ptr)
2492 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2493 - d[i].size - d[i].align);
2497 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2498 - d[i].size - d[i].align;
2499 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2500 e->d[i].p -= offset;
/* placement at the bottom of the region */
2505 if (tr->dst_bytes[i])
2507 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2511 e->d[i].p = e->d[i].region.ptr + d[i].align;
2512 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2513 e->d[i].p += offset;
2518 /* establish e->s[].p sources */
2519 for (i = 0; i < NUM_SOURCES; i++)
2521 int o = overlap->s[i];
2529 /* overlap with d[o] */
2530 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2531 e->s[i].p = e->d[o].p - s[i].align;
2532 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2533 e->s[i].p = e->d[o].p + s[i].align;
2534 else if (tr->size2 == SIZE_FRACTION)
2535 e->s[i].p = e->d[o].p + size2;
2537 e->s[i].p = e->d[o].p;
/* Failure path for routines checked by a validate function: the reference is
   run here, with trap_location set around the call so that a signal raised
   inside it is attributed to the reference function.  */
2548 validate_fail (void)
2552 trap_location = TRAP_REF;
2553 call (&ref, tr->reference);
2554 trap_location = TRAP_NOWHERE;
/* Run one test case with the current global parameters: position and fill
   the sources, adjust the data as tr requires, pre-fill the destinations,
   then call the function under test (and the reference) with trap_location
   tracking which phase is executing.  */
2571 trap_location = TRAP_SETUPS;
/* force the divisor into the shape the routine needs */
2573 if (tr->divisor == DIVISOR_NORM)
2574 divisor |= GMP_NUMB_HIGHBIT;
2575 if (tr->divisor == DIVISOR_ODD)
/* source pointers: at the top or at the bottom of their region, then offset
   by the alignment */
2578 for (i = 0; i < NUM_SOURCES; i++)
2581 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2583 s[i].p = s[i].region.ptr + s[i].align;
2586 pointer_setup (&ref);
2587 pointer_setup (&fun);
/* distinct initial return values on the two sides */
2589 ref.retval = 0x04152637;
2590 fun.retval = 0x8C9DAEBF;
2592 t_random (multiplier_N, tr->msize);
2594 for (i = 0; i < NUM_SOURCES; i++)
/* sources are made writable while they are filled and adjusted, and
   read-only again at the end of this loop body */
2599 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2600 t_random (s[i].p, SRC_SIZE(i));
2604 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2608 case DATA_MULTIPLE_DIVISOR:
2609 /* same number of low zero bits as divisor */
2610 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
/* subtracting the remainder leaves an exact multiple of divisor */
2611 refmpn_sub_1 (s[i].p, s[i].p, size,
2612 refmpn_mod_1 (s[i].p, size, divisor));
2616 /* s[1] no more bits than s[0] */
2617 if (i == 1 && size2 == size)
2618 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
2620 /* high limb non-zero */
2621 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
2637 case DATA_SRC1_HIGHBIT:
2641 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
2643 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2647 case DATA_SRC0_HIGHBIT:
2650 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2654 case DATA_UDIV_QRNND:
/* second source limb reduced below the divisor */
2655 s[i].p[1] %= divisor;
2659 mprotect_region (&s[i].region, PROT_READ);
2662 for (i = 0; i < NUM_DESTS; i++)
/* d[0] may start out as a copy of data from the start of the s[1] region,
   with any remainder set to DEADVAL */
2667 if (tr->dst0_from_src1 && i==0)
2669 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
2670 mp_size_t fill = MAX (0, d[0].size - copy);
2671 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
2672 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
2673 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
2674 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
/* otherwise destinations get a recognisable fill: 0xBA bytes or DEADVAL
   limbs */
2676 else if (tr->dst_bytes[i])
2678 memset (ref.d[i].p, 0xBA, d[i].size);
2679 memset (fun.d[i].p, 0xBA, d[i].size);
2683 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
2684 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
2688 for (i = 0; i < NUM_SOURCES; i++)
/* a source that does not use the shared s[i] location gets its own copy of
   the data on each side */
2693 if (ref.s[i].p != s[i].p)
2695 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
2696 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
/* with a validate function only the function under test is called here */
2703 if (tr->validate != NULL)
2705 trap_location = TRAP_FUN;
2706 call (&fun, choice->function);
2707 trap_location = TRAP_NOWHERE;
2709 if (! CALLING_CONVENTIONS_CHECK ())
/* otherwise both the reference and the function under test are run */
2719 trap_location = TRAP_REF;
2720 call (&ref, tr->reference);
2721 trap_location = TRAP_FUN;
2722 call (&fun, choice->function);
2723 trap_location = TRAP_NOWHERE;
2725 if (! CALLING_CONVENTIONS_CHECK ())
/* Loop header for the main operand size.  The starting size is the largest
   of several lower bounds, among them option_firstsize and 0 or 1 depending
   on whether the routine allows a zero size; the loop runs up to and
   including option_lastsize.  */
2736 #define SIZE_ITERATION \
2737 for (size = MAX3 (option_firstsize, \
2739 (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1); \
2740 size <= option_lastsize; \
/* Range of the second size.  SIZE2_FIRST is 2 for SIZE_2 routines,
   option_firstsize2 for SIZE_FRACTION routines, and otherwise can be the
   larger of choice->minsize and option_firstsize2 (with 1 standing in for an
   unset option_firstsize2).  SIZE2_LAST is 2 for SIZE_2, FRACTION_COUNT-1 for
   SIZE_FRACTION, and "size" for other routines with a second size.
   SIZE2_ITERATION walks size2 over that inclusive range.  */
2743 #define SIZE2_FIRST \
2744 (tr->size2 == SIZE_2 ? 2 \
2745 : tr->size2 == SIZE_FRACTION ? option_firstsize2 \
2747 MAX (choice->minsize, (option_firstsize2 != 0 \
2748 ? option_firstsize2 : 1)) \
2751 #define SIZE2_LAST \
2752 (tr->size2 == SIZE_2 ? 2 \
2753 : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \
2754 : tr->size2 ? size \
2757 #define SIZE2_ITERATION \
2758 for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)
/* Alignment loop for operand w[n]: ALIGNMENTS different offsets when "cond"
   says the operand is in use, a single pass with align 0 otherwise.  */
2760 #define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1)
2761 #define ALIGN_ITERATION(w,n,cond) \
2762 for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)
/* High/low placement loop for operand w[n]: both high=0 and high=1 when
   "cond" is non-zero, only high=0 otherwise.  HIGH_COUNT is the resulting
   number of passes.  */
2764 #define HIGH_LIMIT(cond) ((cond) != 0)
2765 #define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1)
2766 #define HIGH_ITERATION(w,n,cond) \
2767 for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)
/* Shift counts to try: 1 .. GMP_NUMB_BITS-1 for routines taking a shift,
   otherwise a single pass with shift 1.  */
2769 #define SHIFT_LIMIT \
2770 ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))
2772 #define SHIFT_ITERATION \
2773 for (shift = 1; shift <= SHIFT_LIMIT; shift++)
/* Driver for one routine: first a figure for the number of test cases is
   built up by multiplying the iteration counts of every dimension and printed
   next to the routine name, then the nested iteration macros walk all the
   combinations.  */
2782 unsigned long total = 1;
2784 total *= option_repetitions;
2785 total *= option_lastsize;
/* second-size contribution: exact for fractions, otherwise about half of
   the sizes */
2786 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
2787 else if (tr->size2) total *= (option_lastsize+1)/2;
2789 total *= SHIFT_LIMIT;
2790 total *= MULTIPLIER_COUNT;
2791 total *= DIVISOR_COUNT;
2792 total *= CARRY_COUNT;
2793 total *= T_RAND_COUNT;
2795 total *= HIGH_COUNT (tr->dst[0]);
2796 total *= HIGH_COUNT (tr->dst[1]);
2797 total *= HIGH_COUNT (tr->src[0]);
2798 total *= HIGH_COUNT (tr->src[1]);
2800 total *= ALIGN_COUNT (tr->dst[0]);
2801 total *= ALIGN_COUNT (tr->dst[1]);
2802 total *= ALIGN_COUNT (tr->src[0]);
2803 total *= ALIGN_COUNT (tr->src[1]);
2805 total *= OVERLAP_COUNT;
2807 printf ("%s %lu\n", choice->name, total);
/* the loops proper, outermost first */
2812 for (i = 0; i < option_repetitions; i++)
2817 MULTIPLIER_ITERATION
2819 CARRY_ITERATION /* must be after divisor */
2822 HIGH_ITERATION(d,0, tr->dst[0])
2823 HIGH_ITERATION(d,1, tr->dst[1])
2824 HIGH_ITERATION(s,0, tr->src[0])
2825 HIGH_ITERATION(s,1, tr->src[1])
2827 ALIGN_ITERATION(d,0, tr->dst[0])
2828 ALIGN_ITERATION(d,1, tr->dst[1])
2829 ALIGN_ITERATION(s,0, tr->src[0])
2830 ALIGN_ITERATION(s,1, tr->src[1])
/* Signal handler, installed for SIGILL, SIGBUS, SIGSEGV and SIGFPE.  It
   prints the name of the signal and, from trap_location, which phase was
   running: the reference function, the function under test, the parameter
   setups, or somewhere unknown.  */
2839 /* Usually print_all() doesn't show much, but it might give a hint as to
2840 where the function was up to when it died. */
2844 const char *name = "noname";
2847 case SIGILL: name = "SIGILL"; break;
2849 case SIGBUS: name = "SIGBUS"; break;
2851 case SIGSEGV: name = "SIGSEGV"; break;
2852 case SIGFPE: name = "SIGFPE"; break;
2855 printf ("\n\nSIGNAL TRAP: %s\n", name);
2857 switch (trap_location) {
2859 printf (" in reference function: %s\n", tr->reference_name);
2862 printf (" in test function: %s\n", choice->name);
2866 printf (" in parameter setups\n");
2870 printf (" somewhere unknown\n");
/* One-time setup: determine the system page size, install the trap handler
   for the fatal signals, and allocate the source regions and the destination
   regions for both the reference and the function under test, printing where
   each region lies.  */
2880 #if HAVE_GETPAGESIZE
2881 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
2882 know _SC_PAGESIZE. */
2883 pagesize = getpagesize ();
2886 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
2888 /* According to the linux man page, sysconf doesn't set errno */
2889 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
/* the next line is not C: presumably it sits in a final preprocessor branch
   so the build fails when no way of getting the page size is known */
2893 Error, error, cannot get page size
/* NOTE(review): %lX expects an unsigned long argument; confirm the declared
   type of pagesize */
2897 printf ("pagesize is 0x%lX bytes\n", pagesize);
2899 signal (SIGILL, trap);
2901 signal (SIGBUS, trap);
2903 signal (SIGSEGV, trap);
2904 signal (SIGFPE, trap);
/* each region is requested with 2*option_lastsize limbs plus room for the
   largest alignment offset */
2909 for (i = 0; i < NUM_SOURCES; i++)
2911 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
2912 printf ("s[%d] %p to %p (0x%lX bytes)\n",
2913 i, (void *) (s[i].region.ptr),
2914 (void *) (s[i].region.ptr + s[i].region.size),
2915 (long) s[i].region.size * BYTES_PER_MP_LIMB);
/* the same again for the destinations of one side, "e", labelled "es" */
2918 #define INIT_EACH(e,es) \
2919 for (i = 0; i < NUM_DESTS; i++) \
2921 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
2922 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
2923 es, i, (void *) (e.d[i].region.ptr), \
2924 (void *) (e.d[i].region.ptr + e.d[i].region.size), \
2925 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \
2928 INIT_EACH(ref, "ref");
2929 INIT_EACH(fun, "fun");
/* Match "str" against "pattern", which may carry a single "*" wildcard.

   A leading "*" matches any prefix, so the rest of the pattern must equal
   the tail of str (a lone "*" matches everything).  Failing that, a trailing
   "*" matches any suffix, so the pattern up to the "*" must equal the start
   of str.  With no wildcard the two strings must be identical.  A "*"
   anywhere else is treated as an ordinary character.

   Returns 1 on a match, 0 otherwise.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    {
      /* strncmp rather than memcmp: str can be shorter than the pattern
         prefix, and the comparison has to stop at its terminator instead of
         reading plen-1 bytes regardless.  */
      return (strncmp (pattern, str, plen-1) == 0);
    }

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
2958 try_name (const char *name)
2963 for (i = 0; i < numberof (choice_array); i++)
2965 if (strmatch_wild (name, choice_array[i].name))
2967 choice = &choice_array[i];
2968 tr = ¶m[choice->type];
2976 printf ("%s unknown\n", name);
/* Print the command line summary for program name "prog", followed by the
   names of all testable functions from choice_array, wrapped so that no
   output line goes past column 79.

   NOTE(review): the option string parsed in main() also accepts -b and -E,
   and -a also accepts "seq"; none of these is listed here.  */
2983 usage (const char *prog)
2988 printf ("Usage: %s [options] function...\n", prog);
2989 printf (" -1 use limb data 1,2,3,etc\n");
2990 printf (" -9 use limb data all 0xFF..FFs\n");
2991 printf (" -a zeros use limb data all zeros\n");
2992 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
2993 printf (" -a 2fd use data 0x2FFF...FFFD\n");
2994 printf (" -p print each case tried (try this if seg faulting)\n");
2995 printf (" -R seed random numbers from time()\n");
2996 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
2997 printf (" -s size starting size to test\n");
2998 printf (" -S size2 starting size2 to test\n");
2999 printf (" -s s1-s2 range of sizes to test\n");
3000 printf (" -W don't show the spinner (use this in gdb)\n");
3001 printf (" -z disable mprotect() redzones\n");
3002 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3004 printf ("Functions that can be tested:\n");
3006 for (i = 0; i < numberof (choice_array); i++)
/* start a new output line when this name (plus its leading space) would not
   fit */
3008 if (col + 1 + strlen (choice_array[i].name) > 79)
3013 printf (" %s", choice_array[i].name);
3014 col += 1 + strlen (choice_array[i].name);
3023 main (int argc, char *argv[])
3027 /* unbuffered output */
3028 setbuf (stdout, NULL);
3029 setbuf (stderr, NULL);
3031 /* default trace in hex, and in upper-case so can paste into bc */
3032 mp_trace_base = -16;
3037 unsigned long seed = 123;
3040 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3044 /* use limb data values 1, 2, 3, ... etc */
3045 option_data = DATA_SEQ;
3048 /* use limb data values 0xFFF...FFF always */
3049 option_data = DATA_FFS;
3052 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3053 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3054 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3055 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3058 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3063 mp_trace_base = atoi (optarg);
3067 sscanf (optarg, "%lu", &seed);
3068 printf ("Re-seeding with %lu\n", seed);
3076 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3079 option_repetitions = atoi (optarg);
3084 option_firstsize = strtol (optarg, 0, 0);
3085 if ((p = strchr (optarg, '-')) != NULL)
3086 option_lastsize = strtol (p+1, 0, 0);
3090 /* -S <size> sets the starting size for the second of a two size
3091 routine (like mpn_mul_basecase) */
3092 option_firstsize2 = strtol (optarg, 0, 0);
3095 /* use this when running in the debugger */
3099 /* disable redzones */
3100 option_redzones = 0;
3108 gmp_randinit_default (__gmp_rands);
3109 __gmp_rands_initialized = 1;
3110 gmp_randseed_ui (__gmp_rands, seed);
3118 for (i = optind; i < argc; i++)