1 /* Time routines for speed measurements.
3 Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23 The code in this file implements the lowest level of time measuring,
24 simple one-time measuring of time between two points.
26 void speed_starttime (void)
27 double speed_endtime (void)
28 Call speed_starttime to start measuring, and then call speed_endtime
31 speed_endtime returns the time taken, in seconds. Or if the timebase
32 is in CPU cycles and the CPU frequency is unknown then speed_endtime
33 returns cycles. Applications can identify the cycles return by
34 checking for speed_cycletime (described below) equal to 1.0.
36 If some sort of temporary glitch occurs then speed_endtime returns
37 0.0. Currently this is for various cases where a negative time has
38 occurred. This unfortunately occurs with getrusage on some systems,
39 and with the hppa cycle counter on hpux.
41 double speed_cycletime
42 The time in seconds for each CPU cycle. For example on a 100 MHz CPU
45 If the CPU frequency is unknown, then speed_cycletime is either 0.0
46 or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's
47 1.0 when speed_endtime is returning cycles.
49 It may be noted that "speed_endtime() / speed_cycletime" gives a
50 measured time in cycles, irrespective of whether speed_endtime is
51 returning cycles or seconds. (Assuming cycles can be had, ie. it's
52 either cycles already or the cpu frequency is known. See also
53 speed_cycletime_need_cycles below.)
56 The unit of time measurement accuracy for the timing method in use.
57 This is in seconds or cycles, as per speed_endtime.
59 char speed_time_string[]
60 A null-terminated string describing the time method in use.
62 void speed_time_init (void)
63 Initialize time measuring. speed_starttime() does this
64 automatically, so it's only needed if an application wants to inspect
65 the above global variables before making a measurement.
68 The intended accuracy of time measurements. speed_measure() in
69 common.c for instance runs target routines with enough repetitions so
70 it takes at least "speed_unittime * speed_precision" (this expression
71 works for both cycles or seconds from speed_endtime).
73 A program can provide an option so the user can set speed_precision.
74 If speed_precision is zero when speed_time_init or speed_starttime
75 first run then it gets a default based on the measuring method
76 chosen. (More precision for higher accuracy methods.)
78 void speed_cycletime_need_seconds (void)
79 Call this to demand that speed_endtime will return seconds, and not
80 cycles. If only cycles are available then an error is printed and
83 void speed_cycletime_need_cycles (void)
84 Call this to demand that speed_cycletime is non-zero, so that
85 "speed_endtime() / speed_cycletime" will give times in cycles.
91 Various combinations of cycle counter, read_real_time(), getrusage(),
92 gettimeofday() and times() can arise, according to which are available
96 Allowing speed_endtime() to return either seconds or cycles is only a
97 slight complication and makes it possible for the speed program to do
98 some sensible things without demanding the CPU frequency. If seconds are
99 being measured then it can always print seconds, and if cycles are being
100 measured then it can always print them without needing to know how long
101 they are. Also the tune program doesn't care at all what the units are.
103 GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
104 fail. This will be needed if times in seconds are wanted but a cycle
105 counter is being used, or if times in cycles are wanted but getrusage or
106 another seconds based timer is in use.
108 If the measuring method uses a cycle counter but supplements it with
109 getrusage or the like, then knowing the CPU frequency is mandatory since
110 the code compares values from the two.
115 Solaris gethrtime() seems no more than a slow way to access the Sparc V9
116 cycle counter. gethrvtime() seems to be relevant only to light weight
117 processes, it doesn't for instance give nanosecond virtual time. So
118 neither of these is used.
123 getrusage_microseconds_p is fundamentally flawed, getrusage and
124 gettimeofday can have resolutions other than clock ticks or microseconds,
125 for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
130 The SGI hardware counter has 64 bits on some machines, which could be
131 used when available. But perhaps 32 bits is enough range, and then rely
132 on the getrusage supplement.
134 Maybe getrusage (or times) should be used as a supplement for any
135 wall-clock measuring method. Currently a wall clock with a good range
136 (eg. a 64-bit cycle counter) is used without a supplement.
138 On PowerPC the timebase registers could be used, but would have to do
139 something to find out the speed. On 6xx chips it's normally 1/4 bus
140 speed, on 4xx chips it's either that or an external clock. Measuring
141 against gettimeofday might be ok. */
152 #include <stdlib.h> /* for getenv() */
155 #include <fcntl.h> /* for open() */
159 #include <stdint.h> /* for uint64_t */
163 #include <unistd.h> /* for sysconf() */
166 #include <sys/types.h>
168 #if TIME_WITH_SYS_TIME
169 # include <sys/time.h> /* for struct timeval */
173 # include <sys/time.h>
180 #include <sys/mman.h> /* for mmap() */
183 #if HAVE_SYS_RESOURCE_H
184 #include <sys/resource.h> /* for struct rusage */
187 #if HAVE_SYS_SYSSGI_H
188 #include <sys/syssgi.h> /* for syssgi() */
191 #if HAVE_SYS_SYSTEMCFG_H
192 #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
196 #include <sys/times.h> /* for times() and struct tms */
200 #include "gmp-impl.h"
205 /* strerror is only used for some stuff on newish systems, no need to have a
206 proper replacement */
208 #define strerror(n) "<strerror not available>"
212 char speed_time_string[256];
213 int speed_precision = 0;
214 double speed_unittime;
215 double speed_cycletime = 0.0;
218 /* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
220 #define M_2POWU (((double) INT_MAX + 1.0) * 2.0)
222 #define M_2POW32 4294967296.0
223 #define M_2POW64 (M_2POW32 * M_2POW32)
226 /* Conditionals for the time functions available are done with normal C
227 code, which is a lot easier than wildly nested preprocessor directives.
229 The choice of what to use is partly made at run-time, according to
230 whether the cycle counter works and the measured accuracy of getrusage
233 A routine that's not available won't be getting called, but is an abort()
234 to be sure it isn't called mistakenly.
236 It can be assumed that if a function exists then its data type will, but
237 if the function doesn't then the data type might or might not exist, so
238 the type can't be used unconditionally. The "struct_rusage" etc macros
239 provide dummies when the respective function doesn't exist. */
242 #if HAVE_SPEED_CYCLECOUNTER
243 static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
245 static const int have_cycles = 0;
246 #define speed_cyclecounter(p) ASSERT_FAIL (speed_cyclecounter not available)
249 /* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
250 microseconds. Same #ifdefs here as in longlong.h. */
251 #if defined (__GNUC__) && ! defined (NO_ASM) \
252 && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
253 static const int have_stck = 1;
254 static const int use_stck = 1; /* always use when available */
255 typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
256 #define STCK(timestamp) \
258 asm ("stck %0" : "=m" (timestamp)); \
261 static const int have_stck = 0;
262 static const int use_stck = 0;
263 typedef unsigned long stck_t; /* dummy */
264 #define STCK(timestamp) ASSERT_FAIL (stck instruction not available)
266 #define STCK_PERIOD (1.0 / 4096e6) /* 2^-12 microseconds */
269 Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
270 and a loop (see powerpc64.asm). */
271 #if HAVE_HOST_CPU_FAMILY_powerpc
272 static const int have_mftb = 1;
273 #if defined (__GNUC__) && ! defined (NO_ASM)
276 unsigned __h1, __l, __h2; \
278 asm volatile ("mftbu %0\n" \
284 } while (__h1 != __h2); \
289 #define MFTB(a) mftb_function (a)
291 #else /* ! powerpc */
292 static const int have_mftb = 0;
297 ASSERT_FAIL (mftb not available); \
301 /* Unicos 10.X has syssgi(), but not mmap(). */
302 #if HAVE_SYSSGI && HAVE_MMAP
303 static const int have_sgi = 1;
305 static const int have_sgi = 0;
308 #if HAVE_READ_REAL_TIME
309 static const int have_rrt = 1;
311 static const int have_rrt = 0;
312 #define read_real_time(t,s) ASSERT_FAIL (read_real_time not available)
313 #define time_base_to_time(t,s) ASSERT_FAIL (time_base_to_time not available)
315 #define RTC_POWER_PC 2
316 #define timebasestruct_t struct timebasestruct_dummy
317 struct timebasestruct_dummy {
319 unsigned int tb_high;
324 #if HAVE_CLOCK_GETTIME
325 static const int have_cgt = 1;
326 #define struct_timespec struct timespec
328 static const int have_cgt = 0;
329 #define struct_timespec struct timespec_dummy
330 #define clock_gettime(id,ts) (ASSERT_FAIL (clock_gettime not available), -1)
331 #define clock_getres(id,ts) (ASSERT_FAIL (clock_getres not available), -1)
335 static const int have_grus = 1;
336 #define struct_rusage struct rusage
338 static const int have_grus = 0;
339 #define getrusage(n,ru) ASSERT_FAIL (getrusage not available)
340 #define struct_rusage struct rusage_dummy
343 #if HAVE_GETTIMEOFDAY
344 static const int have_gtod = 1;
345 #define struct_timeval struct timeval
347 static const int have_gtod = 0;
348 #define gettimeofday(tv,tz) ASSERT_FAIL (gettimeofday not available)
349 #define struct_timeval struct timeval_dummy
353 static const int have_times = 1;
354 #define struct_tms struct tms
356 static const int have_times = 0;
357 #define times(tms) ASSERT_FAIL (times not available)
358 #define struct_tms struct tms_dummy
364 struct timeval_dummy {
368 struct rusage_dummy {
369 struct_timeval ru_utime;
371 struct timespec_dummy {
376 static int use_cycles;
383 static int use_times;
384 static int use_tick_boundary;
386 static unsigned start_cycles[2];
387 static stck_t start_stck;
388 static unsigned start_mftb[2];
389 static unsigned start_sgi;
390 static timebasestruct_t start_rrt;
391 static struct_timespec start_cgt;
392 static struct_rusage start_grus;
393 static struct_timeval start_gtod;
394 static struct_tms start_times;
396 static double cycles_limit = 1e100;
397 static double mftb_unittime;
398 static double sgi_unittime;
399 static double cgt_unittime;
400 static double grus_unittime;
401 static double gtod_unittime;
402 static double times_unittime;
404 /* for RTC_POWER format, ie. seconds and nanoseconds */
405 #define TIMEBASESTRUCT_SECS(t) ((t)->tb_high + (t)->tb_low * 1e-9)
408 /* Return a string representing a time in seconds, nicely formatted.
411 unittime_string (double t)
413 static char buf[128];
418 /* choose units and scale */
420 t *= 1e9, unit = "ns";
422 t *= 1e6, unit = "us";
424 t *= 1e3, unit = "ms";
428 /* want 4 significant figures */
438 sprintf (buf, "%.*f%s", prec, t, unit);
443 static jmp_buf cycles_works_buf;
446 cycles_works_handler (int sig)
448 longjmp (cycles_works_buf, 1);
452 cycles_works_p (void)
454 static int result = -1;
461 RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
464 old_handler = signal (SIGILL, cycles_works_handler);
465 if (old_handler == SIG_ERR)
467 if (speed_option_verbose)
468 printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
471 if (setjmp (cycles_works_buf))
473 if (speed_option_verbose)
474 printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
478 speed_cyclecounter (cycles);
479 signal (SIGILL, old_handler);
480 if (speed_option_verbose)
481 printf ("cycles_works_p(): speed_cyclecounter() works\n");
485 if (speed_option_verbose)
486 printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
498 /* The number of clock ticks per second, but looking at sysconf rather than
499 just CLK_TCK, where possible. */
503 static long result = -1L;
508 result = sysconf (_SC_CLK_TCK);
511 if (speed_option_verbose)
512 printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result);
517 "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
522 if (speed_option_verbose)
523 printf ("CLK_TCK is %ld per second\n", result);
526 fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
532 /* If two times can be observed less than half a clock tick apart, then
533 assume "get" is microsecond accurate.
535 Two times only 1 microsecond apart are not believed, since some kernels
536 take it upon themselves to ensure gettimeofday doesn't return the same
537 value twice, for the benefit of applications using it for a timestamp.
538 This is obviously very stupid given the speed of CPUs these days.
540 Making "reps" many calls to noop_1() is designed to waste some CPU, with
541 a view to getting measurements 2 microseconds (or more) apart. "reps" is
542 increased progressively until such a period is seen.
544 The outer loop "attempts" are just to allow for any random nonsense or
545 system load upsetting the measurements (ie. making two successive calls
546 to "get" come out as a longer interval than normal).
550 The assumption that any interval less than a half tick implies
551 microsecond resolution is obviously fairly rash, the true resolution
552 could be anything between a microsecond and that half tick. Perhaps
553 something special would have to be done on a system where this is the
554 case, since there's no obvious reliable way to detect it
557 #define MICROSECONDS_P(name, type, get, sec, usec) \
559 static int result = -1; \
561 long dt, half_tick; \
562 unsigned attempt, reps, i, j; \
568 half_tick = (1000000L / clk_tck ()) / 2; \
570 for (attempt = 0; attempt < 5; attempt++) \
576 for (i = 0; i < reps; i++) \
577 for (j = 0; j < 100; j++) \
578 noop_1 (CNST_LIMB(0)); \
581 dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \
583 if (speed_option_verbose >= 2) \
584 printf ("%s attempt=%u, reps=%u, dt=%ld\n", \
585 name, attempt, reps, dt); \
590 reps = (reps == 0 ? 1 : 2*reps); \
592 break; /* uint overflow, not normal */ \
595 if (dt < half_tick) \
602 if (speed_option_verbose) \
605 printf ("%s is microsecond accurate\n", name); \
607 printf ("%s is only %s clock tick accurate\n", \
608 name, unittime_string (1.0/clk_tck())); \
615 gettimeofday_microseconds_p (void)
617 #define call_gettimeofday(t) gettimeofday (&(t), NULL)
618 #define timeval_tv_sec(t) ((t).tv_sec)
619 #define timeval_tv_usec(t) ((t).tv_usec)
620 MICROSECONDS_P ("gettimeofday", struct_timeval,
621 call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
625 getrusage_microseconds_p (void)
627 #define call_getrusage(t) getrusage (0, &(t))
628 #define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
629 #define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
630 MICROSECONDS_P ("getrusage", struct_rusage,
631 call_getrusage, rusage_tv_sec, rusage_tv_usec);
634 /* Test whether getrusage goes backwards, return non-zero if it does
635 (suggesting it's flawed).
637 On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
638 microsecond accurate, but has been seen remaining unchanged after many
639 microseconds have elapsed. It also regularly goes backwards by 1000 to
640 5000 usecs, this has been seen after between 500 and 4000 attempts taking
641 perhaps 0.03 seconds. We consider this too broken for good measuring.
642 We used to have configure pretend getrusage didn't exist on this system,
643 but a runtime test should be more reliable, since we imagine the problem
644 is not confined to just this exact system tuple. */
647 getrusage_backwards_p (void)
649 static int result = -1;
650 struct rusage start, prev, next;
657 getrusage (0, &start);
658 memcpy (&next, &start, sizeof (next));
664 memcpy (&prev, &next, sizeof (prev));
665 getrusage (0, &next);
667 if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
668 || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
669 && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
671 if (speed_option_verbose)
672 printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
674 prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
675 next.ru_utime.tv_sec, next.ru_utime.tv_usec);
680 /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
681 attempts, whichever comes first */
682 d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
683 + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
685 if (i > 50000 || (i > 1000 && d > 100000))
692 /* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
693 of glibc (some time post 2.2).
695 CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
696 defined, but returning -1 for an error). */
698 #ifdef CLOCK_PROCESS_CPUTIME_ID
699 # define CGT_ID CLOCK_PROCESS_CPUTIME_ID
701 # ifdef CLOCK_VIRTUAL
702 # define CGT_ID CLOCK_VIRTUAL
706 const int have_cgt_id = 1;
708 const int have_cgt_id = 0;
709 # define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
715 static int result = -1;
716 struct_timespec unit;
723 if (speed_option_verbose)
724 printf ("clock_gettime don't know what ID to use\n");
732 /* trial run to see if it works */
733 if (clock_gettime (CGT_ID, &unit) != 0)
735 if (speed_option_verbose)
736 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
741 /* get the resolution */
742 if (clock_getres (CGT_ID, &unit) != 0)
744 if (speed_option_verbose)
745 printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
750 /* resolution reported by clock_getres, converted to seconds */ cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
751 if (speed_option_verbose) /* only report when asked, like the other diagnostics in this function */ printf ("clock_gettime is %s accurate\n",
752 unittime_string (cgt_unittime));
759 freq_measure_mftb_one (void)
761 #define call_gettimeofday(t) gettimeofday (&(t), NULL)
762 #define timeval_tv_sec(t) ((t).tv_sec)
763 #define timeval_tv_usec(t) ((t).tv_usec)
764 FREQ_MEASURE_ONE ("mftb", struct_timeval,
765 call_gettimeofday, MFTB,
766 timeval_tv_sec, timeval_tv_usec);
770 static jmp_buf mftb_works_buf;
773 mftb_works_handler (int sig)
775 longjmp (mftb_works_buf, 1);
782 RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
785 /* suppress a warning about a[] unused */
792 old_handler = signal (SIGILL, mftb_works_handler);
793 if (old_handler == SIG_ERR)
795 if (speed_option_verbose)
796 printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
799 if (setjmp (mftb_works_buf))
801 if (speed_option_verbose)
802 printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
806 signal (SIGILL, old_handler);
807 if (speed_option_verbose)
808 printf ("mftb_works_p(): mftb works\n");
811 if (speed_option_verbose)
812 printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
815 #if ! HAVE_GETTIMEOFDAY
816 if (speed_option_verbose)
817 printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
821 /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
822 other chips it can be driven from an external clock. */
823 cycletime = freq_measure ("mftb", freq_measure_mftb_one);
824 if (cycletime == -1.0)
826 if (speed_option_verbose)
827 printf ("mftb_works_p(): cannot measure mftb period\n");
831 mftb_unittime = cycletime;
836 volatile unsigned *sgi_addr;
841 #if HAVE_SYSSGI && HAVE_MMAP
842 static int result = -1;
844 size_t pagesize, offset;
845 __psunsigned_t phys, physpage;
847 unsigned period_picoseconds;
853 phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
854 if (phys == (__psunsigned_t) -1)
856 /* ENODEV is the error when a counter is not available */
857 if (speed_option_verbose)
858 printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
862 sgi_unittime = period_picoseconds * 1e-12;
864 /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
865 Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
866 obvious way to identify that without SGI_CYCLECNTR_SIZE. */
867 #ifdef SGI_CYCLECNTR_SIZE
868 size = syssgi (SGI_CYCLECNTR_SIZE);
871 if (speed_option_verbose)
873 printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
874 printf (" will assume size==4\n");
884 printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
889 pagesize = getpagesize();
890 offset = (size_t) phys & (pagesize-1);
891 physpage = phys - offset;
893 /* shouldn't cross over a page boundary */
894 ASSERT_ALWAYS (offset + size/8 <= pagesize);
896 fd = open("/dev/mmem", O_RDONLY);
899 if (speed_option_verbose)
900 printf ("open /dev/mmem: %s\n", strerror (errno));
905 virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
906 if (virtpage == (void *) -1)
908 if (speed_option_verbose)
909 printf ("mmap /dev/mmem: %s\n", strerror (errno));
914 /* address of least significant 4 bytes, knowing mips is big endian */
915 sgi_addr = (unsigned *) ((char *) virtpage + offset
916 + size/8 - sizeof(unsigned));
920 #else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
926 #define DEFAULT(var,n) \
933 speed_time_init (void)
935 double supplement_unittime = 0.0;
937 static int speed_time_initialized = 0;
938 if (speed_time_initialized)
940 speed_time_initialized = 1;
942 speed_cycletime_init ();
944 if (have_cycles && cycles_works_p ())
947 DEFAULT (speed_cycletime, 1.0);
948 speed_unittime = speed_cycletime;
949 DEFAULT (speed_precision, 10000);
950 strcpy (speed_time_string, "CPU cycle counter");
952 /* only used if a supplementary method is chosen below */
953 cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
956 if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
958 /* this is a good combination */
960 supplement_unittime = grus_unittime = 1.0e-6;
961 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
963 else if (have_cycles == 1)
965 /* When speed_cyclecounter has a limited range, look for something
967 if (have_gtod && gettimeofday_microseconds_p())
970 supplement_unittime = gtod_unittime = 1.0e-6;
971 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
976 supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
977 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
982 supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
983 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
988 supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
989 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
993 fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
994 fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n");
998 if (use_grus || use_times || use_gtod)
1000 /* must know cycle period to compare cycles to other measuring
1001 (via cycles_limit) */
1002 speed_cycletime_need_seconds ();
1004 if (speed_precision * supplement_unittime > cycles_limit)
1006 fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1007 fprintf (stderr, " cycle counter and limited precision supplemental method\n");
1008 fprintf (stderr, " (%s)\n", speed_time_string);
1014 strcpy (speed_time_string, "STCK timestamp");
1015 /* stck is in units of 2^-12 microseconds, which is very likely higher
1016 resolution than a cpu cycle */
1017 if (speed_cycletime == 0.0)
1018 speed_cycletime_fail
1019 ("Need to know CPU frequency for effective stck unit");
1020 speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1021 DEFAULT (speed_precision, 10000);
1023 else if (have_mftb && mftb_works_p ())
1026 DEFAULT (speed_precision, 10000);
1027 speed_unittime = mftb_unittime;
1028 sprintf (speed_time_string, "mftb counter (%s)",
1029 unittime_string (speed_unittime));
1031 else if (have_sgi && sgi_works_p ())
1034 DEFAULT (speed_precision, 10000);
1035 speed_unittime = sgi_unittime;
1036 sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1037 unittime_string (speed_unittime));
1038 /* supplemented with getrusage, which we assume to have 1ms resolution */
1040 supplement_unittime = 1e-3;
1046 DEFAULT (speed_precision, 10000);
1047 read_real_time (&t, sizeof(t));
1050 /* FIXME: What's the actual RTC resolution? */
1051 speed_unittime = 1e-7;
1052 strcpy (speed_time_string, "read_real_time() power nanoseconds");
1057 time_base_to_time (&t, sizeof(t));
1058 speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1059 sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1060 unittime_string (speed_unittime));
1063 fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1068 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1070 /* use clock_gettime if microsecond or better resolution */
1073 speed_unittime = cgt_unittime;
1074 DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1075 /* describe the method actually selected by this branch: clock_gettime, not getrusage */ strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1077 else if (have_times && clk_tck() > 1000000)
1079 /* Cray vector systems have times() which is clock cycle resolution
1081 DEFAULT (speed_precision, 10000);
1084 else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1087 speed_unittime = grus_unittime = 1.0e-6;
1088 DEFAULT (speed_precision, 1000);
1089 strcpy (speed_time_string, "microsecond accurate getrusage()");
1091 else if (have_gtod && gettimeofday_microseconds_p())
1094 speed_unittime = gtod_unittime = 1.0e-6;
1095 DEFAULT (speed_precision, 1000);
1096 strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1098 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1100 /* use clock_gettime if 1 tick or better resolution */
1103 else if (have_times)
1105 use_tick_boundary = 1;
1106 DEFAULT (speed_precision, 200);
1109 speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1110 sprintf (speed_time_string, "%s clock tick times()",
1111 unittime_string (speed_unittime));
1116 use_tick_boundary = 1;
1117 speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1118 DEFAULT (speed_precision, 200);
1119 /* no trailing newline: speed_time_string is a description that callers print with their own "\n" */ sprintf (speed_time_string, "%s clock tick getrusage()",
1120 unittime_string (speed_unittime));
1125 use_tick_boundary = 1;
1126 speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1127 DEFAULT (speed_precision, 200);
1128 sprintf (speed_time_string, "%s clock tick gettimeofday()",
1129 unittime_string (speed_unittime));
1133 fprintf (stderr, "No time measuring method available\n");
1134 fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1138 if (speed_option_verbose)
1140 printf ("speed_time_init: %s\n", speed_time_string);
1141 printf (" speed_precision %d\n", speed_precision);
1142 printf (" speed_unittime %.2g\n", speed_unittime);
1143 if (supplement_unittime)
1144 printf (" supplement_unittime %.2g\n", supplement_unittime);
1145 printf (" use_tick_boundary %d\n", use_tick_boundary);
1147 printf (" cycles_limit %.2g seconds\n", cycles_limit);
1153 /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the
1154 corresponding "start_foo" appropriately too. */
1157 grus_tick_boundary (void)
1160 getrusage (0, &prev);
1162 getrusage (0, &start_grus);
1163 } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1167 gtod_tick_boundary (void)
1169 struct_timeval prev;
1170 gettimeofday (&prev, NULL);
1172 gettimeofday (&start_gtod, NULL);
1173 } while (start_gtod.tv_usec == prev.tv_usec);
1177 times_tick_boundary (void)
1182 times (&start_times);
1183 while (start_times.tms_utime == prev.tms_utime);
1187 /* "have_" values are tested to let unused code go dead. */
1190 speed_starttime (void)
1194 if (have_grus && use_grus)
1196 if (use_tick_boundary)
1197 grus_tick_boundary ();
1199 getrusage (0, &start_grus);
1202 if (have_gtod && use_gtod)
1204 if (use_tick_boundary)
1205 gtod_tick_boundary ();
1207 gettimeofday (&start_gtod, NULL);
1210 if (have_times && use_times)
1212 if (use_tick_boundary)
1213 times_tick_boundary ();
1215 times (&start_times);
1218 if (have_cgt && use_cgt)
1219 clock_gettime (CGT_ID, &start_cgt);
1221 if (have_rrt && use_rrt)
1222 read_real_time (&start_rrt, sizeof(start_rrt));
1224 if (have_sgi && use_sgi)
1225 start_sgi = *sgi_addr;
1227 if (have_mftb && use_mftb)
1230 if (have_stck && use_stck)
1233 /* Cycles sampled last for maximum accuracy. */
1234 if (have_cycles && use_cycles)
1235 speed_cyclecounter (start_cycles);
1239 /* Calculate the difference between two cycle counter samples, as a "double"
1242 The start and end values are allowed to cancel in integers in case the
1243 counter values are bigger than the 53 bits that normally fit in a double.
1245 This works even if speed_cyclecounter() puts a value bigger than 32-bits
1246 in the low word (the high word always gets a 2**32 multiplier though). */
1249 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1254 if (have_cycles == 1)
1256 t = (end[0] - start[0]);
1260 d = end[0] - start[0];
1261 t = d - (d > end[0] ? M_2POWU : 0.0);
1262 t += (end[1] - start[1]) * M_2POW32;
1269 speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1274 d = end[0] - start[0];
1275 t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1276 t += (end[1] - start[1]) * M_2POW32;
1281 /* Calculate the difference between "start" and "end" using fields "sec" and
1282 "psec", where each "psec" is a "punit" of a second.
1284 The seconds parts are allowed to cancel before being combined with the
1285 psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
1288 Total time is only calculated in a "double" since an integer count of
1289 psecs might overflow. 2^32 microseconds is only a bit over an hour, or
1290 2^32 nanoseconds only about 4 seconds.
1292 The casts to "long" are for the benefit of timebasestruct_t, where the
1293 fields are only "unsigned int", but we want a signed difference. */
1295 #define DIFF_SECS_ROUTINE(sec, psec, punit) \
1297 long sec_diff, psec_diff; \
1298 sec_diff = (long) end->sec - (long) start->sec; \
1299 psec_diff = (long) end->psec - (long) start->psec; \
1300 return (double) sec_diff + punit * (double) psec_diff; \
1304 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1306 DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1310 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1312 DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1316 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1318 DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1321 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1323 timebasestruct_diff_secs (const timebasestruct_t *end,
1324 const timebasestruct_t *start)
1326 DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1331 speed_endtime (void)
1333 #define END_USE(name,value) \
1335 if (speed_option_verbose >= 3) \
1336 printf ("speed_endtime(): used %s\n", name); \
1341 #define END_ENOUGH(name,value) \
1343 if (speed_option_verbose >= 3) \
1344 printf ("speed_endtime(): %s gives enough precision\n", name); \
1349 #define END_EXCEED(name,value) \
1351 if (speed_option_verbose >= 3) \
1352 printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1358 unsigned end_cycles[2];
1360 unsigned end_mftb[2];
1362 timebasestruct_t end_rrt;
1363 struct_timespec end_cgt;
1364 struct_timeval end_gtod;
1365 struct_rusage end_grus;
1366 struct_tms end_times;
1367 double t_gtod, t_grus, t_times, t_cgt;
1368 double t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1371 /* Cycles sampled first for maximum accuracy.
1372 "have_" values tested to let unused code go dead. */
1374 if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
1375 if (have_stck && use_stck) STCK (end_stck);
1376 if (have_mftb && use_mftb) MFTB (end_mftb);
1377 if (have_sgi && use_sgi) end_sgi = *sgi_addr;
1378 if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
1379 if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
1380 if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
1381 if (have_grus && use_grus) getrusage (0, &end_grus);
1382 if (have_times && use_times) times (&end_times);
1386 if (speed_option_verbose >= 4)
1388 printf ("speed_endtime():\n");
1390 printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
1391 start_cycles[1], start_cycles[0],
1392 end_cycles[1], end_cycles[0]);
1395 printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
1398 printf (" mftb 0x%X,%08X -> 0x%X,%08X\n",
1399 start_mftb[1], start_mftb[0],
1400 end_mftb[1], end_mftb[0]);
1403 printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
1406 printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
1407 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1408 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1411 printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
1412 start_cgt.tv_sec, start_cgt.tv_nsec,
1413 end_cgt.tv_sec, end_cgt.tv_nsec);
1416 printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
1417 start_gtod.tv_sec, start_gtod.tv_usec,
1418 end_gtod.tv_sec, end_gtod.tv_usec);
1421 printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
1422 start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
1423 end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
1426 printf (" times %ld -> %ld\n",
1427 start_times.tms_utime, end_times.tms_utime);
1432 time_base_to_time (&start_rrt, sizeof(start_rrt));
1433 time_base_to_time (&end_rrt, sizeof(end_rrt));
1434 t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1435 END_USE ("read_real_time()", t_rrt);
1440 t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1441 END_USE ("clock_gettime()", t_cgt);
1446 t_grus = rusage_diff_secs (&end_grus, &start_grus);
1448 /* Use getrusage() if the cycle counter limit would be exceeded, or if
1449 it provides enough accuracy already. */
1452 if (t_grus >= speed_precision*grus_unittime)
1453 END_ENOUGH ("getrusage()", t_grus);
1454 if (t_grus >= cycles_limit)
1455 END_EXCEED ("getrusage()", t_grus);
1461 t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1463 /* Use times() if the cycle counter limit would be exceeded, or if
1464 it provides enough accuracy already. */
1467 if (t_times >= speed_precision*times_unittime)
1468 END_ENOUGH ("times()", t_times);
1469 if (t_times >= cycles_limit)
1470 END_EXCEED ("times()", t_times);
1476 t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1478 /* Use gettimeofday() if it measured a value bigger than the cycle
1479 counter can handle. */
1482 if (t_gtod >= cycles_limit)
1483 END_EXCEED ("gettimeofday()", t_gtod);
1489 t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1490 END_USE ("mftb", t_mftb);
1495 t_stck = (end_stck - start_stck) * STCK_PERIOD;
1496 END_USE ("stck", t_stck);
1501 t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1502 END_USE ("SGI hardware counter", t_sgi);
1507 t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1509 END_USE ("cycle counter", t_cycles);
1512 if (use_grus && getrusage_microseconds_p())
1513 END_USE ("getrusage()", t_grus);
1515 if (use_gtod && gettimeofday_microseconds_p())
1516 END_USE ("gettimeofday()", t_gtod);
1518 if (use_times) END_USE ("times()", t_times);
1519 if (use_grus) END_USE ("getrusage()", t_grus);
1520 if (use_gtod) END_USE ("gettimeofday()", t_gtod);
1522 fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1528 if (speed_option_verbose >= 2)
1529 fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);