[bench] Use higher resolution timer on POSIX
authorNick Terrell <terrelln@fb.com>
Tue, 17 Oct 2017 19:42:27 +0000 (12:42 -0700)
committerNick Terrell <terrelln@fb.com>
Tue, 17 Oct 2017 20:55:35 +0000 (13:55 -0700)
The timer used was only accurate up to 0.01 seconds. This timer is accurate up to 1 ns.
It is a monotonic timer that measures the real time difference, not on CPU time.

Copied the benchmark code from https://github.com/facebook/zstd/commit/6ab4d5e9041aba962a810ffee191f95897c6208e

programs/bench.c
programs/util.h

index 5c83d59..fac2a87 100644 (file)
@@ -166,7 +166,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
     U32 nbBlocks;
-    UTIL_time_t ticksPerSecond;
     struct compressionParameters compP;
     int cfunctionId;
 
@@ -176,7 +175,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 
     /* init */
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
-    UTIL_initTimer(&ticksPerSecond);
 
     /* Init */
     if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1;
@@ -229,17 +227,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
         size_t cSize = 0;
         double ratio = 0.;
 
-        UTIL_getTime(&coolTime);
+        coolTime = UTIL_getTime();
         DISPLAYLEVEL(2, "\r%79s\r", "");
         while (!cCompleted || !dCompleted) {
             UTIL_time_t clockStart;
             U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
 
             /* overheat protection */
-            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
+            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                 DISPLAYLEVEL(2, "\rcooling down ...    \r");
                 UTIL_sleep(COOLPERIOD_SEC);
-                UTIL_getTime(&coolTime);
+                coolTime = UTIL_getTime();
             }
 
             /* Compression */
@@ -247,8 +245,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
             UTIL_sleepMilli(1);  /* give processor time to other processes */
-            UTIL_waitForNextTick(ticksPerSecond);
-            UTIL_getTime(&clockStart);
+            UTIL_waitForNextTick();
+            clockStart = UTIL_getTime();
 
             if (!cCompleted) {   /* still some time to do compression tests */
                 U32 nbLoops = 0;
@@ -260,8 +258,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         blockTable[blockNb].cSize = rSize;
                     }
                     nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
+                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart);
                     if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;
                     totalCTime += clockSpan;
                     cCompleted = totalCTime>maxTime;
@@ -282,8 +280,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
             UTIL_sleepMilli(1); /* give processor time to other processes */
-            UTIL_waitForNextTick(ticksPerSecond);
-            UTIL_getTime(&clockStart);
+            UTIL_waitForNextTick();
+            clockStart = UTIL_getTime();
 
             if (!dCompleted) {
                 U32 nbLoops = 0;
@@ -300,8 +298,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         blockTable[blockNb].resSize = regenSize;
                     }
                     nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                } while (UTIL_clockSpanMicro(clockStart) < DECOMP_MULT*clockLoop);
+                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart);
                     if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
                     totalDTime += clockSpan;
                     dCompleted = totalDTime>(DECOMP_MULT*maxTime);
index 5a69c55..1382cab 100644 (file)
@@ -140,45 +140,116 @@ extern "C" {
 /*-****************************************
 *  Time functions
 ******************************************/
-#if (PLATFORM_POSIX_VERSION >= 1)
-#include <unistd.h>
-#include <sys/times.h>   /* times */
-   typedef U64 UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=sysconf(_SC_CLK_TCK); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; *x = (UTIL_time_t)newTicks; }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
-#elif defined(_WIN32)   /* Windows */
-   typedef LARGE_INTEGER UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { if (!QueryPerformanceFrequency(ticksPerSecond)) fprintf(stderr, "ERROR: QueryPerformance not present\n"); }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { QueryPerformanceCounter(x); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
+#if defined(_WIN32)   /* Windows */
+    typedef LARGE_INTEGER UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;
+        static int init = 0;
+        if (!init) {
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static LARGE_INTEGER ticksPerSecond;
+        static int init = 0;
+        if (!init) {
+            if (!QueryPerformanceFrequency(&ticksPerSecond))
+                fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n");
+            init = 1;
+        }
+        return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+    }
+#elif defined(__APPLE__) && defined(__MACH__)
+    #include <mach/mach_time.h>
+    typedef U64 UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+    {
+        static mach_timebase_info_data_t rate;
+        static int init = 0;
+        if (!init) {
+            mach_timebase_info(&rate);
+            init = 1;
+        }
+        return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
+    }
+#elif (PLATFORM_POSIX_VERSION >= 200112L)
+    #include <time.h>
+    typedef struct timespec UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void)
+    {
+        UTIL_time_t now;
+        if (clock_gettime(CLOCK_MONOTONIC, &now))
+            fprintf(stderr, "ERROR: Failed to get time\n");   /* we could also exit() */
+        return now;
+    }
+    UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t diff;
+        if (end.tv_nsec < begin.tv_nsec) {
+            diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
+            diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
+        } else {
+            diff.tv_sec = end.tv_sec - begin.tv_sec;
+            diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
+        }
+        return diff;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 micro = 0;
+        micro += 1000000ULL * diff.tv_sec;
+        micro += diff.tv_nsec / 1000ULL;
+        return micro;
+    }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
+    {
+        UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+        U64 nano = 0;
+        nano += 1000000000ULL * diff.tv_sec;
+        nano += diff.tv_nsec;
+        return nano;
+    }
 #else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
-   typedef clock_t UTIL_time_t;
-   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=0; }
-   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); }
-   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+    typedef clock_t UTIL_time_t;
+    UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
+    UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+    UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
 #endif
 
 
 /* returns time span in microseconds */
-UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart, UTIL_time_t ticksPerSecond )
+UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart)
 {
-    UTIL_time_t clockEnd;
-    UTIL_getTime(&clockEnd);
-    return UTIL_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd);
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
 
-UTIL_STATIC void UTIL_waitForNextTick(UTIL_time_t ticksPerSecond)
+UTIL_STATIC void UTIL_waitForNextTick(void)
 {
-    UTIL_time_t clockStart, clockEnd;
-    UTIL_getTime(&clockStart);
+    UTIL_time_t const clockStart = UTIL_getTime();
+    UTIL_time_t clockEnd;
     do {
-        UTIL_getTime(&clockEnd);
-    } while (UTIL_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0);
+        clockEnd = UTIL_getTime();
+    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
 }