new test tool : datagen
author Yann Collet <yann.collet.73@gmail.com>
Tue, 3 Jun 2014 22:44:49 +0000 (23:44 +0100)
committer Yann Collet <yann.collet.73@gmail.com>
Tue, 3 Jun 2014 22:44:49 +0000 (23:44 +0100)
programs/Makefile
programs/datagen.c [new file with mode: 0644]
programs/fullbench.c

index 219684f..522850e 100644 (file)
@@ -58,14 +58,16 @@ endif
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
 EXT =.exe
+VOID = nul
 else
 EXT =
+VOID = /dev/null
 endif
 
 
 default: lz4 lz4c
 
-all: lz4 lz4c lz4c32 fuzzer fuzzer32 fullbench fullbench32
+all: lz4 lz4c lz4c32 fullbench fullbench32 fuzzer fuzzer32 datagen
 
 lz4: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c
        $(CC)      $(FLAGS) -DDISABLE_LZ4C_LEGACY_OPTIONS $^ -o $@$(EXT)
@@ -76,22 +78,27 @@ lz4c  : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c
 lz4c32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c
        $(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
+fullbench  : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c
+       $(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+fullbench32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c
+       $(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+
 fuzzer  : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fuzzer.c 
        $(CC)      $(FLAGS) $^ -o $@$(EXT)
 
 fuzzer32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fuzzer.c
        $(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
-fullbench  : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c
+datagen : datagen.c
        $(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-fullbench32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c
-       $(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
 clean:
        @rm -f core *.o \
         lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) \
-        fuzzer$(EXT) fuzzer32$(EXT) fullbench$(EXT) fullbench32$(EXT) 
+        fullbench$(EXT) fullbench32$(EXT) \
+        fuzzer$(EXT) fuzzer32$(EXT) datagen$(EXT)
        @echo Cleaning completed
 
 
@@ -124,11 +131,17 @@ test-64: test-lz4 test-lz4c test-fullbench test-fuzzer
 
 test-32: test-lz4 test-lz4c32 test-fullbench32 test-fuzzer32
 
-test-lz4:
+test-lz4: lz4 datagen
+       ./datagen | ./lz4 | ./lz4 -vdq > $(VOID)
+       ./datagen -g256MB | ./lz4 -B4D | ./lz4 -vdq > $(VOID)
+       ./datagen -g6GB | ./lz4 -vqBD | ./lz4 -vdq > $(VOID)
 
-test-lz4c:
+test-lz4c: lz4c datagen
 
-test-lz4c32:
+test-lz4c32: lz4c32 datagen
+       ./datagen | ./lz4c32 | ./lz4c32 -vdq > $(VOID)
+       ./datagen -g256MB | ./lz4c32 -B4D | ./lz4c32 -vdq > $(VOID)
+       ./datagen -g6GB | ./lz4c32 -vqBD | ./lz4c32 -vdq > $(VOID)
 
 test-fullbench: fullbench
        ./fullbench --no-prompt $(BENCH_NB) $(TEST_FILES)
diff --git a/programs/datagen.c b/programs/datagen.c
new file mode 100644 (file)
index 0000000..05eb7f0
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+    datagen.c - compressible data generator test tool
+    Copyright (C) Yann Collet 2012-2014
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+    - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/**************************************
+ Remove Visual warning messages
+**************************************/
+#define _CRT_SECURE_NO_WARNINGS   // fgets
+
+
+/**************************************
+ Includes
+**************************************/
+//#include <stdlib.h>
+#include <stdio.h>      // fgets, sscanf
+#include <string.h>     // strcmp
+
+
+/**************************************
+   Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+ Constants
+**************************************/
+#ifndef LZ4_VERSION
+#  define LZ4_VERSION "rc118"
+#endif
+
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define CDG_SIZE_DEFAULT (64 KB)
+#define CDG_SEED_DEFAULT 0
+#define CDG_COMPRESSIBILITY_DEFAULT 50
+#define PRIME1   2654435761U
+#define PRIME2   2246822519U
+
+
+/**************************************
+  Macros
+**************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+
+
+/**************************************
+  Local Parameters
+**************************************/
+static int no_prompt = 0;
+static char* programName;
+static int displayLevel = 2;
+
+
+/*********************************************************
+  Data generation functions
+*********************************************************/
+
+// 32-bit left rotation; arguments are parenthesized so that expressions can be passed safely
+#define CDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+/*
+ * CDG_rand :
+ * Advances the 32-bit generator state stored at *src and returns the new state.
+ * The state is multiplied by PRIME1, incremented by PRIME2, then rotated left by 13 bits.
+ * The sequence is fully determined by the initial value of *src.
+ */
+static unsigned int CDG_rand(U32* src)
+{
+    U32 rand32 = *src;
+    rand32 *= PRIME1;
+    rand32 += PRIME2;
+    rand32  = CDG_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32;
+}
+
+
+// Helper macros : each one draws from the generator through the local variable `seed` (a U32*)
+#define CDG_RAND15BITS  ((CDG_rand(seed) >> 3) & 32767)
+#define CDG_RANDLENGTH  ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15)
+#define CDG_RANDCHAR    (((CDG_rand(seed) >> 9) & 63) + '0')
+/*
+ * CDG_generate :
+ * Writes `size` bytes of pseudo-random, partially repetitive data to stdout.
+ *   size  : total number of bytes to produce
+ *   seed  : generator state, read and updated through CDG_rand()
+ *   proba : probability (0.0 - 1.0) that the next sequence is a copy of earlier data
+ *           rather than fresh random characters
+ * Data is produced in blocks of at most 128 KB; copies only reference data of the current block.
+ * Generation stops with a message on stderr if stdout cannot accept the data.
+ */
+static void CDG_generate(U64 size, U32* seed, double proba)
+{
+    BYTE buff[128 KB];
+    const U32 P32 = (U32)(32768 * proba);   // threshold compared against a 15-bit random value
+    U64 total = 0;
+
+    while (total < size)
+    {
+        const U64 remaining = size - total;
+        const U32 genBlockSize = (remaining < 128 KB) ? (U32)remaining : (U32)(128 KB);
+        U32 pos = 1;
+
+        // A block always starts with one literal, so that a copy has something to reference
+        buff[0] = (BYTE)CDG_RANDCHAR;
+        while (pos < genBlockSize)
+        {
+            // Select : Literal (char) or Match (within 32K)
+            if (CDG_RAND15BITS < P32)
+            {
+                // Copy (offset within 32K, limited to data already generated in this block)
+                U32 length = CDG_RANDLENGTH + 4;
+                U32 offset = CDG_RAND15BITS + 1;
+                U32 ref, end;
+                if (offset > pos) offset = pos;
+                if (pos + length > genBlockSize) length = genBlockSize - pos;
+                ref = pos - offset;
+                end = pos + length;
+                // Byte by byte on purpose : source and destination may overlap
+                while (pos < end) buff[pos++] = buff[ref++];
+            }
+            else
+            {
+                // Literal (noise)
+                U32 length = CDG_RANDLENGTH;
+                U32 end;
+                if (pos + length > genBlockSize) length = genBlockSize - pos;
+                end = pos + length;
+                while (pos < end) buff[pos++] = (BYTE)CDG_RANDCHAR;
+            }
+        }
+
+        // Emit the block in one call and stop if the output stream refuses it
+        if (fwrite(buff, 1, genBlockSize, stdout) != genBlockSize)
+        {
+            DISPLAY("Error : unable to write generated data to stdout\n");
+            return;
+        }
+        total += genBlockSize;
+    }
+}
+
+
+/*
+ * CDG_usage :
+ * Prints the command line help on stderr.
+ * Always returns 0, so that main() can return the result directly.
+ */
+int CDG_usage(void)
+{
+    DISPLAY( "Compressible data generator\n");
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [size] [args]\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "Arguments :\n");
+    // CDG_SIZE_DEFAULT is an unsigned expression : print it with %u
+    DISPLAY( " -g#    : generate # data (default:%u)\n", CDG_SIZE_DEFAULT);
+    DISPLAY( " -s#    : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
+    DISPLAY( " -p#    : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;
+}
+
+
+/*
+ * CDG_readNumber :
+ * Reads an unsigned decimal number starting at *cursor.
+ * On return, *cursor points to the first character which is not a digit.
+ * Returns 0 when no digit is present; an oversized value wraps (unsigned arithmetic).
+ */
+static U64 CDG_readNumber(const char** cursor)
+{
+    const char* p = *cursor;
+    U64 value = 0;
+    while ((*p >= '0') && (*p <= '9'))
+    {
+        value = (value * 10) + (U64)(*p - '0');
+        p++;
+    }
+    *cursor = p;
+    return value;
+}
+
+
+/*
+ * main :
+ * Decodes the command line, then writes the generated data to stdout.
+ *   -g#  : size to generate, with optional K / M / G suffix and optional trailing B
+ *   -s#  : seed
+ *   -p#  : compressibility in percent (values above 100 are reduced to 100)
+ *   -v   : verbose (display level 4)
+ *   -h   : display help and exit
+ *   --no-prompt : accepted, sets the no_prompt flag
+ * Unknown option letters and arguments not starting with '-' are ignored.
+ * Returns 0.
+ */
+int main(int argc, char** argv)
+{
+    int argNb;
+    int proba = CDG_COMPRESSIBILITY_DEFAULT;
+    U64 size = CDG_SIZE_DEFAULT;
+    U32 seed = CDG_SEED_DEFAULT;
+
+    // Check command line
+    programName = argv[0];
+    for (argNb=1; argNb<argc; argNb++)
+    {
+        const char* argument = argv[argNb];
+
+        if (!argument) continue;            // Protection if argument empty
+        if (argument[0] != '-') continue;   // Only options are decoded
+        if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
+
+        // Decode command (note : aggregated commands are allowed)
+        // `argument` always points to the next character to decode, and the loop stops on the
+        // terminating NUL, so the string is never read beyond its end even when a numeric
+        // option consumes all remaining characters.
+        argument++;
+        while (*argument != 0)
+        {
+            switch (*argument)
+            {
+            case 'h':
+                return CDG_usage();
+            case 'g':
+                argument++;
+                size = CDG_readNumber(&argument);
+                if (*argument=='K') { size <<= 10; argument++; }
+                if (*argument=='M') { size <<= 20; argument++; }
+                if (*argument=='G') { size <<= 30; argument++; }
+                if (*argument=='B') { argument++; }
+                break;
+            case 's':
+                argument++;
+                seed = (U32)CDG_readNumber(&argument);
+                break;
+            case 'p':
+                argument++;
+                {
+                    // Clamp before narrowing, so that a huge value cannot overflow an int
+                    U64 percent = CDG_readNumber(&argument);
+                    proba = (percent > 100) ? 100 : (int)percent;
+                }
+                break;
+            case 'v':
+                displayLevel = 4;
+                argument++;
+                break;
+            default:
+                argument++;   // Unknown option letter : ignored
+                break;
+            }
+        }
+    }
+
+    // Get Seed
+    DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION);
+    DISPLAYLEVEL(3, "Seed = %u \n", seed);
+    if (proba!=CDG_COMPRESSIBILITY_DEFAULT) { DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba); }
+
+    CDG_generate(size, &seed, ((double)proba) / 100);
+
+    return 0;
+}
index ae33330..7249387 100755 (executable)
@@ -581,7 +581,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
             case 4: decompressionFunction = LZ4_decompress_safe_withPrefix64k; break;
             case 5: decompressionFunction = local_LZ4_decompress_safe_usingDict; break;
             case 6: decompressionFunction = local_LZ4_decompress_safe_partial; break;
-            default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); free(chunkP); return 1;
+            default : DISPLAY("ERROR ! Bad decompression algorithm Id !! \n"); free(chunkP); return 1;
             }
 
             for (loopNb = 1; loopNb <= nbIterations; loopNb++)