#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)\r
#endif\r
\r
+// GCC does not provide _rotl outside of Windows, so supply a fallback.\r
+// 32-bit left rotation: x is expected to be an unsigned 32-bit value and r to be in 1..31\r
+// (r == 0 would shift right by 32, which is undefined).\r
+// Arguments are fully parenthesized so that expressions such as _rotl(a|b, n+1) expand correctly.\r
+#if defined(__GNUC__)\r
+#define _rotl(x,r) (((x) << (r)) | ((x) >> (32 - (r))))\r
+#endif\r
+\r
\r
//**************************************\r
// Includes\r
\r
#define KNUTH 2654435761U\r
#define MAX_MEM (1984<<20)\r
-#define CHUNKSIZE (8<<20)\r
-#define MAX_NB_CHUNKS ((MAX_MEM / CHUNKSIZE) + 1)\r
+#define DEFAULT_CHUNKSIZE (8<<20)\r
\r
\r
//**************************************\r
\r
\r
\r
+//**************************************\r
+// Private Parameters\r
+//**************************************\r
+// Size in bytes of each independently compressed chunk during benchmarks.\r
+static int chunkSize = DEFAULT_CHUNKSIZE;\r
+\r
+// Sets the benchmark chunk size, in bytes.\r
+// Non-positive values are rejected and the current size is kept: the chunk size is\r
+// used as a divisor when the number of chunks is computed, so it must stay > 0.\r
+void BMK_SetBlocksize(int bsize)\r
+{\r
+    if (bsize <= 0)\r
+    {\r
+        DISPLAY("Invalid Block Size (%i), keeping %i KB... ", bsize, chunkSize>>10);\r
+        return;\r
+    }\r
+    chunkSize = bsize;\r
+    DISPLAY("Using Block Size of %i KB... ", chunkSize>>10);\r
+}\r
+\r
//*********************************************************\r
// Private functions\r
//*********************************************************\r
}\r
\r
\r
-static U32 BMK_checksum(char* buff, U32 length)\r
+// Computes a 32-bit checksum over the `length` bytes starting at `buff`.\r
+// The mixing steps and constants match the MurmurHash3 32-bit scheme, with KNUTH used as the seed.\r
+// Complete 4-byte blocks are read through a U32 pointer, so the result follows the host byte order.\r
+// NOTE(review): the U32 reads presumably rely on buff being suitably aligned (callers pass a malloc'd buffer) - confirm.\r
+static U32 BMK_checksum_MMH3A (char* buff, U32 length)\r
{\r
- BYTE* p = (BYTE*)buff;\r
- BYTE* bEnd = p + length;\r
- BYTE* limit = bEnd - 3;\r
- U32 idx = 1;\r
- U32 crc = KNUTH;\r
- \r
- while (p<limit)\r
- {\r
- crc += ((*(U32*)p) + idx++);\r
- crc *= KNUTH;\r
- p+=4;\r
- }\r
- while (p<bEnd)\r
- {\r
- crc += ((*p) + idx++);\r
- crc *= KNUTH;\r
- p++;\r
- }\r
- return crc;\r
-}\r
+ const BYTE* data = (const BYTE*)buff;\r
+ const int nblocks = length >> 2; // number of complete 4-byte blocks\r
+\r
+ U32 h1 = KNUTH;\r
+ U32 c1 = 0xcc9e2d51;\r
+ U32 c2 = 0x1b873593;\r
+\r
+ const U32* blocks = (const U32*)(data + nblocks*4); // points just past the last complete block\r
+ int i;\r
+\r
+ // Body: negative indices walk the complete blocks from first to last\r
+ for(i = -nblocks; i; i++)\r
+ {\r
+ U32 k1 = blocks[i];\r
+\r
+ k1 *= c1;\r
+ k1 = _rotl(k1,15);\r
+ k1 *= c2;\r
+ \r
+ h1 ^= k1;\r
+ h1 = _rotl(h1,13); \r
+ h1 = h1*5+0xe6546b64;\r
+ }\r
+\r
+ // Tail: mix in the remaining 0 to 3 bytes that do not form a complete block\r
+ {\r
+ const BYTE* tail = (const BYTE*)(data + nblocks*4);\r
+ U32 k1 = 0;\r
+\r
+ switch(length & 3)\r
+ {\r
+ case 3: k1 ^= tail[2] << 16; // intentional fall through\r
+ case 2: k1 ^= tail[1] << 8; // intentional fall through\r
+ case 1: k1 ^= tail[0];\r
+ k1 *= c1; k1 = _rotl(k1,15); k1 *= c2; h1 ^= k1;\r
+ };\r
+ }\r
+\r
+ // Finalization: fold in the length, then alternate xor-shifts and multiplications\r
+ h1 ^= length;\r
+ h1 ^= h1 >> 16;\r
+ h1 *= 0x85ebca6b;\r
+ h1 ^= h1 >> 13;\r
+ h1 *= 0xc2b2ae35;\r
+ h1 ^= h1 >> 16;\r
+\r
+ return h1;\r
+} \r
\r
\r
static size_t BMK_findMaxMem(U64 requiredMem)\r
size_t readSize;\r
char* in_buff;\r
char* out_buff; int out_buff_size;\r
- struct chunkParameters chunkP[MAX_NB_CHUNKS];\r
+ struct chunkParameters* chunkP;\r
U32 crcc, crcd;\r
struct compressionParameters compP;\r
\r
}\r
\r
// Alloc\r
+ chunkP = (struct chunkParameters*) malloc(((benchedsize / chunkSize)+1) * sizeof(struct chunkParameters));\r
in_buff = malloc((size_t )benchedsize);\r
- nbChunks = (benchedsize / CHUNKSIZE) + 1;\r
- maxCChunkSize = LZ4_compressBound(CHUNKSIZE);\r
+ nbChunks = (benchedsize / chunkSize) + 1;\r
+ maxCChunkSize = LZ4_compressBound(chunkSize);\r
out_buff_size = nbChunks * maxCChunkSize;\r
out_buff = malloc((size_t )out_buff_size);\r
\r
+\r
if(!in_buff || !out_buff)\r
{\r
DISPLAY("\nError: not enough memory!\n");\r
for (i=0; i<nbChunks; i++)\r
{\r
chunkP[i].id = i;\r
- chunkP[i].inputBuffer = in; in += CHUNKSIZE;\r
- if (remaining > CHUNKSIZE) { chunkP[i].inputSize = CHUNKSIZE; remaining -= CHUNKSIZE; } else { chunkP[i].inputSize = remaining; remaining = 0; }\r
+ chunkP[i].inputBuffer = in; in += chunkSize;\r
+ if ((int)remaining > chunkSize) { chunkP[i].inputSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].inputSize = remaining; remaining = 0; }\r
chunkP[i].outputBuffer = out; out += maxCChunkSize;\r
chunkP[i].outputSize = 0;\r
}\r
}\r
\r
// Calculating input Checksum\r
- crcc = BMK_checksum(in_buff, benchedsize);\r
+ crcc = BMK_checksum_MMH3A(in_buff, benchedsize);\r
\r
\r
// Bench\r
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);\r
\r
// CRC Checking\r
- crcd = BMK_checksum(in_buff, benchedsize);\r
+ crcd = BMK_checksum_MMH3A(in_buff, benchedsize);\r
if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }\r
}\r
\r
\r
free(in_buff);\r
free(out_buff);\r
+ free(chunkP);\r
}\r
\r
if (nbFiles > 1)\r
// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.\r
#endif\r
\r
-// Unaligned memory access ?\r
-// This feature is automatically enabled for "common" CPU, such as x86.\r
-// For others CPU, you may want to force this option manually to improve performance if your target CPU supports unaligned memory access\r
+// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.\r
+// For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.\r
+// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.\r
#if defined(__ARM_FEATURE_UNALIGNED)\r
#define LZ4_FORCE_UNALIGNED_ACCESS 1\r
#endif\r
\r
-// Uncomment this parameter if your target system does not support hardware bit count\r
-//#define _FORCE_SW_BITCOUNT\r
+// Uncomment this parameter if your target system or compiler does not support hardware bit count\r
+//#define LZ4_FORCE_SW_BITCOUNT\r
\r
\r
\r
#define inline __forceinline // Visual is not C99, but supports inline\r
#endif\r
\r
-#if (defined(__GNUC__) && (!defined(LZ4_FORCE_UNALIGNED_ACCESS)))\r
-#define _PACKED __attribute__ ((packed))\r
-#else\r
-#define _PACKED\r
-#endif\r
-\r
#ifdef _MSC_VER // Visual Studio\r
#define bswap16(i) _byteswap_ushort(i)\r
#else\r
#define U64 uint64_t\r
#endif\r
\r
+// Single-member wrapper structs used by the A16/A32/A64 accessors below.\r
+// Unless LZ4_FORCE_UNALIGNED_ACCESS is defined, they are declared with 1-byte packing,\r
+// presumably so the compiler does not assume natural alignment when accessing the member.\r
+#ifndef LZ4_FORCE_UNALIGNED_ACCESS\r
+#pragma pack(push, 1) \r
+#endif\r
+\r
+typedef struct _U16_S { U16 v; } U16_S;\r
+typedef struct _U32_S { U32 v; } U32_S;\r
+typedef struct _U64_S { U64 v; } U64_S;\r
+\r
+// Restore the previous packing setting\r
+#ifndef LZ4_FORCE_UNALIGNED_ACCESS\r
+#pragma pack(pop) \r
+#endif\r
+\r
+// Access the 64/32/16-bit value located at address x through the wrapper structs\r
+#define A64(x) (((U64_S *)(x))->v)\r
+#define A32(x) (((U32_S *)(x))->v)\r
+#define A16(x) (((U16_S *)(x))->v)\r
+\r
\r
//**************************************\r
// Constants\r
//**************************************\r
#define MINMATCH 4\r
+\r
+#define HASH_LOG COMPRESSIONLEVEL\r
+#define HASHTABLESIZE (1 << HASH_LOG)\r
+#define HASH_MASK (HASHTABLESIZE - 1)\r
+\r
#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION>2?NOTCOMPRESSIBLE_CONFIRMATION:2)\r
#define STACKLIMIT 13\r
#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).\r
#define MAXD_LOG 16\r
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)\r
\r
-#define HASH_LOG COMPRESSIONLEVEL\r
-#define HASHTABLESIZE (1 << HASH_LOG)\r
-#define HASH_MASK (HASHTABLESIZE - 1)\r
-\r
#define ML_BITS 4\r
#define ML_MASK ((1U<<ML_BITS)-1)\r
#define RUN_BITS (8-ML_BITS)\r
HTYPE hashTable[HASHTABLESIZE];\r
};\r
\r
-typedef struct _U64_S\r
-{\r
- U64 v;\r
-} _PACKED U64_S;\r
-\r
-typedef struct _U32_S\r
-{\r
- U32 v;\r
-} _PACKED U32_S;\r
-\r
-typedef struct _U16_S\r
-{\r
- U16 v;\r
-} _PACKED U16_S;\r
-\r
-#define A64(x) (((U64_S *)(x))->v)\r
-#define A32(x) (((U32_S *)(x))->v)\r
-#define A16(x) (((U16_S *)(x))->v)\r
-\r
\r
//**************************************\r
// Macros\r
inline static int LZ4_NbCommonBytes (register U64 val)\r
{\r
#if defined(LZ4_BIG_ENDIAN)\r
- #if defined(_MSC_VER) && !defined(_FORCE_SW_BITCOUNT)\r
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
unsigned long r = 0;\r
_BitScanReverse64( &r, val );\r
return (int)(r>>3);\r
- #elif defined(__GNUC__) && !defined(_FORCE_SW_BITCOUNT)\r
+ #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
return (__builtin_clzll(val) >> 3); \r
#else\r
int r;\r
return r;\r
#endif\r
#else\r
- #if defined(_MSC_VER) && !defined(_FORCE_SW_BITCOUNT)\r
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
unsigned long r = 0;\r
_BitScanForward64( &r, val );\r
return (int)(r>>3);\r
- #elif defined(__GNUC__) && !defined(_FORCE_SW_BITCOUNT)\r
+ #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
return (__builtin_ctzll(val) >> 3); \r
#else\r
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };\r
inline static int LZ4_NbCommonBytes (register U32 val)\r
{\r
#if defined(LZ4_BIG_ENDIAN)\r
- #if defined(_MSC_VER) && !defined(_FORCE_SW_BITCOUNT)\r
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
unsigned long r = 0;\r
_BitScanReverse( &r, val );\r
return (int)(r>>3);\r
- #elif defined(__GNUC__) && !defined(_FORCE_SW_BITCOUNT)\r
+ #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
return (__builtin_clz(val) >> 3); \r
#else\r
int r;\r
return r;\r
#endif\r
#else\r
- #if defined(_MSC_VER) && !defined(_FORCE_SW_BITCOUNT)\r
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
unsigned long r = 0;\r
_BitScanForward( &r, val );\r
return (int)(r>>3);\r
- #elif defined(__GNUC__) && !defined(_FORCE_SW_BITCOUNT)\r
+ #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
return (__builtin_ctz(val) >> 3); \r
#else\r
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };\r
- return DeBruijnBytePos[((U32)((val & -val) * 0x077CB531U)) >> 27];\r
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];\r
#endif\r
#endif\r
}\r
int i,\r
compression=1, // default action if no argument\r
decode=0,\r
- bench=0;\r
+ bench=0,\r
+ filenamesStart=2;\r
char* input_filename=0;\r
char* output_filename=0;\r
#ifdef _WIN32 \r
for(i=1; i<argc; i++)\r
{\r
char* argument = argv[i];\r
- char command = 0;\r
\r
if(!argument) continue; // Protection if argument empty\r
\r
- if (argument[0]=='-') command++; // valid command trigger\r
-\r
// Select command\r
- if (command)\r
+ if (argument[0]=='-')\r
{\r
- argument += command;\r
+ argument ++;\r
\r
// Display help on usage\r
if ( argument[0] =='h' ) { usage(); return 0; }\r
// Bench\r
if ( argument[0] =='b' ) { bench=1; continue; }\r
\r
+ // Modify Block Size (benchmark only)\r
+ if ( argument[0] =='B' ) { int B = argument[1] - '0'; int S = 1 << (10 + 2*B); BMK_SetBlocksize(S); continue; }\r
+\r
// Test\r
if ( argument[0] =='t' ) { decode=1; output_filename=nulmark; continue; }\r
}\r
\r
// first provided filename is input\r
- if (!input_filename) { input_filename=argument; continue; }\r
+ if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }\r
\r
// second provided filename is output\r
if (!output_filename) \r
// No input filename ==> Error\r
if(!input_filename) { badusage(); return 1; }\r
\r
- if (bench) return BMK_benchFile(argv+2, argc-2);\r
+ if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart);\r
\r
// No output filename \r
if (!output_filename) { badusage(); return 1; }\r