From: Yann Collet Date: Tue, 2 Apr 2019 23:22:11 +0000 (-0700) Subject: created LZ4_FAST_DEC_LOOP build macro X-Git-Tag: upstream/1.9.3~5^2~22^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2589c4424ff56a9e6bb37b2be394e5e0c376e7a5;p=platform%2Fupstream%2Flz4.git created LZ4_FAST_DEC_LOOP build macro --- diff --git a/lib/README.md b/lib/README.md index a705de6..e9b221f 100644 --- a/lib/README.md +++ b/lib/README.md @@ -42,17 +42,28 @@ Should they be nonetheless needed, it's possible to force their publication by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`. +#### Build macros + +The following build macro can be determined at compilation time : + +- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop. + This loop works great on x86/x64 cpus, and is automatically enabled on these platforms. + It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor. + Typically with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`, + and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`. + + #### Amalgamation -lz4 code is able to be amalgamated into a single file. -We can combine all source code in `lz4_all.c` by using following command, +lz4 source code can be amalgamated into a single file. +One can combine all source code into `lz4_all.c` by using the following command: ``` cat lz4.c > lz4_all.c cat lz4hc.c >> lz4_all.c cat lz4frame.c >> lz4_all.c ``` -and compile `lz4_all.c`. -It's necessary to include all `*.h` files present in `/lib` together with `lz4_all.c`. +(`cat` file order is important) then compile `lz4_all.c`. +All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`. 
#### Windows : using MinGW+MSYS to create DLL diff --git a/lib/lz4.c b/lib/lz4.c index de744c5..dafd972 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -202,6 +202,7 @@ typedef size_t reg_t; /* 32-bits in x32 mode */ #endif + /*-************************************ * Reading and writing into memory **************************************/ @@ -235,7 +236,7 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -#else /* safe and portable access through memcpy() */ +#else /* safe and portable access using memcpy() */ static U16 LZ4_read16(const void* memPtr) { @@ -301,7 +302,15 @@ static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; -#if defined(__i386__) || defined(__x86_64__) +#ifndef LZ4_FAST_DEC_LOOP +# if defined(__i386__) || defined(__x86_64__) +# define LZ4_FAST_DEC_LOOP 1 +# else +# define LZ4_FAST_DEC_LOOP 0 +# endif +#endif + +#if LZ4_FAST_DEC_LOOP LZ4_FORCE_O2_INLINE_GCC_PPC64LE void LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { if (offset < 8) { @@ -367,6 +376,8 @@ void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con } } #endif + + /*-************************************ * Common Constants **************************************/ @@ -1590,7 +1601,7 @@ LZ4_decompress_generic( if ((endOnInput) && unlikely(srcSize==0)) return -1; /* Currently the fast loop shows a regression on qualcomm arm chips. 
*/ -#if defined(__i386__) || defined(__x86_64__) +#if LZ4_FAST_DEC_LOOP if ((oend - op) < FASTLOOP_SAFE_DISTANCE) goto safe_decode; @@ -1773,7 +1784,7 @@ LZ4_decompress_generic( /* copy literals */ cpy = op+length; -#if defined(__i386__) || defined(__x86_64__) +#if LZ4_FAST_DEC_LOOP safe_literal_copy: #endif LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); @@ -1823,7 +1834,7 @@ LZ4_decompress_generic( } length += MINMATCH; -#if defined(__i386__) || defined(__x86_64__) +#if LZ4_FAST_DEC_LOOP safe_match_copy: #endif /* match starting within external dictionary */