Rename initCheck to dirtyContext and use it in LZ4_resetStream_fast() to check if...

author Oleg Khabinov <khabinov@fb.com>

Tue, 18 Sep 2018 18:29:31 +0000 (11:29 -0700)

committer Oleg Khabinov <khabinov@fb.com>

Fri, 28 Sep 2018 21:55:05 +0000 (14:55 -0700)
author Oleg Khabinov <khabinov@fb.com>
Tue, 18 Sep 2018 18:29:31 +0000 (11:29 -0700)
committer Oleg Khabinov <khabinov@fb.com>
Fri, 28 Sep 2018 21:55:05 +0000 (14:55 -0700)
diff --git a/lib/lz4.c b/lib/lz4.c

index d0fa3e8..aed2bb2 100644 (file)
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -610,6 +610,15 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
          LZ4_stream_t_internal* const cctx,
          const int inputSize,
          const tableType_t tableType) {
+    /* If compression failed during the previous step, then the context
+     * is marked as dirty, therefore, it has to be fully reset.
+     */
+    if (cctx->dirtyContext) {
+        DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
+        MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
+        return;
+    }
+
      /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
       * therefore safe to use no matter what mode we're in. Otherwise, we figure
       * out if it's safe to leave as is or whether it needs to be reset.
@@ -660,6 +669,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                   const dictIssue_directive dictIssue,
                   const U32 acceleration)
  {
+    int result;
      const BYTE* ip = (const BYTE*) source;
  
      U32 const startIndex = cctx->currentOffset;
@@ -695,9 +705,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
  
      DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
      /* Init conditions */
-    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
+    if (outputLimited == fillOutput && maxOutputSize < 1) goto _failure;  /* Impossible to store anything */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) goto _failure;   /* Unsupported inputSize, too large (or negative) */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) goto _failure;  /* Size too large (not within 64K limit) */
      if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
      assert(acceleration >= 1);
  
@@ -814,7 +824,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
              token = op++;
              if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
                  (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
-                return 0;
+                goto _failure;
+
              if ((outputLimited == fillOutput) &&
                  (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                  op--;
@@ -887,7 +898,7 @@ _next_match:
              if ((outputLimited) &&    /* Check output buffer overflow */
                  (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
                  if (outputLimited == limitedOutput)
-                  return 0;
+                    goto _failure;
                  if (outputLimited == fillOutput) {
                      /* Match description too long : reduce it */
                      U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
@@ -985,7 +996,7 @@ _last_literals:
                  lastRun -= (lastRun+240)/255;
              }
              if (outputLimited == limitedOutput)
-                return 0;
+                goto _failure;
          }
          if (lastRun >= RUN_MASK) {
              size_t accumulator = lastRun - RUN_MASK;
@@ -1004,7 +1015,14 @@ _last_literals:
          *inputConsumed = (int) (((const char*)ip)-source);
      }
      DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
-    return (int)(((char*)op) - dest);
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    return result;
+
+_failure:
+    /* Mark stream as having dirty context, so, it has to be fully reset */
+    cctx->dirtyContext = 1;
+    return 0;
  }
  
  
@@ -1233,6 +1251,12 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
  }
  
  void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
+    /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
+     * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
+     * marked as having dirty context, e.g. requiring full reset.
+     */
+    LZ4_resetStream_fast(working_stream);
+
      if (dictionary_stream != NULL) {
          /* If the current offset is zero, we will never look in the
           * external dictionary context, since there is no value a table
@@ -1276,7 +1300,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch
  
      DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
  
-    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if (streamPtr->dirtyContext) return 0;   /* Uninitialized structure detected */
      LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
      if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
  
diff --git a/lib/lz4.h b/lib/lz4.h

index 4d30857..346f0dc 100644 (file)
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -414,19 +414,22 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
  #endif
  
  /*! LZ4_resetStream_fast() :
- *  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
- *  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
+ *  Use this to prepare a context for a new chain of calls to a streaming API
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  Note:
+ *  To stay on the safe side, when LZ4_stream_t is used for the first time,
+ *  it should be either created using LZ4_createStream() or
+ *  initialized using LZ4_resetStream().
   *
   *  Note:
   *  Using this in advance of a non-streaming-compression function is redundant,
   *  since they all perform their own custom reset internally.
   *
   *  Differences from LZ4_resetStream():
- *  When an LZ4_stream_t is known to be in a internally coherent state,
- *  it can often be prepared for a new compression with almost no work,
- *  only sometimes falling back to the full, expensive reset
- *  that is always required when the stream is in an indeterminate state
- *  (i.e., the reset performed by LZ4_resetStream()).
+ *  When an LZ4_stream_t is known to be in an internally coherent state,
+ *  it will be prepared for a new compression with almost no work.
+ *  Otherwise, it will fall back to the full, expensive reset.
   *
   *  LZ4_streams are guaranteed to be in a valid state when:
   *  - returned from LZ4_createStream()
@@ -439,9 +442,11 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
   *    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
   *    that returned success
   *
- *  When a stream isn't known to be in a valid state, it is not safe to pass to
- *  any fastReset or streaming function. It must first be cleansed by the full
- *  LZ4_resetStream().
+ *  Note:
+ *  A stream that was used in a compression call that did not return success
+ *  (e.g., LZ4_compress_fast_continue()), can still be passed to this function,
+ *  however, it's history is not preserved because of previous compression
+ *  failure.
   */
  LZ4LIB_STATIC_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
  
@@ -506,7 +511,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
  struct LZ4_stream_t_internal {
      uint32_t hashTable[LZ4_HASH_SIZE_U32];
      uint32_t currentOffset;
-    uint16_t initCheck;
+    uint16_t dirtyContext;
      uint16_t tableType;
      const uint8_t* dictionary;
      const LZ4_stream_t_internal* dictCtx;
@@ -526,7 +531,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
  struct LZ4_stream_t_internal {
      unsigned int hashTable[LZ4_HASH_SIZE_U32];
      unsigned int currentOffset;
-    unsigned short initCheck;
+    unsigned short dirtyContext;
      unsigned short tableType;
      const unsigned char* dictionary;
      const LZ4_stream_t_internal* dictCtx;
diff --git a/tests/fuzzer.c b/tests/fuzzer.c

index b29e82e..de434f9 100644 (file)
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -744,6 +744,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
              LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
              blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
              FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good");
  
              /* In the future, it might be desirable to let extDictCtx mode's
               * output diverge from the output generated by regular extDict mode.
@@ -754,19 +755,21 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
              FUZ_CHECKTEST(XXH32(compressedBuffer, blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
  
              FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short");
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_resetStream_fast(&LZ4_stream);
              LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
              ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
              FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(!LZ4_stream.internal_donotuse.dirtyContext, "context should be dirty");
  
              FUZ_DISPLAYTEST();
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_resetStream_fast(&LZ4_stream);
              LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
              ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
              FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
              FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer");
              FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
              FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good");
  
              FUZ_DISPLAYTEST();
              LZ4_resetStream_fast(&LZ4_stream);
@@ -776,6 +779,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
              FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer");
              FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
              FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good");
          }
  
          /* Decompress with dictionary as external */
@@ -990,6 +994,7 @@ static void FUZ_unitTests(int compressionLevel)
          LZ4_resetStream(&streamingState);
          result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1);
          FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!");
+        FUZ_CHECKTEST(streamingState.internal_donotuse.dirtyContext, "dirtyContext flag is set")
  
          result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
          FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
@@ -1012,7 +1017,7 @@ static void FUZ_unitTests(int compressionLevel)
              XXH64_reset(&xxhOrig, 0);
              XXH64_reset(&xxhNewSafe, 0);
              XXH64_reset(&xxhNewFast, 0);
-            LZ4_resetStream(&streamingState);
+            LZ4_resetStream_fast(&streamingState);
              LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
              LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
author	Oleg Khabinov <khabinov@fb.com>
	Tue, 18 Sep 2018 18:29:31 +0000 (11:29 -0700)
committer	Oleg Khabinov <khabinov@fb.com>
	Fri, 28 Sep 2018 21:55:05 +0000 (14:55 -0700)
lib/lz4.c		patch \| blob \| history
lib/lz4.h		patch \| blob \| history
tests/fuzzer.c		patch \| blob \| history