fix #783

author Yann Collet <cyan@fb.com>

Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)

committer Yann Collet <cyan@fb.com>

Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)
author Yann Collet <cyan@fb.com>
Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)
committer Yann Collet <cyan@fb.com>
Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)
diff --git a/lib/lz4.c b/lib/lz4.c

index 06d24da..0628eac 100644 (file)
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1813,7 +1813,8 @@ LZ4_decompress_generic(
              if ((dict==usingExtDict) && (match < lowPrefix)) {
                  if (unlikely(op+length > oend-LASTLITERALS)) {
                      if (partialDecoding) {
-                        length = MIN(length, (size_t)(oend-op));  /* reach end of buffer */
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
                      } else {
                          goto _output_error;  /* end-of-block condition violated */
                  }   }
@@ -1921,29 +1922,34 @@ LZ4_decompress_generic(
                || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
              {
                  /* We've either hit the input parsing restriction or the output parsing restriction.
-                 * If we've hit the input parsing condition then this must be the last sequence.
-                 * If we've hit the output parsing condition then we are either using partialDecoding
-                 * or we've hit the output parsing condition.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                   */
                  if (partialDecoding) {
                      /* Since we are partial decoding we may be in this block because of the output parsing
                       * restriction, which is not valid since the output buffer is allowed to be undersized.
                       */
                      assert(endOnInput);
-                    /* If we're in this block because of the input parsing condition, then we must be on the
-                     * last sequence (or invalid), so we must check that we exactly consume the input.
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
                       */
-                    if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend) && (cpy != oend)) { goto _output_error; }
-                    assert(ip+length <= iend);
-                    /* We are finishing in the middle of a literals segment.
-                     * Break after the copy.
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
                       */
                      if (cpy > oend) {
                          cpy = oend;
                          assert(op<=oend);
                          length = (size_t)(oend-op);
                      }
-                    assert(ip+length <= iend);
                  } else {
                      /* We must be on the last sequence because of the parsing limitations so check
                       * that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1954,14 +1960,15 @@ LZ4_decompress_generic(
                        */
                      if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
                  }
-                memmove(op, ip, length);  /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                  ip += length;
                  op += length;
-                /* Necessarily EOF when !partialDecoding. When partialDecoding
-                 * it is EOF if we've either filled the output buffer or hit
-                 * the input parsing restriction.
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for the following match.
                   */
-                if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                      break;
                  }
              } else {
diff --git a/lib/lz4.h b/lib/lz4.h

index 5209c10..5d2475c 100644 (file)
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
   *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
   *  into destination buffer 'dst' of size 'dstCapacity'.
   *  Up to 'targetOutputSize' bytes will be decoded.
- *  The function stops decoding on reaching this objective,
- *  which can boost performance when only the beginning of a block is required.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
   *
- * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
   *           If source stream is detected malformed, function returns a negative result.
   *
- *  Note : @return can be < targetOutputSize, if compressed block contains less data.
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
   *
- *  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
- *           and expects targetOutputSize <= dstCapacity.
- *           It effectively stops decoding on reaching targetOutputSize,
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
   *           so dstCapacity is kind of redundant.
- *           This is because in a previous version of this function,
- *           decoding operation would not "break" a sequence in the middle.
- *           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
   *           it could write more bytes, though only up to dstCapacity.
   *           Some "margin" used to be required for this operation to work properly.
- *           This is no longer necessary.
- *           The function nonetheless keeps its signature, in an effort to not break API.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will generate, at most, the block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than the block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
   */
  LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
  
diff --git a/tests/fuzzer.c b/tests/fuzzer.c

index cbb53ca..beeb9d6 100644 (file)
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -618,13 +618,13 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
  
          /* Test partial decoding => must work */
          FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
-        {   size_t const missingBytes = FUZ_rand(&randState) % (unsigned)blockSize;
-            int const targetSize = (int)((size_t)blockSize - missingBytes);
+        {   size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize;
+            int const targetSize = (int)((size_t)blockSize - missingOutBytes);
              size_t const extraneousInBytes = FUZ_rand(&randState) % 2;
              int const inCSize = (int)((size_t)compressedSize + extraneousInBytes);
              char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
-            //DISPLAY("compressedSize=%i, inCSize=%i \n", compressedSize, inCSize);
-            //DISPLAY("decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize);
+            DISPLAYLEVEL(6,"compressedSize=%i, inCSize=%i \n", compressedSize, inCSize);
+            DISPLAYLEVEL(6,"decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize);
              int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize);
              FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
              FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize);
author	Yann Collet <cyan@fb.com>
	Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)
committer	Yann Collet <cyan@fb.com>
	Thu, 27 Aug 2020 07:17:57 +0000 (00:17 -0700)
lib/lz4.c		patch \| blob \| history
lib/lz4.h		patch \| blob \| history
tests/fuzzer.c		patch \| blob \| history