1 /* LzFind.c -- Match finder for LZ algorithms
\r
2 2009-04-22 : Igor Pavlov : Public domain */
\r
/* Value stored in a hash/son slot to mark it as empty: MatchFinder_Init fills
   p->hash with it and MatchFinder_Normalize3 substitutes it for any entry
   that is <= subValue. */
9 #define kEmptyHashValue 0
\r
/* MatchFinder_CheckLimits calls MatchFinder_Normalize when p->pos equals this
   value; MatchFinder_SetLimits starts its limit from (this value - p->pos). */
10 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
\r
11 #define kNormalizeStepMin (1 << 10) /* it must be a power of 2 */
\r
/* Clears the low bits below kNormalizeStepMin; MatchFinder_GetSubValue applies
   it so the subtracted value is a multiple of kNormalizeStepMin. */
12 #define kNormalizeMask (~(kNormalizeStepMin - 1))
\r
/* Upper bound (3 << 30) that MatchFinder_Create compares historySize against. */
13 #define kMaxHistorySize ((UInt32)3 << 30)
\r
/* NOTE(review): not referenced by any line visible in this listing. */
15 #define kStartMaxLen 3
\r
17 static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
\r
19 if (!p->directInput)
\r
21 alloc->Free(alloc, p->bufferBase);
\r
26 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
\r
28 static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
\r
30 UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
\r
33 p->blockSize = blockSize;
\r
36 if (p->bufferBase == 0 || p->blockSize != blockSize)
\r
38 LzInWindow_Free(p, alloc);
\r
39 p->blockSize = blockSize;
\r
40 p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
\r
42 return (p->bufferBase != 0);
\r
/* Returns p->buffer unchanged. Going by the function name, p->buffer is the
   pointer to the byte at the current position; MatchFinder_Init sets it to
   p->bufferBase. */
45 Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
\r
/* Returns the byte located `index` bytes from p->buffer. `index` is signed, so
   negative offsets address bytes before p->buffer. No range check is done
   here; the caller must keep the offset inside the window. */
46 Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
\r
/* Returns p->streamPos - p->pos, i.e. the distance between the stream position
   and the current position, as an unsigned 32-bit difference. */
48 UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
\r
50 void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
\r
52 p->posLimit -= subValue;
\r
54 p->streamPos -= subValue;
\r
57 static void MatchFinder_ReadBlock(CMatchFinder *p)
\r
59 if (p->streamEndWasReached || p->result != SZ_OK)
\r
63 UInt32 curSize = 0xFFFFFFFF - p->streamPos;
\r
64 if (curSize > p->directInputRem)
\r
65 curSize = (UInt32)p->directInputRem;
\r
66 p->directInputRem -= curSize;
\r
67 p->streamPos += curSize;
\r
68 if (p->directInputRem == 0)
\r
69 p->streamEndWasReached = 1;
\r
74 Byte *dest = p->buffer + (p->streamPos - p->pos);
\r
75 size_t size = (p->bufferBase + p->blockSize - dest);
\r
78 p->result = p->stream->Read(p->stream, dest, &size);
\r
79 if (p->result != SZ_OK)
\r
83 p->streamEndWasReached = 1;
\r
86 p->streamPos += (UInt32)size;
\r
87 if (p->streamPos - p->pos > p->keepSizeAfter)
\r
92 void MatchFinder_MoveBlock(CMatchFinder *p)
\r
94 memmove(p->bufferBase,
\r
95 p->buffer - p->keepSizeBefore,
\r
96 (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
\r
97 p->buffer = p->bufferBase + p->keepSizeBefore;
\r
100 int MatchFinder_NeedMove(CMatchFinder *p)
\r
102 if (p->directInput)
\r
104 /* if (p->streamEndWasReached) return 0; */
\r
105 return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
\r
108 void MatchFinder_ReadIfRequired(CMatchFinder *p)
\r
110 if (p->streamEndWasReached)
\r
112 if (p->keepSizeAfter >= p->streamPos - p->pos)
\r
113 MatchFinder_ReadBlock(p);
\r
116 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
\r
118 if (MatchFinder_NeedMove(p))
\r
119 MatchFinder_MoveBlock(p);
\r
120 MatchFinder_ReadBlock(p);
\r
123 static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
\r
127 p->numHashBytes = 4;
\r
131 #define kCrcPoly 0xEDB88320
\r
133 void MatchFinder_Construct(CMatchFinder *p)
\r
137 p->directInput = 0;
\r
139 MatchFinder_SetDefaultSettings(p);
\r
141 for (i = 0; i < 256; i++)
\r
145 for (j = 0; j < 8; j++)
\r
146 r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
\r
151 static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
\r
153 alloc->Free(alloc, p->hash);
\r
157 void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
\r
159 MatchFinder_FreeThisClassMemory(p, alloc);
\r
160 LzInWindow_Free(p, alloc);
\r
163 static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
\r
165 size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
\r
166 if (sizeInBytes / sizeof(CLzRef) != num)
\r
168 return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
\r
171 int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
\r
172 UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
\r
176 if (historySize > kMaxHistorySize)
\r
178 MatchFinder_Free(p, alloc);
\r
181 sizeReserv = historySize >> 1;
\r
182 if (historySize > ((UInt32)2 << 30))
\r
183 sizeReserv = historySize >> 2;
\r
184 sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
\r
186 p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
\r
187 p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
\r
188 /* we need one additional byte, since MoveBlock is called after pos++ and before the dictionary is used */
\r
189 if (LzInWindow_Create(p, sizeReserv, alloc))
\r
191 UInt32 newCyclicBufferSize = historySize + 1;
\r
193 p->matchMaxLen = matchMaxLen;
\r
195 p->fixedHashSize = 0;
\r
196 if (p->numHashBytes == 2)
\r
197 hs = (1 << 16) - 1;
\r
200 hs = historySize - 1;
\r
206 hs |= 0xFFFF; /* don't change it! It's required for Deflate */
\r
207 if (hs > (1 << 24))
\r
209 if (p->numHashBytes == 3)
\r
210 hs = (1 << 24) - 1;
\r
217 if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
\r
218 if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
\r
219 if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
\r
220 hs += p->fixedHashSize;
\r
224 UInt32 prevSize = p->hashSizeSum + p->numSons;
\r
226 p->historySize = historySize;
\r
227 p->hashSizeSum = hs;
\r
228 p->cyclicBufferSize = newCyclicBufferSize;
\r
229 p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
\r
230 newSize = p->hashSizeSum + p->numSons;
\r
231 if (p->hash != 0 && prevSize == newSize)
\r
233 MatchFinder_FreeThisClassMemory(p, alloc);
\r
234 p->hash = AllocRefs(newSize, alloc);
\r
237 p->son = p->hash + p->hashSizeSum;
\r
242 MatchFinder_Free(p, alloc);
\r
246 static void MatchFinder_SetLimits(CMatchFinder *p)
\r
248 UInt32 limit = kMaxValForNormalize - p->pos;
\r
249 UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
\r
250 if (limit2 < limit)
\r
252 limit2 = p->streamPos - p->pos;
\r
253 if (limit2 <= p->keepSizeAfter)
\r
259 limit2 -= p->keepSizeAfter;
\r
260 if (limit2 < limit)
\r
263 UInt32 lenLimit = p->streamPos - p->pos;
\r
264 if (lenLimit > p->matchMaxLen)
\r
265 lenLimit = p->matchMaxLen;
\r
266 p->lenLimit = lenLimit;
\r
268 p->posLimit = p->pos + limit;
\r
271 void MatchFinder_Init(CMatchFinder *p)
\r
274 for (i = 0; i < p->hashSizeSum; i++)
\r
275 p->hash[i] = kEmptyHashValue;
\r
276 p->cyclicBufferPos = 0;
\r
277 p->buffer = p->bufferBase;
\r
278 p->pos = p->streamPos = p->cyclicBufferSize;
\r
280 p->streamEndWasReached = 0;
\r
281 MatchFinder_ReadBlock(p);
\r
282 MatchFinder_SetLimits(p);
\r
285 static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
\r
287 return (p->pos - p->historySize - 1) & kNormalizeMask;
\r
290 void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
\r
293 for (i = 0; i < numItems; i++)
\r
295 UInt32 value = items[i];
\r
296 if (value <= subValue)
\r
297 value = kEmptyHashValue;
\r
304 static void MatchFinder_Normalize(CMatchFinder *p)
\r
306 UInt32 subValue = MatchFinder_GetSubValue(p);
\r
307 MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
\r
308 MatchFinder_ReduceOffsets(p, subValue);
\r
311 static void MatchFinder_CheckLimits(CMatchFinder *p)
\r
313 if (p->pos == kMaxValForNormalize)
\r
314 MatchFinder_Normalize(p);
\r
315 if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
\r
316 MatchFinder_CheckAndMoveAndRead(p);
\r
317 if (p->cyclicBufferPos == p->cyclicBufferSize)
\r
318 p->cyclicBufferPos = 0;
\r
319 MatchFinder_SetLimits(p);
\r
322 static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
\r
323 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
\r
324 UInt32 *distances, UInt32 maxLen)
\r
326 son[_cyclicBufferPos] = curMatch;
\r
329 UInt32 delta = pos - curMatch;
\r
330 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
\r
333 const Byte *pb = cur - delta;
\r
334 curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
\r
335 if (pb[maxLen] == cur[maxLen] && *pb == *cur)
\r
338 while (++len != lenLimit)
\r
339 if (pb[len] != cur[len])
\r
343 *distances++ = maxLen = len;
\r
344 *distances++ = delta - 1;
\r
345 if (len == lenLimit)
\r
353 UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
\r
354 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
\r
355 UInt32 *distances, UInt32 maxLen)
\r
357 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
\r
358 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
\r
359 UInt32 len0 = 0, len1 = 0;
\r
362 UInt32 delta = pos - curMatch;
\r
363 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
\r
365 *ptr0 = *ptr1 = kEmptyHashValue;
\r
369 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
\r
370 const Byte *pb = cur - delta;
\r
371 UInt32 len = (len0 < len1 ? len0 : len1);
\r
372 if (pb[len] == cur[len])
\r
374 if (++len != lenLimit && pb[len] == cur[len])
\r
375 while (++len != lenLimit)
\r
376 if (pb[len] != cur[len])
\r
380 *distances++ = maxLen = len;
\r
381 *distances++ = delta - 1;
\r
382 if (len == lenLimit)
\r
390 if (pb[len] < cur[len])
\r
408 static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
\r
409 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
\r
411 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
\r
412 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
\r
413 UInt32 len0 = 0, len1 = 0;
\r
416 UInt32 delta = pos - curMatch;
\r
417 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
\r
419 *ptr0 = *ptr1 = kEmptyHashValue;
\r
423 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
\r
424 const Byte *pb = cur - delta;
\r
425 UInt32 len = (len0 < len1 ? len0 : len1);
\r
426 if (pb[len] == cur[len])
\r
428 while (++len != lenLimit)
\r
429 if (pb[len] != cur[len])
\r
432 if (len == lenLimit)
\r
440 if (pb[len] < cur[len])
\r
459 ++p->cyclicBufferPos; \
\r
461 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
\r
/* MOVE_POS followed by `return offset;`. Only usable inside a function that
   has a variable named `offset` in scope and returns its value. */
463 #define MOVE_POS_RET MOVE_POS return offset;
\r
/* Function wrapper around the MOVE_POS macro. The part of MOVE_POS visible in
   this listing increments p->cyclicBufferPos and p->pos and calls
   MatchFinder_CheckLimits when p->pos reaches p->posLimit.
   NOTE(review): the first lines of MOVE_POS are not visible here - confirm
   against the full macro. */
465 static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
\r
467 #define GET_MATCHES_HEADER2(minLen, ret_op) \
\r
468 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
\r
469 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
\r
/* Prologue for the *_GetMatches functions: GET_MATCHES_HEADER2 with `return 0`
   as the statement executed when p->lenLimit is below minLen (after
   MatchFinder_MovePos has advanced the position). */
472 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
\r
/* Same prologue with `continue` as the early-exit statement, so it must be
   expanded inside a loop body. */
473 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
\r
/* Expands to six comma-separated arguments read from p: pos, buffer, son,
   cyclicBufferPos, cyclicBufferSize and cutValue. It is spliced into the calls
   to GetMatchesSpec1, SkipMatchesSpec and Hc_GetMatchesSpec after their first
   two arguments.
   NOTE(review): the macro argument is not parenthesized, so pass only a plain
   identifier. */
475 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
\r
/* Epilogue for the binary-tree *_GetMatches functions. Calls GetMatchesSpec1
   with its output starting at distances + offset, stores the returned pointer
   minus `distances` (the count of UInt32 entries now in the array) back into
   `offset`, then MOVE_POS_RET advances the position and returns `offset`.
   Expects lenLimit, curMatch, p and distances in the enclosing scope. */
477 #define GET_MATCHES_FOOTER(offset, maxLen) \
\r
478 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
\r
479 distances + offset, maxLen) - distances); MOVE_POS_RET;
\r
/* Epilogue for the binary-tree *_Skip loops: calls SkipMatchesSpec for the
   current position (no matches are reported), then MOVE_POS advances to the
   next position. Expects lenLimit, curMatch and p in the enclosing scope. */
481 #define SKIP_FOOTER \
\r
482 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
\r
484 static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
487 GET_MATCHES_HEADER(2)
\r
489 curMatch = p->hash[hashValue];
\r
490 p->hash[hashValue] = p->pos;
\r
492 GET_MATCHES_FOOTER(offset, 1)
\r
495 UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
498 GET_MATCHES_HEADER(3)
\r
500 curMatch = p->hash[hashValue];
\r
501 p->hash[hashValue] = p->pos;
\r
503 GET_MATCHES_FOOTER(offset, 2)
\r
506 static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
508 UInt32 hash2Value, delta2, maxLen, offset;
\r
509 GET_MATCHES_HEADER(3)
\r
513 delta2 = p->pos - p->hash[hash2Value];
\r
514 curMatch = p->hash[kFix3HashSize + hashValue];
\r
516 p->hash[hash2Value] =
\r
517 p->hash[kFix3HashSize + hashValue] = p->pos;
\r
522 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
\r
524 for (; maxLen != lenLimit; maxLen++)
\r
525 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
\r
527 distances[0] = maxLen;
\r
528 distances[1] = delta2 - 1;
\r
530 if (maxLen == lenLimit)
\r
532 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
\r
536 GET_MATCHES_FOOTER(offset, maxLen)
\r
539 static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
541 UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
\r
542 GET_MATCHES_HEADER(4)
\r
546 delta2 = p->pos - p->hash[ hash2Value];
\r
547 delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
\r
548 curMatch = p->hash[kFix4HashSize + hashValue];
\r
550 p->hash[ hash2Value] =
\r
551 p->hash[kFix3HashSize + hash3Value] =
\r
552 p->hash[kFix4HashSize + hashValue] = p->pos;
\r
556 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
\r
558 distances[0] = maxLen = 2;
\r
559 distances[1] = delta2 - 1;
\r
562 if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
\r
565 distances[offset + 1] = delta3 - 1;
\r
571 for (; maxLen != lenLimit; maxLen++)
\r
572 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
\r
574 distances[offset - 2] = maxLen;
\r
575 if (maxLen == lenLimit)
\r
577 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
\r
583 GET_MATCHES_FOOTER(offset, maxLen)
\r
586 static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
588 UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
\r
589 GET_MATCHES_HEADER(4)
\r
593 delta2 = p->pos - p->hash[ hash2Value];
\r
594 delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
\r
595 curMatch = p->hash[kFix4HashSize + hashValue];
\r
597 p->hash[ hash2Value] =
\r
598 p->hash[kFix3HashSize + hash3Value] =
\r
599 p->hash[kFix4HashSize + hashValue] = p->pos;
\r
603 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
\r
605 distances[0] = maxLen = 2;
\r
606 distances[1] = delta2 - 1;
\r
609 if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
\r
612 distances[offset + 1] = delta3 - 1;
\r
618 for (; maxLen != lenLimit; maxLen++)
\r
619 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
\r
621 distances[offset - 2] = maxLen;
\r
622 if (maxLen == lenLimit)
\r
624 p->son[p->cyclicBufferPos] = curMatch;
\r
630 offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
\r
631 distances + offset, maxLen) - (distances));
\r
635 UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
\r
638 GET_MATCHES_HEADER(3)
\r
640 curMatch = p->hash[hashValue];
\r
641 p->hash[hashValue] = p->pos;
\r
642 offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
\r
643 distances, 2) - (distances));
\r
647 static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
653 curMatch = p->hash[hashValue];
\r
654 p->hash[hashValue] = p->pos;
\r
657 while (--num != 0);
\r
660 void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
666 curMatch = p->hash[hashValue];
\r
667 p->hash[hashValue] = p->pos;
\r
670 while (--num != 0);
\r
673 static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
680 curMatch = p->hash[kFix3HashSize + hashValue];
\r
681 p->hash[hash2Value] =
\r
682 p->hash[kFix3HashSize + hashValue] = p->pos;
\r
685 while (--num != 0);
\r
688 static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
692 UInt32 hash2Value, hash3Value;
\r
695 curMatch = p->hash[kFix4HashSize + hashValue];
\r
696 p->hash[ hash2Value] =
\r
697 p->hash[kFix3HashSize + hash3Value] = p->pos;
\r
698 p->hash[kFix4HashSize + hashValue] = p->pos;
\r
701 while (--num != 0);
\r
704 static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
708 UInt32 hash2Value, hash3Value;
\r
711 curMatch = p->hash[kFix4HashSize + hashValue];
\r
712 p->hash[ hash2Value] =
\r
713 p->hash[kFix3HashSize + hash3Value] =
\r
714 p->hash[kFix4HashSize + hashValue] = p->pos;
\r
715 p->son[p->cyclicBufferPos] = curMatch;
\r
718 while (--num != 0);
\r
721 void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
\r
727 curMatch = p->hash[hashValue];
\r
728 p->hash[hashValue] = p->pos;
\r
729 p->son[p->cyclicBufferPos] = curMatch;
\r
732 while (--num != 0);
\r
735 void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
\r
737 vTable->Init = (Mf_Init_Func)MatchFinder_Init;
\r
738 vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
\r
739 vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
\r
740 vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
\r
743 vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
\r
744 vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
\r
746 else if (p->numHashBytes == 2)
\r
748 vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
\r
749 vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
\r
751 else if (p->numHashBytes == 3)
\r
753 vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
\r
754 vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
\r
758 vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
\r
759 vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
\r