2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
9 // This is a derivative work based on Zlib, copyright below:
11 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
13 This software is provided 'as-is', without any express or implied
14 warranty. In no event will the authors be held liable for any damages
15 arising from the use of this software.
17 Permission is granted to anyone to use this software for any purpose,
18 including commercial applications, and to alter it and redistribute it
19 freely, subject to the following restrictions:
21 1. The origin of this software must not be misrepresented; you must not
22 claim that you wrote the original software. If you use this software
23 in a product, an acknowledgment in the product documentation would be
24 appreciated but is not required.
25 2. Altered source versions must be plainly marked as such, and must not be
26 misrepresented as being the original software.
27 3. This notice may not be removed or altered from any source distribution.
29 Jean-loup Gailly Mark Adler
30 jloup@gzip.org madler@alumni.caltech.edu
32 The data format used by the zlib library is described by RFCs (Request for
33 Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
34 (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
37 #ifndef BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_IPP
38 #define BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_IPP
40 #include <boost/beast/zlib/detail/deflate_stream.hpp>
41 #include <boost/beast/zlib/detail/ranges.hpp>
42 #include <boost/assert.hpp>
43 #include <boost/config.hpp>
44 #include <boost/make_unique.hpp>
45 #include <boost/optional.hpp>
46 #include <boost/throw_exception.hpp>
52 #include <type_traits>
62 * The "deflation" process depends on being able to identify portions
63 * of the input text which are identical to earlier input (within a
64 * sliding window trailing behind the input currently being processed).
66 * Each code tree is stored in a compressed form which is itself
67 * a Huffman encoding of the lengths of all the code strings (in
68 * ascending order by source values). The actual code strings are
69 * reconstructed from the lengths in the inflate process, as described
70 * in the deflate specification.
72 * The most straightforward technique turns out to be the fastest for
73 * most input files: try all possible matches and select the longest.
74 * The key feature of this algorithm is that insertions into the string
75 * dictionary are very simple and thus fast, and deletions are avoided
76 * completely. Insertions are performed at each input character, whereas
77 * string matches are performed only when the previous match ends. So it
78 * is preferable to spend more time in matches to allow very fast string
79 * insertions and avoid deletions. The matching algorithm for small
80 * strings is inspired from that of Rabin & Karp. A brute force approach
81 * is used to find longer strings when a small match has been found.
82 * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
83 * (by Leonid Broukhis).
84 * A previous version of this file used a more sophisticated algorithm
85 * (by Fiala and Greene) which is guaranteed to run in linear amortized
86 * time, but has a larger average cost, uses more memory and is patented.
87 * However the F&G algorithm may be faster for some highly redundant
88 * files if the parameter max_chain_length (described below) is too large.
92 * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
93 * I found it in 'freeze' written by Leonid Broukhis.
94 * Thanks to many people for bug reports and testing.
98 * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
99 * Available in http://tools.ietf.org/html/rfc1951
101 * A description of the Rabin and Karp algorithm is given in the book
102 * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
104 * Fiala,E.R., and Greene,D.H.
105 * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
109 /* Generate the codes for a given tree and bit counts (which need not be optimal).
110 IN assertion: the array bl_count contains the bit length statistics for
111 the given tree and the field len is set for all tree elements.
112 OUT assertion: the field code is set for all tree elements of non zero code length.
// Assigns canonical Huffman codes to tree[0..max_code]: on entry tree[n].dl
// holds each symbol's code length, on exit tree[n].fc holds its code,
// bit-reversed because DEFLATE emits codes LSB-first (mirrors zlib gen_codes).
// NOTE(review): this file is a sampled extraction — braces and some upstream
// statements are missing from this span; confirm against upstream before edits.
117 gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count)
119 std::uint16_t next_code[maxBits+1]; /* next code value for each bit length */
120 std::uint16_t code = 0; /* running code value */
121 int bits; /* bit index */
122 int n; /* code index */
124 // The distribution counts are first used to
125 // generate the code values without bit reversal.
126 for(bits = 1; bits <= maxBits; bits++)
128 code = (code + bl_count[bits-1]) << 1;
129 next_code[bits] = code;
131 // Check that the bit counts in bl_count are consistent.
132 // The last code must be all ones.
133 BOOST_ASSERT(code + bl_count[maxBits]-1 == (1<<maxBits)-1);
// Second pass: hand out codes in symbol order, consuming next_code[len].
// NOTE(review): the upstream zero-length skip ("if len == 0 continue")
// falls in a gap here — presumably present between these lines; verify.
134 for(n = 0; n <= max_code; n++)
136 int len = tree[n].dl;
139 tree[n].fc = bi_reverse(next_code[len]++, len);
144 deflate_stream::get_lut() ->
153 // number of codes at each bit length for an optimal tree
154 //std::uint16_t bl_count[maxBits+1];
156 // Initialize the mapping length (0..255) -> length code (0..28)
157 std::uint8_t length = 0;
158 for(std::uint8_t code = 0; code < lengthCodes-1; ++code)
160 tables.base_length[code] = length;
161 auto const run = 1U << tables.extra_lbits[code];
162 for(unsigned n = 0; n < run; ++n)
163 tables.length_code[length++] = code;
165 BOOST_ASSERT(length == 0);
166 // Note that the length 255 (match length 258) can be represented
167 // in two different ways: code 284 + 5 bits or code 285, so we
168 // overwrite length_code[255] to use the best encoding:
169 tables.length_code[255] = lengthCodes-1;
171 // Initialize the mapping dist (0..32K) -> dist code (0..29)
174 std::uint16_t dist = 0;
175 for(code = 0; code < 16; code++)
177 tables.base_dist[code] = dist;
178 auto const run = 1U << tables.extra_dbits[code];
179 for(unsigned n = 0; n < run; ++n)
180 tables.dist_code[dist++] = code;
182 BOOST_ASSERT(dist == 256);
183 // from now on, all distances are divided by 128
185 for(; code < dCodes; ++code)
187 tables.base_dist[code] = dist << 7;
188 auto const run = 1U << (tables.extra_dbits[code]-7);
189 for(std::size_t n = 0; n < run; ++n)
190 tables.dist_code[256 + dist++] = code;
192 BOOST_ASSERT(dist == 256);
195 // Construct the codes of the static literal tree
196 std::uint16_t bl_count[maxBits+1];
197 std::memset(bl_count, 0, sizeof(bl_count));
200 tables.ltree[n++].dl = 8;
203 tables.ltree[n++].dl = 9;
206 tables.ltree[n++].dl = 7;
209 tables.ltree[n++].dl = 8;
211 // Codes 286 and 287 do not exist, but we must include them in the tree
212 // construction to get a canonical Huffman tree (longest code all ones)
213 gen_codes(tables.ltree, lCodes+1, bl_count);
215 for(n = 0; n < dCodes; ++n)
217 tables.dtree[n].dl = 5;
219 static_cast<std::uint16_t>(bi_reverse(n, 5));
223 static init const data;
235 if(level == default_size)
238 // VFALCO What do we do about this?
239 // until 256-byte window bug fixed
243 if(level < 0 || level > 9)
244 BOOST_THROW_EXCEPTION(std::invalid_argument{
247 if(windowBits < 8 || windowBits > 15)
248 BOOST_THROW_EXCEPTION(std::invalid_argument{
249 "invalid windowBits"});
251 if(memLevel < 1 || memLevel > max_mem_level)
252 BOOST_THROW_EXCEPTION(std::invalid_argument{
253 "invalid memLevel"});
255 w_bits_ = windowBits;
257 hash_bits_ = memLevel + 7;
259 // 16K elements by default
260 lit_bufsize_ = 1 << (memLevel + 6);
263 strategy_ = strategy;
// Returns an upper bound on the compressed size of sourceLen input bytes
// (the analogue of zlib's deflateBound). A conservative bound is computed
// first; the tight bound is returned only for the default window/memLevel.
// NOTE(review): sampled extraction — the declarations of complen/wraplen
// fall in gaps between these lines; confirm against upstream.
284 doUpperBound(std::size_t sourceLen) const
289 /* conservative upper bound for compressed data */
290 complen = sourceLen +
291 ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
293 /* compute wrapper length */
296 /* if not default parameters, return conservative bound */
297 if(w_bits_ != 15 || hash_bits_ != 8 + 7)
298 return complen + wraplen;
300 /* default settings: return tight bound for that case */
301 return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
302 (sourceLen >> 25) + 13 - 6 + wraplen;
// Fragment of doTune(): stores the caller-supplied match-search tuning
// knobs (good/nice match lengths, lazy-match cutoff, hash-chain limit)
// that the longest-match loop reads. Signature not visible in this chunk.
313 good_match_ = good_length;
314 nice_match_ = nice_length;
315 max_lazy_match_ = max_lazy;
316 max_chain_length_ = max_chain;
// Changes compression level and strategy mid-stream (zlib deflateParams
// analogue). If the compression function would change, any pending block
// is flushed first so already-buffered data is encoded with the old setup.
// NOTE(review): sampled extraction — error-return paths and the level_
// assignment fall in gaps here; confirm against upstream.
321 doParams(z_params& zs, int level, Strategy strategy, error_code& ec)
325 if(level == default_size)
327 if(level < 0 || level > 9)
329 ec = error::stream_error;
332 func = get_config(level_).func;
334 if((strategy != strategy_ || func != get_config(level).func) &&
337 // Flush the last buffer:
338 doWrite(zs, Flush::block, ec);
339 if(ec == error::need_buffers && pending_ == 0)
// Adopt the new level's tuning parameters and the new strategy.
345 max_lazy_match_ = get_config(level).max_lazy;
346 good_match_ = get_config(level).good_length;
347 nice_match_ = get_config(level).nice_length;
348 max_chain_length_ = get_config(level).max_chain;
350 strategy_ = strategy;
353 // VFALCO boost::optional param is a workaround for
354 // gcc "maybe uninitialized" warning
355 // https://github.com/boostorg/beast/issues/532
// Main deflate entry point (zlib deflate() analogue): validates stream
// state, drains pending output, runs the strategy-selected compression
// function, and emits flush/finish framing. Sets ec instead of returning
// a zlib status code.
// NOTE(review): sampled extraction — returns, braces and several branches
// (e.g. the switch header, tr_align for partial flush) fall in gaps.
359 doWrite(z_params& zs, boost::optional<Flush> flush, error_code& ec)
363 if(zs.next_in == nullptr && zs.avail_in != 0)
364 BOOST_THROW_EXCEPTION(std::invalid_argument{"invalid input"});
366 if(zs.next_out == nullptr ||
367 (status_ == FINISH_STATE && flush != Flush::finish))
369 ec = error::stream_error;
372 if(zs.avail_out == 0)
374 ec = error::need_buffers;
378 // value of flush param for previous deflate call
379 auto old_flush = boost::make_optional<Flush>(
380 last_flush_.is_initialized(),
381 last_flush_ ? *last_flush_ : Flush::none);
385 // Flush as much pending output as possible
389 if(zs.avail_out == 0)
391 /* Since avail_out is 0, deflate will be called again with
392 * more output space, but possibly with both pending and
393 * avail_in equal to zero. There won't be anything to do,
394 * but this is not an error situation so make sure we
395 * return OK instead of BUF_ERROR at next call of deflate:
397 last_flush_ = boost::none;
401 else if(zs.avail_in == 0 && (
402 old_flush && flush <= *old_flush // Caution: depends on enum order
403 ) && flush != Flush::finish)
405 /* Make sure there is something to do and avoid duplicate consecutive
406 * flushes. For repeated and useless calls with Flush::finish, we keep
407 * returning Z_STREAM_END instead of Z_BUF_ERROR.
409 ec = error::need_buffers;
413 // User must not provide more input after the first FINISH:
414 if(status_ == FINISH_STATE && zs.avail_in != 0)
416 ec = error::need_buffers;
420 /* Start a new block or continue the current one.
422 if(zs.avail_in != 0 || lookahead_ != 0 ||
423 (flush != Flush::none && status_ != FINISH_STATE))
// Dispatch on strategy: dedicated Huffman-only / RLE paths, otherwise
// the per-level compression function from the configuration table.
429 case Strategy::huffman:
430 bstate = deflate_huff(zs, flush.get());
433 bstate = deflate_rle(zs, flush.get());
437 bstate = (this->*(get_config(level_).func))(zs, flush.get());
442 if(bstate == finish_started || bstate == finish_done)
444 status_ = FINISH_STATE;
446 if(bstate == need_more || bstate == finish_started)
448 if(zs.avail_out == 0)
450 last_flush_ = boost::none; /* avoid BUF_ERROR next call, see above */
453 /* If flush != Flush::none && avail_out == 0, the next call
454 of deflate should use the same flush parameter to make sure
455 that the flush is complete. So we don't have to output an
456 empty block here, this will be done at next call. This also
457 ensures that for a very small output buffer, we emit at most
461 if(bstate == block_done)
463 if(flush == Flush::partial)
467 else if(flush != Flush::block)
469 /* FULL_FLUSH or SYNC_FLUSH */
470 tr_stored_block(nullptr, 0L, 0);
471 /* For a full flush, this empty block will be recognized
472 * as a special marker by inflate_sync().
474 if(flush == Flush::full)
476 clear_hash(); // forget history
486 if(zs.avail_out == 0)
488 last_flush_ = boost::none; /* avoid BUF_ERROR at next call, see above */
494 if(flush == Flush::finish)
496 ec = error::end_of_stream;
501 // VFALCO Warning: untested
// Presets the compression dictionary (zlib deflateSetDictionary analogue):
// copies up to w_size_ trailing bytes of dict into the window and inserts
// all minMatch-grams into the hash chains so later matches can reference it.
// NOTE(review): sampled extraction — the state-check condition, the
// fill_window/read-buffer plumbing and loop braces fall in gaps here.
504 doDictionary(Byte const* dict, uInt dictLength, error_code& ec)
508 ec = error::stream_error;
514 /* if dict would fill window, just replace the history */
515 if(dictLength >= w_size_)
521 dict += dictLength - w_size_; /* use the tail */
522 dictLength = w_size_;
525 /* insert dict into window and hash */
527 zs.avail_in = dictLength;
528 zs.next_in = (const Byte *)dict;
// Hash every position that has at least minMatch bytes of lookahead.
532 while(lookahead_ >= minMatch)
534 uInt str = strstart_;
535 uInt n = lookahead_ - (minMatch-1);
538 update_hash(ins_h_, window_[str + minMatch-1]);
539 prev_[str & w_mask_] = head_[ins_h_];
540 head_[ins_h_] = (std::uint16_t)str;
545 lookahead_ = minMatch-1;
// Leave the tail (< minMatch bytes) as lookahead for the next block.
548 strstart_ += lookahead_;
549 block_start_ = (long)strstart_;
550 insert_ = lookahead_;
552 match_length_ = prev_length_ = minMatch-1;
553 match_available_ = 0;
// Inserts bits into the pending output stream ahead of compressed data
// (zlib deflatePrime analogue); fails with need_buffers when there is not
// enough room between d_buf_ and the pending output pointer.
// NOTE(review): sampled extraction — the insertion loop body is mostly
// in gaps between these lines.
558 doPrime(int bits, int value, error_code& ec)
562 if((Byte *)(d_buf_) < pending_out_ + ((Buf_size + 7) >> 3))
564 ec = error::need_buffers;
570 int put = Buf_size - bi_valid_;
573 bi_buf_ |= (std::uint16_t)((value & ((1 << put) - 1)) << bi_valid_);
584 doPending(unsigned* value, int* bits)
592 //--------------------------------------------------------------------------
594 // Do lazy initialization
599 // Caller must set these:
606 w_size_ = 1 << w_bits_;
607 w_mask_ = w_size_ - 1;
609 hash_size_ = 1 << hash_bits_;
610 hash_mask_ = hash_size_ - 1;
611 hash_shift_ = ((hash_bits_+minMatch-1)/minMatch);
613 auto const nwindow = w_size_ * 2*sizeof(Byte);
614 auto const nprev = w_size_ * sizeof(std::uint16_t);
615 auto const nhead = hash_size_ * sizeof(std::uint16_t);
616 auto const noverlay = lit_bufsize_ * (sizeof(std::uint16_t)+2);
617 auto const needed = nwindow + nprev + nhead + noverlay;
619 if(! buf_ || buf_size_ != needed)
621 buf_ = boost::make_unique_noinit<
622 std::uint8_t[]>(needed);
626 window_ = reinterpret_cast<Byte*>(buf_.get());
627 prev_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow);
628 std::memset(prev_, 0, nprev);
629 head_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow + nprev);
631 /* We overlay pending_buf_ and d_buf_ + l_buf_. This works
632 since the average output size for(length, distance)
635 auto overlay = reinterpret_cast<std::uint16_t*>(
636 buf_.get() + nwindow + nprev + nhead);
638 // nothing written to window_ yet
642 reinterpret_cast<std::uint8_t*>(overlay);
644 static_cast<std::uint32_t>(lit_bufsize_) *
645 (sizeof(std::uint16_t) + 2L);
647 d_buf_ = overlay + lit_bufsize_ / sizeof(std::uint16_t);
648 l_buf_ = pending_buf_ + (1 + sizeof(std::uint16_t)) * lit_bufsize_;
651 pending_out_ = pending_buf_;
653 status_ = BUSY_STATE;
654 last_flush_ = Flush::none;
662 /* Initialize the "longest match" routines for a new zlib stream
668 window_size_ = (std::uint32_t)2L*w_size_;
672 /* Set the default configuration parameters:
674 // VFALCO TODO just copy the config struct
675 max_lazy_match_ = get_config(level_).max_lazy;
676 good_match_ = get_config(level_).good_length;
677 nice_match_ = get_config(level_).nice_length;
678 max_chain_length_ = get_config(level_).max_chain;
684 match_length_ = prev_length_ = minMatch-1;
685 match_available_ = 0;
689 // Initialize a new block.
695 for(int n = 0; n < lCodes; n++)
696 dyn_ltree_[n].fc = 0;
697 for(int n = 0; n < dCodes; n++)
698 dyn_dtree_[n].fc = 0;
699 for(int n = 0; n < blCodes; n++)
701 dyn_ltree_[END_BLOCK].fc = 1;
708 /* Restore the heap property by moving down the tree starting at node k,
709 exchanging a node with the smallest of its two sons if necessary,
710 stopping when the heap property is re-established (each father smaller
// Sift-down for the frequency-ordered heap used during tree construction;
// "smaller" compares by frequency then depth (see upstream). Fragment:
// the signature line, v's declaration, and the swap statements are in gaps.
716 ct_data const* tree, // the tree to restore
717 int k) // node to move down
720 int j = k << 1; // left son of k
721 while(j <= heap_len_)
723 // Set j to the smallest of the two sons:
725 smaller(tree, heap_[j+1], heap_[j]))
727 // Exit if v is smaller than both sons
728 if(smaller(tree, v, heap_[j]))
731 // Exchange v with the smallest son
735 // And continue down the tree,
736 // setting j to the left son of k
742 /* Remove the smallest element from the heap and recreate the heap
743 with one less element. Updates heap and heap_len.
// Pops the minimum-frequency node into `top`, moves the last heap element
// to the root, and restores the heap invariant with one sift-down.
747 pqremove(ct_data const* tree, int& top)
749 top = heap_[kSmallest];
750 heap_[kSmallest] = heap_[heap_len_--];
751 pqdownheap(tree, kSmallest);
754 /* Compute the optimal bit lengths for a tree and update the total bit length
755 for the current block.
756 IN assertion: the fields freq and dad are set, heap[heap_max] and
757 above are the tree nodes sorted by increasing frequency.
758 OUT assertions: the field len is set to the optimal bit length, the
759 array bl_count contains the frequencies for each bit length.
760 The length opt_len is updated; static_len is also updated if stree is
// Computes optimal code bit lengths for a built Huffman tree, clamping any
// length above max_length and repairing the resulting overflow so the code
// stays complete; also accumulates opt_len_/static_len_ totals.
// NOTE(review): sampled extraction — h/n declarations, several loop braces
// and the f = tree[n].fc read fall in gaps between these lines.
765 gen_bitlen(tree_desc *desc)
767 ct_data *tree = desc->dyn_tree;
768 int max_code = desc->max_code;
769 ct_data const* stree = desc->stat_desc->static_tree;
770 std::uint8_t const *extra = desc->stat_desc->extra_bits;
771 int base = desc->stat_desc->extra_base;
772 int max_length = desc->stat_desc->max_length;
774 int n, m; // iterate over the tree elements
775 int bits; // bit length
776 int xbits; // extra bits
777 std::uint16_t f; // frequency
778 int overflow = 0; // number of elements with bit length too large
780 std::fill(&bl_count_[0], &bl_count_[maxBits+1], std::uint16_t{0});
782 /* In a first pass, compute the optimal bit lengths (which may
783 * overflow in the case of the bit length tree).
785 tree[heap_[heap_max_]].dl = 0; // root of the heap
787 for(h = heap_max_+1; h < HEAP_SIZE; h++) {
789 bits = tree[tree[n].dl].dl + 1;
790 if(bits > max_length) bits = max_length, overflow++;
791 // We overwrite tree[n].dl which is no longer needed
792 tree[n].dl = (std::uint16_t)bits;
795 continue; // not a leaf node
800 xbits = extra[n-base];
802 opt_len_ += (std::uint32_t)f * (bits + xbits);
804 static_len_ += (std::uint32_t)f * (stree[n].dl + xbits);
// Overflow repair: shorten one deeper leaf per overflowed code until the
// Kraft inequality holds again.
809 // Find the first bit length which could increase:
813 while(bl_count_[bits] == 0)
815 bl_count_[bits]--; // move one leaf down the tree
816 bl_count_[bits+1] += 2; // move one overflow item as its brother
817 bl_count_[max_length]--;
818 /* The brother of the overflow item also moves one step up,
819 * but this does not affect bl_count[max_length]
825 /* Now recompute all bit lengths, scanning in increasing frequency.
826 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
827 * lengths instead of fixing only the wrong ones. This idea is taken
828 * from 'ar' written by Haruhiko Okumura.)
830 for(bits = max_length; bits != 0; bits--)
838 if((unsigned) tree[m].dl != (unsigned) bits)
840 opt_len_ += ((long)bits - (long)tree[m].dl) *(long)tree[m].fc;
841 tree[m].dl = (std::uint16_t)bits;
848 /* Construct one Huffman tree and assigns the code bit strings and lengths.
849 Update the total bit length for the current block.
850 IN assertion: the field freq is set for all tree elements.
851 OUT assertions: the fields len and code are set to the optimal bit length
852 and corresponding code. The length opt_len is updated; static_len is
853 also updated if stree is not null. The field max_code is set.
// Builds one Huffman tree from the frequencies in desc->dyn_tree: heapify,
// repeatedly merge the two least-frequent nodes, then derive bit lengths
// (gen_bitlen) and codes (gen_codes). Ensures at least two nonzero codes,
// as the pkzip format requires.
// NOTE(review): sampled extraction — the freq-zero else-branch of the
// initial scan, tree[node].dl assignment and some braces fall in gaps.
857 build_tree(tree_desc *desc)
859 ct_data *tree = desc->dyn_tree;
860 ct_data const* stree = desc->stat_desc->static_tree;
861 int elems = desc->stat_desc->elems;
862 int n, m; // iterate over heap elements
863 int max_code = -1; // largest code with non zero frequency
864 int node; // new node being created
866 /* Construct the initial heap, with least frequent element in
867 * heap[kSmallest]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
868 * heap[0] is not used.
871 heap_max_ = HEAP_SIZE;
873 for(n = 0; n < elems; n++)
877 heap_[++(heap_len_)] = max_code = n;
886 /* The pkzip format requires that at least one distance code exists,
887 * and that at least one bit should be sent even if there is only one
888 * possible code. So to avoid special checks later on we force at least
889 * two codes of non zero frequency.
893 node = heap_[++(heap_len_)] = (max_code < 2 ? ++max_code : 0);
898 static_len_ -= stree[node].dl;
899 // node is 0 or 1 so it does not have extra bits
901 desc->max_code = max_code;
903 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
904 * establish sub-heaps of increasing lengths:
906 for(n = heap_len_/2; n >= 1; n--)
909 /* Construct the Huffman tree by repeatedly combining the least two
912 node = elems; /* next internal node of the tree */
915 pqremove(tree, n); /* n = node of least frequency */
916 m = heap_[kSmallest]; /* m = node of next least frequency */
918 heap_[--(heap_max_)] = n; /* keep the nodes sorted by frequency */
919 heap_[--(heap_max_)] = m;
921 /* Create a new node father of n and m */
922 tree[node].fc = tree[n].fc + tree[m].fc;
923 depth_[node] = (std::uint8_t)((depth_[n] >= depth_[m] ?
924 depth_[n] : depth_[m]) + 1);
925 tree[n].dl = tree[m].dl = (std::uint16_t)node;
926 /* and insert the new node in the heap */
927 heap_[kSmallest] = node++;
928 pqdownheap(tree, kSmallest);
931 while(heap_len_ >= 2);
933 heap_[--(heap_max_)] = heap_[kSmallest];
935 /* At this point, the fields freq and dad are set. We can now
936 * generate the bit lengths.
938 gen_bitlen((tree_desc *)desc);
940 /* The field len is now set, we can generate the bit codes */
941 gen_codes(tree, max_code, bl_count_);
944 /* Scan a literal or distance tree to determine the frequencies
945 of the codes in the bit length tree.
// Scans a literal/distance tree and accumulates, in bl_tree_, frequencies
// of the run-length symbols (literal lengths, REP_3_6, REPZ_3_10,
// REPZ_11_138) that will later encode the code-length sequence.
// NOTE(review): fragment — the signature line, the count/prevlen reset
// logic and several braces fall in gaps between these lines.
950 ct_data *tree, // the tree to be scanned
951 int max_code) // and its largest code of non zero frequency
953 int n; // iterates over all tree elements
954 int prevlen = -1; // last emitted length
955 int curlen; // length of current code
956 int nextlen = tree[0].dl; // length of next code
957 std::uint16_t count = 0; // repeat count of the current code
958 int max_count = 7; // max repeat count
959 int min_count = 4; // min repeat count
966 tree[max_code+1].dl = (std::uint16_t)0xffff; // guard
968 for(n = 0; n <= max_code; n++)
970 curlen = nextlen; nextlen = tree[n+1].dl;
971 if(++count < max_count && curlen == nextlen)
975 else if(count < min_count)
977 bl_tree_[curlen].fc += count;
981 if(curlen != prevlen) bl_tree_[curlen].fc++;
982 bl_tree_[REP_3_6].fc++;
986 bl_tree_[REPZ_3_10].fc++;
990 bl_tree_[REPZ_11_138].fc++;
999 else if(curlen == nextlen)
1012 /* Send a literal or distance tree in compressed form,
1013 using the codes in bl_tree.
// Emits a literal/distance tree's code-length sequence in compressed form
// using the bit-length tree codes (the output counterpart of scan_tree).
// NOTE(review): fragment — signature line, curlen assignment, count/
// prevlen resets and closing braces fall in gaps between these lines.
1018 ct_data *tree, // the tree to be scanned
1019 int max_code) // and its largest code of non zero frequency
1021 int n; // iterates over all tree elements
1022 int prevlen = -1; // last emitted length
1023 int curlen; // length of current code
1024 int nextlen = tree[0].dl; // length of next code
1025 int count = 0; // repeat count of the current code
1026 int max_count = 7; // max repeat count
1027 int min_count = 4; // min repeat count
1029 // tree[max_code+1].dl = -1; // guard already set
1036 for(n = 0; n <= max_code; n++)
1039 nextlen = tree[n+1].dl;
1040 if(++count < max_count && curlen == nextlen)
1044 else if(count < min_count)
1048 send_code(curlen, bl_tree_);
1050 while (--count != 0);
1052 else if(curlen != 0)
1054 if(curlen != prevlen)
1056 send_code(curlen, bl_tree_);
1059 BOOST_ASSERT(count >= 3 && count <= 6);
1060 send_code(REP_3_6, bl_tree_);
1061 send_bits(count-3, 2);
1063 else if(count <= 10)
1065 send_code(REPZ_3_10, bl_tree_);
1066 send_bits(count-3, 3);
1070 send_code(REPZ_11_138, bl_tree_);
1071 send_bits(count-11, 7);
1080 else if(curlen == nextlen)
1093 /* Construct the Huffman tree for the bit lengths and return
1094 the index in bl_order of the last bit length code to send.
// Builds the bit-length (code-length) tree from the just-built literal and
// distance trees, updates opt_len_ with its cost, and returns the index in
// bl_order of the last bit-length code that must be sent (>= 3).
// NOTE(review): fragment — the signature and return statement are in gaps.
1100 int max_blindex; // index of last bit length code of non zero freq
1102 // Determine the bit length frequencies for literal and distance trees
1103 scan_tree((ct_data *)dyn_ltree_, l_desc_.max_code);
1104 scan_tree((ct_data *)dyn_dtree_, d_desc_.max_code);
1106 // Build the bit length tree:
1107 build_tree((tree_desc *)(&(bl_desc_)));
1108 /* opt_len now includes the length of the tree representations, except
1109 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1112 /* Determine the number of bit length codes to send. The pkzip format
1113 * requires that at least 4 bit length codes be sent. (appnote.txt says
1114 * 3 but the actual value used is 4.)
1116 for(max_blindex = blCodes-1; max_blindex >= 3; max_blindex--)
1118 if(bl_tree_[lut_.bl_order[max_blindex]].dl != 0)
1121 // Update opt_len to include the bit length tree and counts
1122 opt_len_ += 3*(max_blindex+1) + 5+5+4;
1126 /* Send the header for a block using dynamic Huffman trees: the counts,
1127 the lengths of the bit length codes, the literal tree and the distance
1129 IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
// Emits the dynamic-block header: the three code counts (HLIT/HDIST/HCLEN),
// the bit-length code lengths in bl_order, then the compressed literal and
// distance trees. Fragment: the first two parameters are in a gap above.
1136 int blcodes) // number of codes for each tree
1138 int rank; // index in bl_order
1140 BOOST_ASSERT(lcodes >= 257 && dcodes >= 1 && blcodes >= 4);
1141 BOOST_ASSERT(lcodes <= lCodes && dcodes <= dCodes && blcodes <= blCodes);
1142 send_bits(lcodes-257, 5); // not +255 as stated in appnote.txt
1143 send_bits(dcodes-1, 5);
1144 send_bits(blcodes-4, 4); // not -3 as stated in appnote.txt
1145 for(rank = 0; rank < blcodes; rank++)
1146 send_bits(bl_tree_[lut_.bl_order[rank]].dl, 3);
1147 send_tree((ct_data *)dyn_ltree_, lcodes-1); // literal tree
1148 send_tree((ct_data *)dyn_dtree_, dcodes-1); // distance tree
1151 /* Send the block data compressed using the given Huffman trees
// Encodes the buffered literal/match pairs (d_buf_/l_buf_) with the given
// trees, ending with END_BLOCK. dist==0 marks a literal byte; otherwise
// a length code plus extra bits and a distance code plus extra bits.
// NOTE(review): fragment — the signature line, the d_buf_/l_buf_ reads and
// the do/if headers fall in gaps between these lines.
1156 ct_data const* ltree, // literal tree
1157 ct_data const* dtree) // distance tree
1159 unsigned dist; /* distance of matched string */
1160 int lc; /* match length or unmatched char (if dist == 0) */
1161 unsigned lx = 0; /* running index in l_buf */
1162 unsigned code; /* the code to send */
1163 int extra; /* number of extra bits to send */
1173 send_code(lc, ltree); /* send a literal byte */
1177 /* Here, lc is the match length - minMatch */
1178 code = lut_.length_code[lc];
1179 send_code(code+literals+1, ltree); /* send the length code */
1180 extra = lut_.extra_lbits[code];
1183 lc -= lut_.base_length[code];
1184 send_bits(lc, extra); /* send the extra length bits */
1186 dist--; /* dist is now the match distance - 1 */
1187 code = d_code(dist);
1188 BOOST_ASSERT(code < dCodes);
1190 send_code(code, dtree); /* send the distance code */
1191 extra = lut_.extra_dbits[code];
1194 dist -= lut_.base_dist[code];
1195 send_bits(dist, extra); /* send the extra distance bits */
1197 } /* literal or match pair ? */
1199 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1200 BOOST_ASSERT((uInt)(pending_) < lit_bufsize_ + 2*lx);
1202 while(lx < last_lit_);
1205 send_code(END_BLOCK, ltree);
1208 /* Check if the data type is TEXT or BINARY, using the following algorithm:
1209 - TEXT if the two conditions below are satisfied:
1210 a) There are no non-portable control characters belonging to the
1211 "black list" (0..6, 14..25, 28..31).
1212 b) There is at least one printable character belonging to the
1213 "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1215 - The following partially-portable control characters form a
1216 "gray list" that is ignored in this detection algorithm:
1217 (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1218 IN assertion: the fields fc of dyn_ltree are set.
// Classifies the current block as text or binary from the literal-tree
// byte frequencies (see the black/white/gray list algorithm described
// above). Fragment: the signature, the binary/text return statements and
// the local n declaration fall in gaps between these lines.
1224 /* black_mask is the bit mask of black-listed bytes
1225 * set bits 0..6, 14..25, and 28..31
1226 * 0xf3ffc07f = binary 11110011111111111100000001111111
1228 unsigned long black_mask = 0xf3ffc07fUL;
1231 // Check for non-textual ("black-listed") bytes.
1232 for(n = 0; n <= 31; n++, black_mask >>= 1)
1233 if((black_mask & 1) && (dyn_ltree_[n].fc != 0))
1236 // Check for textual ("white-listed") bytes. */
1237 if(dyn_ltree_[9].fc != 0 || dyn_ltree_[10].fc != 0
1238 || dyn_ltree_[13].fc != 0)
1240 for(n = 32; n < literals; n++)
1241 if(dyn_ltree_[n].fc != 0)
1244 /* There are no "black-listed" or "white-listed" bytes:
1245 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1250 /* Flush the bit buffer and align the output on a byte boundary
1258 else if(bi_valid_ > 0)
1259 put_byte((Byte)bi_buf_);
1264 /* Flush the bit buffer, keeping at most 7 bits in it.
1276 else if(bi_valid_ >= 8)
1278 put_byte((Byte)bi_buf_);
1284 /* Copy a stored block, storing first the length and its
1285 one's complement if requested.
// Copies a stored (uncompressed) block into pending output after byte
// alignment; when header is set, first writes LEN and its one's complement
// as the stored-block header. Fragment: signature line and the header/len
// guards fall in gaps.
1290 char *buf, // the input data
1291 unsigned len, // its length
1292 int header) // true if block header must be written
1294 bi_windup(); // align on byte boundary
1298 put_short((std::uint16_t)len);
1299 put_short((std::uint16_t)~len);
1302 std::memcpy(&pending_buf_[pending_], buf, len);
1306 //------------------------------------------------------------------------------
1308 /* Initialize the tree data structures for a new zlib stream.
1314 l_desc_.dyn_tree = dyn_ltree_;
1315 l_desc_.stat_desc = &lut_.l_desc;
1317 d_desc_.dyn_tree = dyn_dtree_;
1318 d_desc_.stat_desc = &lut_.d_desc;
1320 bl_desc_.dyn_tree = bl_tree_;
1321 bl_desc_.stat_desc = &lut_.bl_desc;
1326 // Initialize the first block of the first file:
1330 /* Send one empty static block to give enough lookahead for inflate.
1331 This takes 10 bits, of which 7 may remain in the bit buffer.
1337 send_bits(STATIC_TREES<<1, 3);
1338 send_code(END_BLOCK, lut_.ltree);
1342 /* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
1351 /* Send a stored block
// Sends one stored block: the 3-bit STORED_BLOCK type (with the `last`
// flag) followed by the raw bytes via copy_block with its length header.
// Fragment: the signature's opening line is in a gap above.
1356 char *buf, // input block
1357 std::uint32_t stored_len, // length of input block
1358 int last) // one if this is the last block for a file
1360 send_bits((STORED_BLOCK<<1)+last, 3); // send block type
1361 copy_block(buf, (unsigned)stored_len, 1); // with header
// Records one (distance, length) match pair in the pending-symbol buffers
// and bumps the corresponding dynamic tree frequencies; sets flush when
// the literal buffer is full.
// NOTE(review): upstream decrements dist before d_code(dist); that line
// falls in a gap here (between 1369 and 1371) — confirm against upstream.
1366 tr_tally_dist(std::uint16_t dist, std::uint8_t len, bool& flush)
1368 d_buf_[last_lit_] = dist;
1369 l_buf_[last_lit_++] = len;
1371 dyn_ltree_[lut_.length_code[len]+literals+1].fc++;
1372 dyn_dtree_[d_code(dist)].fc++;
1373 flush = (last_lit_ == lit_bufsize_-1);
// Records one literal byte (dist slot set to 0) and sets flush when the
// literal buffer is full. NOTE(review): the dyn_ltree_[c].fc++ frequency
// update falls in a gap here (line 1382) — confirm against upstream.
1378 tr_tally_lit(std::uint8_t c, bool& flush)
1380 d_buf_[last_lit_] = 0;
1381 l_buf_[last_lit_++] = c;
1383 flush = (last_lit_ == lit_bufsize_-1);
1386 //------------------------------------------------------------------------------
1388 /* Determine the best encoding for the current block: dynamic trees,
1389 static trees or store, and output the encoded block to the zip file.
1395 char *buf, // input block, or NULL if too old
1396 std::uint32_t stored_len, // length of input block
1397 int last) // one if this is the last block for a file
1399 std::uint32_t opt_lenb;
1400 std::uint32_t static_lenb; // opt_len and static_len in bytes
1401 int max_blindex = 0; // index of last bit length code of non zero freq
1403 // Build the Huffman trees unless a stored block is forced
1406 // Check if the file is binary or text
1407 if(zs.data_type == unknown)
1408 zs.data_type = detect_data_type();
1410 // Construct the literal and distance trees
1411 build_tree((tree_desc *)(&(l_desc_)));
1413 build_tree((tree_desc *)(&(d_desc_)));
1414 /* At this point, opt_len and static_len are the total bit lengths of
1415 * the compressed block data, excluding the tree representations.
1418 /* Build the bit length tree for the above two trees, and get the index
1419 * in bl_order of the last bit length code to send.
1421 max_blindex = build_bl_tree();
1423 /* Determine the best encoding. Compute the block lengths in bytes. */
1424 opt_lenb = (opt_len_+3+7)>>3;
1425 static_lenb = (static_len_+3+7)>>3;
1427 if(static_lenb <= opt_lenb)
1428 opt_lenb = static_lenb;
1432 // VFALCO This assertion fails even in the original ZLib,
1433 // happens with strategy == Z_HUFFMAN_ONLY, see:
1434 // https://github.com/madler/zlib/issues/172
1439 opt_lenb = static_lenb = stored_len + 5; // force a stored block
1443 if(buf != (char*)0) { /* force stored block */
1445 if(stored_len+4 <= opt_lenb && buf != (char*)0) {
1446 /* 4: two words for the lengths */
1448 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1449 * Otherwise we can't have processed more than WSIZE input bytes since
1450 * the last block flush, because compression would have been
1451 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1452 * transform a block into a stored block.
1454 tr_stored_block(buf, stored_len, last);
1458 else if(static_lenb >= 0)
1460 // force static trees
1463 else if(strategy_ == Strategy::fixed || static_lenb == opt_lenb)
1466 send_bits((STATIC_TREES<<1)+last, 3);
1467 compress_block(lut_.ltree, lut_.dtree);
1471 send_bits((DYN_TREES<<1)+last, 3);
1472 send_all_trees(l_desc_.max_code+1, d_desc_.max_code+1,
1474 compress_block((const ct_data *)dyn_ltree_,
1475 (const ct_data *)dyn_dtree_);
1477 /* The above check is made mod 2^32, for files larger than 512 MB
1478 * and std::size_t implemented on 32 bits.
1488 fill_window(z_params& zs)
1491 unsigned more; // Amount of free space at the end of the window.
1493 uInt wsize = w_size_;
1497 more = (unsigned)(window_size_ -
1498 (std::uint32_t)lookahead_ -(std::uint32_t)strstart_);
1500 // VFALCO We don't support systems below 32-bit
1502 // Deal with !@#$% 64K limit:
1503 if(sizeof(int) <= 2)
1505 if(more == 0 && strstart_ == 0 && lookahead_ == 0)
1509 else if(more == (unsigned)(-1))
1511 /* Very unlikely, but possible on 16 bit machine if
1512 * strstart == 0 && lookahead == 1 (input done a byte at time)
1519 /* If the window is almost full and there is insufficient lookahead,
1520 move the upper half to the lower one to make room in the upper half.
1522 if(strstart_ >= wsize+max_dist())
1524 std::memcpy(window_, window_+wsize, (unsigned)wsize);
1525 match_start_ -= wsize;
1526 strstart_ -= wsize; // we now have strstart >= max_dist
1527 block_start_ -= (long) wsize;
1529 /* Slide the hash table (could be avoided with 32 bit values
1530 at the expense of memory usage). We slide even when level == 0
1531 to keep the hash table consistent if we switch back to level > 0
1532 later. (Using level 0 permanently is not an optimal usage of
1533 zlib, so we don't care about this pathological case.)
1540 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
1549 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
1550 /* If n is not on any hash chain, prev[n] is garbage but
1551 its value will never be used.
1557 if(zs.avail_in == 0)
1560 /* If there was no sliding:
1561 strstart <= WSIZE+max_dist-1 && lookahead <= kMinLookahead - 1 &&
1562 more == window_size - lookahead - strstart
1563 => more >= window_size - (kMinLookahead-1 + WSIZE + max_dist-1)
1564 => more >= window_size - 2*WSIZE + 2
1565 In the BIG_MEM or MMAP case (not yet supported),
1566 window_size == input_size + kMinLookahead &&
1567 strstart + lookahead_ <= input_size => more >= kMinLookahead.
1568 Otherwise, window_size == 2*WSIZE so more >= 2.
1569 If there was sliding, more >= WSIZE. So in all cases, more >= 2.
1571 n = read_buf(zs, window_ + strstart_ + lookahead_, more);
1574 // Initialize the hash value now that we have some input:
1575 if(lookahead_ + insert_ >= minMatch)
1577 uInt str = strstart_ - insert_;
1578 ins_h_ = window_[str];
1579 update_hash(ins_h_, window_[str + 1]);
1582 update_hash(ins_h_, window_[str + minMatch-1]);
1583 prev_[str & w_mask_] = head_[ins_h_];
1584 head_[ins_h_] = (std::uint16_t)str;
1587 if(lookahead_ + insert_ < minMatch)
1591 /* If the whole input has less than minMatch bytes, ins_h is garbage,
1592 but this is not important since only literal bytes will be emitted.
1595 while(lookahead_ < kMinLookahead && zs.avail_in != 0);
1597 /* If the kWinInit bytes after the end of the current data have never been
1598 written, then zero those bytes in order to avoid memory check reports of
1599 the use of uninitialized (or uninitialised as Julian writes) bytes by
1600 the longest match routines. Update the high water mark for the next
1601 time through here. kWinInit is set to maxMatch since the longest match
1602 routines allow scanning to strstart + maxMatch, ignoring lookahead.
1604 if(high_water_ < window_size_)
1606 std::uint32_t curr = strstart_ + (std::uint32_t)(lookahead_);
1607 std::uint32_t winit;
1609 if(high_water_ < curr)
1611 /* Previous high water mark below current data -- zero kWinInit
1612 bytes or up to end of window, whichever is less.
1614 winit = window_size_ - curr;
1615 if(winit > kWinInit)
1617 std::memset(window_ + curr, 0, (unsigned)winit);
1618 high_water_ = curr + winit;
1620 else if(high_water_ < (std::uint32_t)curr + kWinInit)
1622 /* High water mark at or above current data, but below current data
1623 plus kWinInit -- zero out to current data plus kWinInit, or up
1624 to end of window, whichever is less.
1626 winit = (std::uint32_t)curr + kWinInit - high_water_;
1627 if(winit > window_size_ - high_water_)
1628 winit = window_size_ - high_water_;
1629 std::memset(window_ + high_water_, 0, (unsigned)winit);
1630 high_water_ += winit;
1635 /* Flush as much pending output as possible. All write() output goes
1636 through this function so some applications may wish to modify it
1637 to avoid allocating a large strm->next_out buffer and copying into it.
1638 (See also read_buf()).
1642 flush_pending(z_params& zs)
1645 auto len = clamp(pending_, zs.avail_out);
1649 std::memcpy(zs.next_out, pending_out_, len);
1651 static_cast<std::uint8_t*>(zs.next_out) + len;
1652 pending_out_ += len;
1653 zs.total_out += len;
1654 zs.avail_out -= len;
1657 pending_out_ = pending_buf_;
1660 /* Flush the current block, with given end-of-file flag.
1661 IN assertion: strstart is set to the end of the current match.
1665 flush_block(z_params& zs, bool last)
1668 (block_start_ >= 0L ?
1669 (char *)&window_[(unsigned)block_start_] :
1671 (std::uint32_t)((long)strstart_ - block_start_),
1673 block_start_ = strstart_;
1677 /* Read a new buffer from the current input stream, update the adler32
1678 and total number of bytes read. All write() input goes through
1679 this function so some applications may wish to modify it to avoid
1680 allocating a large strm->next_in buffer and copying from it.
1681 (See also flush_pending()).
1685 read_buf(z_params& zs, Byte *buf, unsigned size)
1687 auto len = clamp(zs.avail_in, size);
1693 std::memcpy(buf, zs.next_in, len);
1694 zs.next_in = static_cast<
1695 std::uint8_t const*>(zs.next_in) + len;
1700 /* Set match_start to the longest match starting at the given string and
1701 return its length. Matches shorter or equal to prev_length are discarded,
1702 in which case the result is equal to prev_length and match_start is
1704 IN assertions: cur_match is the head of the hash chain for the current
1705 string (strstart) and its distance is <= max_dist, and prev_length >= 1
1706 OUT assertion: the match length is not greater than s->lookahead_.
1708 For 80x86 and 680x0, an optimized version will be provided in match.asm or
1709 match.S. The code will be functionally equivalent.
1713 longest_match(IPos cur_match)
1715 unsigned chain_length = max_chain_length_;/* max hash chain length */
1716 Byte *scan = window_ + strstart_; /* current string */
1717 Byte *match; /* matched string */
1718 int len; /* length of current match */
1719 int best_len = prev_length_; /* best match length so far */
1720 int nice_match = nice_match_; /* stop if match long enough */
1721 IPos limit = strstart_ > (IPos)max_dist() ?
1722 strstart_ - (IPos)max_dist() : 0;
1723 /* Stop when cur_match becomes <= limit. To simplify the code,
1724 * we prevent matches with the string of window index 0.
1726 std::uint16_t *prev = prev_;
1727 uInt wmask = w_mask_;
1729 Byte *strend = window_ + strstart_ + maxMatch;
1730 Byte scan_end1 = scan[best_len-1];
1731 Byte scan_end = scan[best_len];
1733 /* The code is optimized for HASH_BITS >= 8 and maxMatch-2 multiple of 16.
1734 * It is easy to get rid of this optimization if necessary.
1736 BOOST_ASSERT(hash_bits_ >= 8 && maxMatch == 258);
1738 /* Do not waste too much time if we already have a good match: */
1739 if(prev_length_ >= good_match_) {
1742 /* Do not look for matches beyond the end of the input. This is necessary
1743 * to make deflate deterministic.
1745 if((uInt)nice_match > lookahead_)
1746 nice_match = lookahead_;
1748 BOOST_ASSERT((std::uint32_t)strstart_ <= window_size_-kMinLookahead);
1751 BOOST_ASSERT(cur_match < strstart_);
1752 match = window_ + cur_match;
1754 /* Skip to next match if the match length cannot increase
1755 * or if the match length is less than 2. Note that the checks below
1756 * for insufficient lookahead only occur occasionally for performance
1757 * reasons. Therefore uninitialized memory will be accessed, and
1758 * conditional jumps will be made that depend on those values.
1759 * However the length of the match is limited to the lookahead, so
1760 * the output of deflate is not affected by the uninitialized values.
1762 if( match[best_len] != scan_end ||
1763 match[best_len-1] != scan_end1 ||
1765 *++match != scan[1])
1768 /* The check at best_len-1 can be removed because it will be made
1769 * again later. (This heuristic is not always a win.)
1770 * It is not necessary to compare scan[2] and match[2] since they
1771 * are always equal when the other bytes match, given that
1772 * the hash keys are equal and that HASH_BITS >= 8.
1775 BOOST_ASSERT(*scan == *match);
1777 /* We check for insufficient lookahead only every 8th comparison;
1778 * the 256th check will be made at strstart+258.
1783 while( *++scan == *++match && *++scan == *++match &&
1784 *++scan == *++match && *++scan == *++match &&
1785 *++scan == *++match && *++scan == *++match &&
1786 *++scan == *++match && *++scan == *++match &&
1789 BOOST_ASSERT(scan <= window_+(unsigned)(window_size_-1));
1791 len = maxMatch - (int)(strend - scan);
1792 scan = strend - maxMatch;
1794 if(len > best_len) {
1795 match_start_ = cur_match;
1797 if(len >= nice_match) break;
1798 scan_end1 = scan[best_len-1];
1799 scan_end = scan[best_len];
1802 while((cur_match = prev[cur_match & wmask]) > limit
1803 && --chain_length != 0);
1805 if((uInt)best_len <= lookahead_)
1806 return (uInt)best_len;
1810 //------------------------------------------------------------------------------
1812 /* Copy without compression as much as possible from the input stream, return
1813 the current block state.
1814 This function does not insert new strings in the dictionary since
1815 uncompressible data is probably not useful. This function is used
1816 only for the level=0 compression option.
1817 NOTE: this function should be optimized to avoid extra copying from
1818 window to pending_buf.
1822 f_stored(z_params& zs, Flush flush) ->
1825 /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
1826 * to pending_buf_size, and each stored block has a 5 byte header:
1828 std::uint32_t max_block_size = 0xffff;
1829 std::uint32_t max_start;
1831 if(max_block_size > pending_buf_size_ - 5) {
1832 max_block_size = pending_buf_size_ - 5;
1835 /* Copy as much as possible from input to output: */
1837 /* Fill the window as much as possible: */
1838 if(lookahead_ <= 1) {
1840 BOOST_ASSERT(strstart_ < w_size_+max_dist() ||
1841 block_start_ >= (long)w_size_);
1844 if(lookahead_ == 0 && flush == Flush::none)
1847 if(lookahead_ == 0) break; /* flush the current block */
1849 BOOST_ASSERT(block_start_ >= 0L);
1851 strstart_ += lookahead_;
1854 /* Emit a stored block if pending_buf will be full: */
1855 max_start = block_start_ + max_block_size;
1856 if(strstart_ == 0 || (std::uint32_t)strstart_ >= max_start) {
1857 /* strstart == 0 is possible when wraparound on 16-bit machine */
1858 lookahead_ = (uInt)(strstart_ - max_start);
1859 strstart_ = (uInt)max_start;
1860 flush_block(zs, false);
1861 if(zs.avail_out == 0)
1864 /* Flush if we may have to slide, otherwise block_start may become
1865 * negative and the data will be gone:
1867 if(strstart_ - (uInt)block_start_ >= max_dist()) {
1868 flush_block(zs, false);
1869 if(zs.avail_out == 0)
1874 if(flush == Flush::finish)
1876 flush_block(zs, true);
1877 if(zs.avail_out == 0)
1878 return finish_started;
1881 if((long)strstart_ > block_start_)
1883 flush_block(zs, false);
1884 if(zs.avail_out == 0)
1890 /* Compress as much as possible from the input stream, return the current
1892 This function does not perform lazy evaluation of matches and inserts
1893 new strings in the dictionary only for unmatched strings or for short
1894 matches. It is used only for the fast compression options.
1898 f_fast(z_params& zs, Flush flush) ->
1901 IPos hash_head; /* head of the hash chain */
1902 bool bflush; /* set if current block must be flushed */
1906 /* Make sure that we always have enough lookahead, except
1907 * at the end of the input file. We need maxMatch bytes
1908 * for the next match, plus minMatch bytes to insert the
1909 * string following the next match.
1911 if(lookahead_ < kMinLookahead)
1914 if(lookahead_ < kMinLookahead && flush == Flush::none)
1917 break; /* flush the current block */
1920 /* Insert the string window[strstart .. strstart+2] in the
1921 * dictionary, and set hash_head to the head of the hash chain:
1924 if(lookahead_ >= minMatch) {
1925 insert_string(hash_head);
1928 /* Find the longest match, discarding those <= prev_length.
1929 * At this point we have always match_length < minMatch
1931 if(hash_head != 0 && strstart_ - hash_head <= max_dist()) {
1932 /* To simplify the code, we prevent matches with the string
1933 * of window index 0 (in particular we have to avoid a match
1934 * of the string with itself at the start of the input file).
1936 match_length_ = longest_match (hash_head);
1937 /* longest_match() sets match_start */
1939 if(match_length_ >= minMatch)
1941 tr_tally_dist(static_cast<std::uint16_t>(strstart_ - match_start_),
1942 static_cast<std::uint8_t>(match_length_ - minMatch), bflush);
1944 lookahead_ -= match_length_;
1946 /* Insert new strings in the hash table only if the match length
1947 * is not too large. This saves time but degrades compression.
1949 if(match_length_ <= max_lazy_match_ &&
1950 lookahead_ >= minMatch) {
1951 match_length_--; /* string at strstart already in table */
1955 insert_string(hash_head);
1956 /* strstart never exceeds WSIZE-maxMatch, so there are
1957 * always minMatch bytes ahead.
1960 while(--match_length_ != 0);
1965 strstart_ += match_length_;
1967 ins_h_ = window_[strstart_];
1968 update_hash(ins_h_, window_[strstart_+1]);
1969 /* If lookahead < minMatch, ins_h is garbage, but it does not
1970 * matter since it will be recomputed at next deflate call.
1976 /* No match, output a literal byte */
1977 tr_tally_lit(window_[strstart_], bflush);
1983 flush_block(zs, false);
1984 if(zs.avail_out == 0)
1988 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
1989 if(flush == Flush::finish)
1991 flush_block(zs, true);
1992 if(zs.avail_out == 0)
1993 return finish_started;
1998 flush_block(zs, false);
1999 if(zs.avail_out == 0)
2005 /* Same as above, but achieves better compression. We use a lazy
2006 evaluation for matches: a match is finally adopted only if there is
2007 no better match at the next window position.
2011 f_slow(z_params& zs, Flush flush) ->
2014 IPos hash_head; /* head of hash chain */
2015 bool bflush; /* set if current block must be flushed */
2017 /* Process the input block. */
2020 /* Make sure that we always have enough lookahead, except
2021 * at the end of the input file. We need maxMatch bytes
2022 * for the next match, plus minMatch bytes to insert the
2023 * string following the next match.
2025 if(lookahead_ < kMinLookahead)
2028 if(lookahead_ < kMinLookahead && flush == Flush::none)
2031 break; /* flush the current block */
2034 /* Insert the string window[strstart .. strstart+2] in the
2035 * dictionary, and set hash_head to the head of the hash chain:
2038 if(lookahead_ >= minMatch)
2039 insert_string(hash_head);
2041 /* Find the longest match, discarding those <= prev_length.
2043 prev_length_ = match_length_, prev_match_ = match_start_;
2044 match_length_ = minMatch-1;
2046 if(hash_head != 0 && prev_length_ < max_lazy_match_ &&
2047 strstart_ - hash_head <= max_dist())
2049 /* To simplify the code, we prevent matches with the string
2050 * of window index 0 (in particular we have to avoid a match
2051 * of the string with itself at the start of the input file).
2053 match_length_ = longest_match(hash_head);
2054 /* longest_match() sets match_start */
2056 if(match_length_ <= 5 && (strategy_ == Strategy::filtered
2057 || (match_length_ == minMatch &&
2058 strstart_ - match_start_ > kTooFar)
2061 /* If prev_match is also minMatch, match_start is garbage
2062 * but we will ignore the current match anyway.
2064 match_length_ = minMatch-1;
2067 /* If there was a match at the previous step and the current
2068 * match is not better, output the previous match:
2070 if(prev_length_ >= minMatch && match_length_ <= prev_length_)
2072 /* Do not insert strings in hash table beyond this. */
2073 uInt max_insert = strstart_ + lookahead_ - minMatch;
2076 static_cast<std::uint16_t>(strstart_ -1 - prev_match_),
2077 static_cast<std::uint8_t>(prev_length_ - minMatch), bflush);
2079 /* Insert in hash table all strings up to the end of the match.
2080 * strstart-1 and strstart are already inserted. If there is not
2081 * enough lookahead, the last two strings are not inserted in
2084 lookahead_ -= prev_length_-1;
2087 if(++strstart_ <= max_insert)
2088 insert_string(hash_head);
2090 while(--prev_length_ != 0);
2091 match_available_ = 0;
2092 match_length_ = minMatch-1;
2097 flush_block(zs, false);
2098 if(zs.avail_out == 0)
2103 else if(match_available_)
2105 /* If there was no match at the previous position, output a
2106 * single literal. If there was a match but the current match
2107 * is longer, truncate the previous match to a single literal.
2109 tr_tally_lit(window_[strstart_-1], bflush);
2111 flush_block(zs, false);
2114 if(zs.avail_out == 0)
2119 /* There is no previous match to compare with, wait for
2120 * the next step to decide.
2122 match_available_ = 1;
2127 BOOST_ASSERT(flush != Flush::none);
2128 if(match_available_)
2130 tr_tally_lit(window_[strstart_-1], bflush);
2131 match_available_ = 0;
2133 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2134 if(flush == Flush::finish)
2136 flush_block(zs, true);
2137 if(zs.avail_out == 0)
2138 return finish_started;
2143 flush_block(zs, false);
2144 if(zs.avail_out == 0)
2150 /* For Strategy::rle, simply look for runs of bytes, generate matches only of distance
2151 one. Do not maintain a hash table. (It will be regenerated if this run of
2152 deflate switches away from Strategy::rle.)
2156 f_rle(z_params& zs, Flush flush) ->
2159 bool bflush; // set if current block must be flushed
2160 uInt prev; // byte at distance one to match
2161 Byte *scan, *strend; // scan goes up to strend for length of run
2165 /* Make sure that we always have enough lookahead, except
2166 * at the end of the input file. We need maxMatch bytes
2167 * for the longest run, plus one for the unrolled loop.
2169 if(lookahead_ <= maxMatch) {
2171 if(lookahead_ <= maxMatch && flush == Flush::none) {
2174 if(lookahead_ == 0) break; /* flush the current block */
2177 /* See how many times the previous byte repeats */
2179 if(lookahead_ >= minMatch && strstart_ > 0) {
2180 scan = window_ + strstart_ - 1;
2182 if(prev == *++scan && prev == *++scan && prev == *++scan) {
2183 strend = window_ + strstart_ + maxMatch;
2185 } while(prev == *++scan && prev == *++scan &&
2186 prev == *++scan && prev == *++scan &&
2187 prev == *++scan && prev == *++scan &&
2188 prev == *++scan && prev == *++scan &&
2190 match_length_ = maxMatch - (int)(strend - scan);
2191 if(match_length_ > lookahead_)
2192 match_length_ = lookahead_;
2194 BOOST_ASSERT(scan <= window_+(uInt)(window_size_-1));
2197 /* Emit match if have run of minMatch or longer, else emit literal */
2198 if(match_length_ >= minMatch) {
2199 tr_tally_dist(std::uint16_t{1},
2200 static_cast<std::uint8_t>(match_length_ - minMatch),
2203 lookahead_ -= match_length_;
2204 strstart_ += match_length_;
2207 /* No match, output a literal byte */
2208 tr_tally_lit(window_[strstart_], bflush);
2214 flush_block(zs, false);
2215 if(zs.avail_out == 0)
2220 if(flush == Flush::finish)
2222 flush_block(zs, true);
2223 if(zs.avail_out == 0)
2224 return finish_started;
2229 flush_block(zs, false);
2230 if(zs.avail_out == 0)
2236 /* ===========================================================================
2237 * For Strategy::huffman, do not look for matches. Do not maintain a hash table.
2238 * (It will be regenerated if this run of deflate switches away from Huffman.)
2242 f_huff(z_params& zs, Flush flush) ->
2245 bool bflush; // set if current block must be flushed
2249 // Make sure that we have a literal to write.
2255 if(flush == Flush::none)
2257 break; // flush the current block
2261 // Output a literal byte
2263 tr_tally_lit(window_[strstart_], bflush);
2268 flush_block(zs, false);
2269 if(zs.avail_out == 0)
2274 if(flush == Flush::finish)
2276 flush_block(zs, true);
2277 if(zs.avail_out == 0)
2278 return finish_started;
2283 flush_block(zs, false);
2284 if(zs.avail_out == 0)