1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package flate implements the DEFLATE compressed data format, described in
6 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
17 maxCodeLen = 16 // max length of Huffman code
18 maxHist = 32768 // max history required
19 // The next three numbers come from the RFC, section 3.2.7.
22 numCodes = 19 // number of codes in Huffman meta-code
25 // A CorruptInputError reports the presence of corrupt input at a given offset.
26 type CorruptInputError int64
28 func (e CorruptInputError) Error() string {
29 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
32 // An InternalError reports an error in the flate code itself.
33 type InternalError string
35 func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
37 // A ReadError reports an error encountered while reading input.
38 type ReadError struct {
39 Offset int64 // byte offset where error occurred
40 Err error // error returned by underlying Read
43 func (e *ReadError) Error() string {
44 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
47 // A WriteError reports an error encountered while writing output.
48 type WriteError struct {
49 Offset int64 // byte offset where error occurred
50 Err error // error returned by underlying Write
53 func (e *WriteError) Error() string {
54 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
57 // Huffman decoder is based on
58 // J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
59 // Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
60 type huffmanDecoder struct {
61 // min, max code length
64 // limit[i] = largest code word of length i
65 // Given code v of length n,
66 // need more bits if v > limit[n].
67 limit [maxCodeLen + 1]int
69 // base[i] = smallest code word of length i - seq number
70 base [maxCodeLen + 1]int
72 // codes[seq number] = output code.
73 // Given code v of length n, value is
74 // codes[v - base[n]].
78 // Initialize Huffman decoding tables from array of code lengths.
79 func (h *huffmanDecoder) init(bits []int) bool {
80 // Count number of codes of each length,
81 // compute min and max length.
82 var count [maxCodeLen + 1]int
84 for _, n := range bits {
88 if min == 0 || n < min {
103 // For each code range, compute
104 // nextcode (first code of that length),
105 // limit (last code of that length), and
106 // base (offset from first code to sequence number).
109 var nextcode [maxCodeLen]int
110 for i := min; i <= max; i++ {
113 h.base[i] = code - seq
116 h.limit[i] = code - 1
120 // Make array mapping sequence numbers to codes.
121 if len(h.codes) < len(bits) {
122 h.codes = make([]int, len(bits))
124 for i, n := range bits {
130 seq := code - h.base[n]
136 // Hard-coded Huffman tables for DEFLATE algorithm.
137 // See RFC 1951, section 3.2.6.
138 var fixedHuffmanDecoder = huffmanDecoder{
140 [maxCodeLen + 1]int{7: 23, 199, 511},
141 [maxCodeLen + 1]int{7: 0, 24, 224},
144 256, 257, 258, 259, 260, 261, 262,
145 263, 264, 265, 266, 267, 268, 269,
146 270, 271, 272, 273, 274, 275, 276,
150 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
151 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
152 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
153 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
154 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
155 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
156 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
157 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
158 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
159 92, 93, 94, 95, 96, 97, 98, 99, 100,
160 101, 102, 103, 104, 105, 106, 107, 108,
161 109, 110, 111, 112, 113, 114, 115, 116,
162 117, 118, 119, 120, 121, 122, 123, 124,
163 125, 126, 127, 128, 129, 130, 131, 132,
164 133, 134, 135, 136, 137, 138, 139, 140,
168 280, 281, 282, 283, 284, 285, 286, 287,
171 144, 145, 146, 147, 148, 149, 150, 151,
172 152, 153, 154, 155, 156, 157, 158, 159,
173 160, 161, 162, 163, 164, 165, 166, 167,
174 168, 169, 170, 171, 172, 173, 174, 175,
175 176, 177, 178, 179, 180, 181, 182, 183,
176 184, 185, 186, 187, 188, 189, 190, 191,
177 192, 193, 194, 195, 196, 197, 198, 199,
178 200, 201, 202, 203, 204, 205, 206, 207,
179 208, 209, 210, 211, 212, 213, 214, 215,
180 216, 217, 218, 219, 220, 221, 222, 223,
181 224, 225, 226, 227, 228, 229, 230, 231,
182 232, 233, 234, 235, 236, 237, 238, 239,
183 240, 241, 242, 243, 244, 245, 246, 247,
184 248, 249, 250, 251, 252, 253, 254, 255,
188 // The actual read interface needed by NewReader.
189 // If the passed in io.Reader does not also have ReadByte,
190 // the NewReader will introduce its own buffering.
191 type Reader interface {
193 ReadByte() (c byte, err error)
197 type decompressor struct {
203 // Input bits, in top of b.
207 // Huffman decoders for literal/length, distance.
208 h1, h2 huffmanDecoder
210 // Length arrays used to define Huffman codes.
211 bits [maxLit + maxDist]int
212 codebits [numCodes]int
214 // Output history, buffer.
216 hp int // current output position in buffer
217 hw int // have written hist[0:hw] already
218 hfull bool // buffer has filled at least once
220 // Temporary buffer (avoids repeated allocation).
223 // Next step in the decompression,
224 // and decompression state.
225 step func(*decompressor)
229 hl, hd *huffmanDecoder
234 func (f *decompressor) nextBlock() {
237 f.flush((*decompressor).nextBlock)
244 if f.err = f.moreBits(); f.err != nil {
257 // compressed, fixed Huffman tables
258 f.hl = &fixedHuffmanDecoder
262 // compressed, dynamic Huffman tables
263 if f.err = f.readHuffman(); f.err != nil {
271 f.err = CorruptInputError(f.roffset)
275 func (f *decompressor) Read(b []byte) (int, error) {
277 if len(f.toRead) > 0 {
278 n := copy(b, f.toRead)
279 f.toRead = f.toRead[n:]
290 func (f *decompressor) Close() error {
297 // RFC 1951 section 3.2.7.
298 // Compression with dynamic Huffman codes
300 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
302 func (f *decompressor) readHuffman() error {
303 // HLIT[5], HDIST[5], HCLEN[4].
305 if err := f.moreBits(); err != nil {
309 nlit := int(f.b&0x1F) + 257
311 return CorruptInputError(f.roffset)
314 ndist := int(f.b&0x1F) + 1
315 // maxDist is 32, so ndist is always valid.
317 nclen := int(f.b&0xF) + 4
318 // numCodes is 19, so nclen is always valid.
322 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
323 for i := 0; i < nclen; i++ {
325 if err := f.moreBits(); err != nil {
329 f.codebits[codeOrder[i]] = int(f.b & 0x7)
333 for i := nclen; i < len(codeOrder); i++ {
334 f.codebits[codeOrder[i]] = 0
336 if !f.h1.init(f.codebits[0:]) {
337 return CorruptInputError(f.roffset)
340 // HLIT + 257 code lengths, HDIST + 1 code lengths,
341 // using the code length Huffman code.
342 for i, n := 0, nlit+ndist; i < n; {
343 x, err := f.huffSym(&f.h1)
353 // Repeat previous length or zero.
359 return InternalError("unexpected length code")
364 return CorruptInputError(f.roffset)
377 if err := f.moreBits(); err != nil {
381 rep += int(f.b & uint32(1<<nb-1))
385 return CorruptInputError(f.roffset)
387 for j := 0; j < rep; j++ {
393 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
394 return CorruptInputError(f.roffset)
400 // Decode a single Huffman block from f.
401 // hl and hd are the Huffman states for the lit/length values
402 // and the distance values, respectively. If hd == nil, using the
403 // fixed distance encoding associated with fixed Huffman blocks.
404 func (f *decompressor) huffmanBlock() {
406 v, err := f.huffSym(f.hl)
411 var n uint // number of bits extra
415 f.hist[f.hp] = byte(v)
417 if f.hp == len(f.hist) {
418 // After the flush, continue this loop.
419 f.flush((*decompressor).huffmanBlock)
424 // Done with huffman block; read next block.
425 f.step = (*decompressor).nextBlock
427 // otherwise, reference to older data
429 length = v - (257 - 3)
432 length = v*2 - (265*2 - 11)
435 length = v*4 - (269*4 - 19)
438 length = v*8 - (273*8 - 35)
441 length = v*16 - (277*16 - 67)
444 length = v*32 - (281*32 - 131)
452 if err = f.moreBits(); err != nil {
457 length += int(f.b & uint32(1<<n-1))
465 if err = f.moreBits(); err != nil {
470 dist = int(reverseByte[(f.b&0x1F)<<3])
474 if dist, err = f.huffSym(f.hd); err != nil {
484 f.err = CorruptInputError(f.roffset)
487 nb := uint(dist-2) >> 1
488 // have 1 bit in bottom of dist, need nb more.
489 extra := (dist & 1) << nb
491 if err = f.moreBits(); err != nil {
496 extra |= int(f.b & uint32(1<<nb-1))
499 dist = 1<<(nb+1) + 1 + extra
502 // Copy history[-dist:-dist+length] into output.
503 if dist > len(f.hist) {
504 f.err = InternalError("bad history distance")
508 // No check on length; encoding can be prescient.
509 if !f.hfull && dist > f.hp {
510 f.err = CorruptInputError(f.roffset)
518 for i := 0; i < length; i++ {
519 f.hist[f.hp] = f.hist[p]
522 if f.hp == len(f.hist) {
523 // After flush continue copying out of history.
524 f.copyLen = length - (i + 1)
526 f.flush((*decompressor).copyHuff)
529 if p == len(f.hist) {
537 func (f *decompressor) copyHuff() {
544 for i := 0; i < length; i++ {
545 f.hist[f.hp] = f.hist[p]
548 if f.hp == len(f.hist) {
549 f.copyLen = length - (i + 1)
550 f.flush((*decompressor).copyHuff)
553 if p == len(f.hist) {
558 // Continue processing Huffman block.
562 // Copy a single uncompressed data block from input to output.
563 func (f *decompressor) dataBlock() {
565 // Discard current half-byte.
569 // Length then ones-complement of length.
570 nr, err := io.ReadFull(f.r, f.buf[0:4])
571 f.roffset += int64(nr)
573 f.err = &ReadError{f.roffset, err}
576 n := int(f.buf[0]) | int(f.buf[1])<<8
577 nn := int(f.buf[2]) | int(f.buf[3])<<8
578 if uint16(nn) != uint16(^n) {
579 f.err = CorruptInputError(f.roffset)
584 // 0-length block means sync
585 f.flush((*decompressor).nextBlock)
593 func (f *decompressor) copyData() {
594 // Read f.dataLen bytes into history,
595 // pausing for reads as history fills.
598 m := len(f.hist) - f.hp
602 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
603 f.roffset += int64(m)
605 f.err = &ReadError{f.roffset, err}
610 if f.hp == len(f.hist) {
612 f.flush((*decompressor).copyData)
616 f.step = (*decompressor).nextBlock
619 func (f *decompressor) setDict(dict []byte) {
620 if len(dict) > len(f.hist) {
621 // Will only remember the tail.
622 dict = dict[len(dict)-len(f.hist):]
625 f.hp = copy(f.hist[:], dict)
626 if f.hp == len(f.hist) {
633 func (f *decompressor) moreBits() error {
634 c, err := f.r.ReadByte()
637 err = io.ErrUnexpectedEOF
642 f.b |= uint32(c) << f.nb
647 // Read the next Huffman-encoded symbol from f according to h.
648 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
649 for n := uint(h.min); n <= uint(h.max); n++ {
655 if err := f.moreBits(); err != nil {
659 v := int(f.b & uint32(1<<n-1))
661 v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
665 return h.codes[v-h.base[n]], nil
668 return 0, CorruptInputError(f.roffset)
671 // Flush any buffered output to the underlying writer.
672 func (f *decompressor) flush(step func(*decompressor)) {
673 f.toRead = f.hist[f.hw:f.hp]
674 f.woffset += int64(f.hp - f.hw)
676 if f.hp == len(f.hist) {
684 func makeReader(r io.Reader) Reader {
685 if rr, ok := r.(Reader); ok {
688 return bufio.NewReader(r)
691 // NewReader returns a new ReadCloser that can be used
692 // to read the uncompressed version of r. It is the caller's
693 // responsibility to call Close on the ReadCloser when
695 func NewReader(r io.Reader) io.ReadCloser {
698 f.step = (*decompressor).nextBlock
702 // NewReaderDict is like NewReader but initializes the reader
703 // with a preset dictionary. The returned Reader behaves as if
704 // the uncompressed data stream started with the given dictionary,
705 // which has already been read. NewReaderDict is typically used
706 // to read data compressed by NewWriterDict.
707 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
711 f.step = (*decompressor).nextBlock