inflate.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package flate implements the DEFLATE compressed data format, described in
     6  // RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
     7  // formats.
     8  package flate
     9  
    10  import (
    11  	"bufio"
    12  	"io"
    13  	"math/bits"
    14  	"strconv"
    15  	"sync"
    16  )
    17  
const (
	// maxCodeLen sizes the per-length tables in huffmanDecoder.init, which
	// are indexed directly by code length.
	maxCodeLen = 16 // max length of Huffman code
	// The next three numbers come from the RFC section 3.2.7, with the
	// additional proviso in section 3.2.5 which implies that distance codes
	// 30 and 31 should never occur in compressed data.
	//
	// maxNumLit and maxNumDist bound the HLIT and HDIST counts accepted by
	// readHuffman and together size the decompressor's code length array.
	maxNumLit  = 286
	maxNumDist = 30
	numCodes   = 19 // number of codes in Huffman meta-code
)
    27  
// Initialize the fixedHuffmanDecoder only once upon first use.
// fixedOnce guards the one-time construction performed by
// fixedHuffmanDecoderInit.
var fixedOnce sync.Once

// fixedHuffmanDecoder is the literal/length decoder that nextBlock installs
// for blocks compressed with the fixed Huffman tables (block type 1).
var fixedHuffmanDecoder huffmanDecoder
    31  
    32  // A CorruptInputError reports the presence of corrupt input at a given offset.
    33  type CorruptInputError int64
    34  
    35  func (e CorruptInputError) Error() string {
    36  	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
    37  }
    38  
    39  // An InternalError reports an error in the flate code itself.
    40  type InternalError string
    41  
    42  func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
    43  
    44  // A ReadError reports an error encountered while reading input.
    45  //
    46  // Deprecated: No longer returned.
    47  type ReadError struct {
    48  	Offset int64 // byte offset where error occurred
    49  	Err    error // error returned by underlying Read
    50  }
    51  
    52  func (e *ReadError) Error() string {
    53  	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    54  }
    55  
    56  // A WriteError reports an error encountered while writing output.
    57  //
    58  // Deprecated: No longer returned.
    59  type WriteError struct {
    60  	Offset int64 // byte offset where error occurred
    61  	Err    error // error returned by underlying Write
    62  }
    63  
    64  func (e *WriteError) Error() string {
    65  	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    66  }
    67  
// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
// to switch to a new underlying [io.Reader]. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
	// Reset discards any buffered data and resets the Resetter as if it was
	// newly initialized with the given reader. The dict argument plays the
	// same role as the preset dictionary passed to NewReaderDict; it may be
	// nil when no dictionary is wanted.
	Reset(r io.Reader, dict []byte) error
}
    76  
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt

// chunk & 15 is number of bits
// chunk >> 4 is value, including table link

const (
	// huffmanChunkBits is the number of input bits used to index the
	// primary lookup table.
	huffmanChunkBits = 9
	// huffmanNumChunks is the number of entries in the primary lookup table.
	huffmanNumChunks = 1 << huffmanChunkBits
	// huffmanCountMask extracts the bit count from a chunk (chunk & 15).
	huffmanCountMask = 15
	// huffmanValueShift extracts the value or link index from a chunk
	// (chunk >> 4).
	huffmanValueShift = 4
)
   103  
// huffmanDecoder holds the lookup tables used to decode one Huffman code.
// It is populated by init and consulted by decompressor.huffSym. The zero
// value represents an empty code.
type huffmanDecoder struct {
	min      int                      // the minimum code length
	chunks   [huffmanNumChunks]uint32 // chunks as described above
	links    [][]uint32               // overflow links
	linkMask uint32                   // mask the width of the link table
}
   110  
// init initializes the Huffman decoding tables of h from an array of code
// lengths, where lengths[i] is the bit length of the code for symbol i and a
// length of 0 marks a symbol that is not used. If h has been used before
// (h.min != 0), it is first reset to its zero value.
//
// init reports whether lengths describes an acceptable code. Following a
// successful call, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
//
// Every length must be non-negative and less than maxCodeLen, because the
// lengths are used directly as indexes into arrays of that size.
func (h *huffmanDecoder) init(lengths []int) bool {
	// Sanity enables additional runtime tests during Huffman
	// table construction. It's intended to be used during
	// development to supplement the currently ad-hoc unit tests.
	const sanity = false

	if h.min != 0 {
		*h = huffmanDecoder{}
	}

	// Count number of codes of each length,
	// compute min and max length.
	var count [maxCodeLen]int
	var min, max int
	for _, n := range lengths {
		if n == 0 {
			continue
		}
		if min == 0 || n < min {
			min = n
		}
		if n > max {
			max = n
		}
		count[n]++
	}

	// Empty tree. The decompressor.huffSym function will fail later if the tree
	// is used. Technically, an empty tree is only valid for the HDIST tree and
	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
	// is guaranteed to fail since it will attempt to use the tree to decode the
	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
	// guaranteed to fail later since the compressed data section must be
	// composed of at least one symbol (the end-of-block marker).
	if max == 0 {
		return true
	}

	// Compute the first code of every length. Codes of one length are
	// consecutive, and the first code of length i is twice the value that
	// follows the last code of length i-1. When the loop ends, code is one
	// past the last code of length max.
	code := 0
	var nextcode [maxCodeLen]int
	for i := min; i <= max; i++ {
		code <<= 1
		nextcode[i] = code
		code += count[i]
	}

	// Check that the coding is complete (i.e., that we've
	// assigned all 2-to-the-max possible bit sequences).
	// Exception: To be compatible with zlib, we also need to
	// accept degenerate single-code codings. See also
	// TestDegenerateHuffmanCoding.
	if code != 1<<uint(max) && !(code == 1 && max == 1) {
		return false
	}

	h.min = min
	if max > huffmanChunkBits {
		// Codes longer than huffmanChunkBits need overflow tables, each
		// indexed by the bits of the code beyond the first huffmanChunkBits.
		numLinks := 1 << (uint(max) - huffmanChunkBits)
		h.linkMask = uint32(numLinks - 1)

		// create link tables
		// link is the huffmanChunkBits-bit prefix of the first code that is
		// longer than huffmanChunkBits; every prefix from link upward gets
		// its own overflow table.
		link := nextcode[huffmanChunkBits+1] >> 1
		h.links = make([][]uint32, huffmanNumChunks-link)
		for j := uint(link); j < huffmanNumChunks; j++ {
			// The primary table is indexed by bit-reversed code prefixes.
			reverse := int(bits.Reverse16(uint16(j)))
			reverse >>= uint(16 - huffmanChunkBits)
			off := j - uint(link)
			if sanity && h.chunks[reverse] != 0 {
				panic("impossible: overwriting existing chunk")
			}
			// A bit count of huffmanChunkBits+1 marks the chunk as a link;
			// its value is the index of the overflow table in h.links.
			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
			h.links[off] = make([]uint32, numLinks)
		}
	}

	// Assign a code to every used symbol, in symbol order within each
	// length, and record it in the lookup tables.
	for i, n := range lengths {
		if n == 0 {
			continue
		}
		code := nextcode[n]
		nextcode[n]++
		// The chunk stores the symbol in its value bits and the code
		// length in its count bits.
		chunk := uint32(i<<huffmanValueShift | n)
		// Reverse the n-bit code so that it matches the low-order input
		// bits used as the table index in huffSym.
		reverse := int(bits.Reverse16(uint16(code)))
		reverse >>= uint(16 - n)
		if n <= huffmanChunkBits {
			// Short code: fill every primary entry whose low n bits equal
			// the reversed code.
			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
				// We should never need to overwrite
				// an existing chunk. Also, 0 is
				// never a valid chunk, because the
				// lower 4 "count" bits should be
				// between 1 and 15.
				if sanity && h.chunks[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				h.chunks[off] = chunk
			}
		} else {
			// Long code: follow the link stored for the first
			// huffmanChunkBits bits, then fill the overflow table using
			// the remaining bits of the reversed code.
			j := reverse & (huffmanNumChunks - 1)
			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
				// Longer codes should have been
				// associated with a link table above.
				panic("impossible: not an indirect chunk")
			}
			value := h.chunks[j] >> huffmanValueShift
			linktab := h.links[value]
			reverse >>= huffmanChunkBits
			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
				if sanity && linktab[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				linktab[off] = chunk
			}
		}
	}

	if sanity {
		// Above we've sanity checked that we never overwrote
		// an existing entry. Here we additionally check that
		// we filled the tables completely.
		for i, chunk := range h.chunks {
			if chunk == 0 {
				// As an exception, in the degenerate
				// single-code case, we allow odd
				// chunks to be missing.
				if code == 1 && i%2 == 1 {
					continue
				}
				panic("impossible: missing chunk")
			}
		}
		for _, linktab := range h.links {
			for _, chunk := range linktab {
				if chunk == 0 {
					panic("impossible: missing chunk")
				}
			}
		}
	}

	return true
}
   257  
// The actual read interface needed by [NewReader].
// If the passed in io.Reader does not also have ReadByte,
// the [NewReader] will introduce its own buffering.
//
// The decompressor uses ReadByte to pull in compressed bits one byte at a
// time and Read (via io.ReadFull) to copy stored-block data.
type Reader interface {
	io.Reader
	io.ByteReader
}
   265  
// Decompress state.
// A decompressor is the ReadCloser returned by NewReader and NewReaderDict.
type decompressor struct {
	// Input source.
	// roffset counts the bytes read from r so far and is the offset
	// reported in CorruptInputError values.
	r       Reader
	rBuf    *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
	roffset int64

	// Input bits that have been read but not yet consumed.
	// New bytes are ORed in above the nb bits already held and bits are
	// consumed from the low end, so the pending bits are the low nb bits of b.
	b  uint32
	nb uint

	// Huffman decoders for literal/length, distance.
	// h1 is also used temporarily by readHuffman to decode the code lengths.
	h1, h2 huffmanDecoder

	// Length arrays used to define Huffman codes.
	// bits holds the literal/length code lengths followed by the distance
	// code lengths; codebits holds the lengths of the code length code.
	bits     *[maxNumLit + maxNumDist]int
	codebits *[numCodes]int

	// Output history, buffer.
	dict dictDecoder

	// Temporary buffer (avoids repeated allocation).
	// dataBlock reads the 4-byte stored-block header into it.
	buf [4]byte

	// Next step in the decompression,
	// and decompression state.
	//
	// step is the function Read calls to make progress. stepState tells
	// huffmanBlock where to resume. final records whether the current block
	// has the final-block bit set. err is the error Read returns once toRead
	// has been drained. toRead is decoded output waiting to be handed to the
	// caller of Read. hl and hd are the decoders for the current Huffman
	// block (hd is nil for fixed-Huffman blocks). copyLen and copyDist
	// describe a copy that is still in progress.
	step      func(*decompressor)
	stepState int
	final     bool
	err       error
	toRead    []byte
	hl, hd    *huffmanDecoder
	copyLen   int
	copyDist  int
}
   301  
   302  func (f *decompressor) nextBlock() {
   303  	for f.nb < 1+2 {
   304  		if f.err = f.moreBits(); f.err != nil {
   305  			return
   306  		}
   307  	}
   308  	f.final = f.b&1 == 1
   309  	f.b >>= 1
   310  	typ := f.b & 3
   311  	f.b >>= 2
   312  	f.nb -= 1 + 2
   313  	switch typ {
   314  	case 0:
   315  		f.dataBlock()
   316  	case 1:
   317  		// compressed, fixed Huffman tables
   318  		f.hl = &fixedHuffmanDecoder
   319  		f.hd = nil
   320  		f.huffmanBlock()
   321  	case 2:
   322  		// compressed, dynamic Huffman tables
   323  		if f.err = f.readHuffman(); f.err != nil {
   324  			break
   325  		}
   326  		f.hl = &f.h1
   327  		f.hd = &f.h2
   328  		f.huffmanBlock()
   329  	default:
   330  		// 3 is reserved.
   331  		f.err = CorruptInputError(f.roffset)
   332  	}
   333  }
   334  
   335  func (f *decompressor) Read(b []byte) (int, error) {
   336  	for {
   337  		if len(f.toRead) > 0 {
   338  			n := copy(b, f.toRead)
   339  			f.toRead = f.toRead[n:]
   340  			if len(f.toRead) == 0 {
   341  				return n, f.err
   342  			}
   343  			return n, nil
   344  		}
   345  		if f.err != nil {
   346  			return 0, f.err
   347  		}
   348  		f.step(f)
   349  		if f.err != nil && len(f.toRead) == 0 {
   350  			f.toRead = f.dict.readFlush() // Flush what's left in case of error
   351  		}
   352  	}
   353  }
   354  
   355  func (f *decompressor) Close() error {
   356  	if f.err == io.EOF {
   357  		return nil
   358  	}
   359  	return f.err
   360  }
   361  
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes

// codeOrder is the order in which the code length code lengths appear in a
// dynamic block header: the i'th 3-bit value read by readHuffman is the
// length for code length symbol codeOrder[i].
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
   366  
// readHuffman parses the header of a dynamic Huffman block and builds the
// literal/length decoder in f.h1 and the distance decoder in f.h2.
//
// It returns a CorruptInputError when the header describes too many codes,
// an invalid repeat, or code lengths that do not form an acceptable code.
// Errors from reading the input are returned unchanged from moreBits and
// huffSym.
func (f *decompressor) readHuffman() error {
	// HLIT[5], HDIST[5], HCLEN[4].
	for f.nb < 5+5+4 {
		if err := f.moreBits(); err != nil {
			return err
		}
	}
	nlit := int(f.b&0x1F) + 257
	if nlit > maxNumLit {
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	ndist := int(f.b&0x1F) + 1
	if ndist > maxNumDist {
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	nclen := int(f.b&0xF) + 4
	// numCodes is 19, so nclen is always valid.
	f.b >>= 4
	f.nb -= 5 + 5 + 4

	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
	for i := 0; i < nclen; i++ {
		for f.nb < 3 {
			if err := f.moreBits(); err != nil {
				return err
			}
		}
		f.codebits[codeOrder[i]] = int(f.b & 0x7)
		f.b >>= 3
		f.nb -= 3
	}
	// Code length symbols that were not transmitted are unused.
	for i := nclen; i < len(codeOrder); i++ {
		f.codebits[codeOrder[i]] = 0
	}
	// h1 temporarily holds the code length code; it is rebuilt as the
	// literal/length decoder further down.
	if !f.h1.init(f.codebits[0:]) {
		return CorruptInputError(f.roffset)
	}

	// HLIT + 257 code lengths, HDIST + 1 code lengths,
	// using the code length Huffman code.
	// Both sets of lengths are decoded as one sequence into f.bits, so a
	// repeat may run from the literal/length lengths into the distance ones.
	for i, n := 0, nlit+ndist; i < n; {
		x, err := f.huffSym(&f.h1)
		if err != nil {
			return err
		}
		if x < 16 {
			// Actual length.
			f.bits[i] = x
			i++
			continue
		}
		// Repeat previous length or zero.
		// rep is the minimum repeat count, nb the number of extra bits
		// that are added to it, and b the length value being repeated.
		var rep int
		var nb uint
		var b int
		switch x {
		default:
			return InternalError("unexpected length code")
		case 16:
			// Repeat the previous length 3 to 6 times.
			rep = 3
			nb = 2
			if i == 0 {
				// There is no previous length to repeat.
				return CorruptInputError(f.roffset)
			}
			b = f.bits[i-1]
		case 17:
			// Repeat a zero length 3 to 10 times.
			rep = 3
			nb = 3
			b = 0
		case 18:
			// Repeat a zero length 11 to 138 times.
			rep = 11
			nb = 7
			b = 0
		}
		for f.nb < nb {
			if err := f.moreBits(); err != nil {
				return err
			}
		}
		rep += int(f.b & uint32(1<<nb-1))
		f.b >>= nb
		f.nb -= nb
		// A repeat must not run past the announced number of lengths.
		if i+rep > n {
			return CorruptInputError(f.roffset)
		}
		for j := 0; j < rep; j++ {
			f.bits[i] = b
			i++
		}
	}

	// The first nlit lengths define the literal/length code and the
	// following ndist lengths define the distance code.
	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
		return CorruptInputError(f.roffset)
	}

	// As an optimization, we can initialize the min bits to read at a time
	// for the HLIT tree to the length of the EOB marker since we know that
	// every block must terminate with one. This preserves the property that
	// we never read any extra bytes after the end of the DEFLATE stream.
	if f.h1.min < f.bits[endBlockMarker] {
		f.h1.min = f.bits[endBlockMarker]
	}

	return nil
}
   474  
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
//
// huffmanBlock is resumable. Whenever output has to be handed to the caller,
// it stores the output in f.toRead, sets f.step to itself and records in
// f.stepState where to continue, then returns. Errors are recorded in f.err.
func (f *decompressor) huffmanBlock() {
	const (
		stateInit = iota // Zero value must be stateInit
		stateDict
	)

	// Resume at the point recorded by the previous call.
	switch f.stepState {
	case stateInit:
		goto readLiteral
	case stateDict:
		goto copyHistory
	}

readLiteral:
	// Read literal and/or (length, distance) according to RFC section 3.2.3.
	{
		v, err := f.huffSym(f.hl)
		if err != nil {
			f.err = err
			return
		}
		var n uint // number of bits extra
		var length int
		switch {
		case v < 256:
			// Literal byte.
			f.dict.writeByte(byte(v))
			if f.dict.availWrite() == 0 {
				// The output window is full; hand it to the reader first.
				f.toRead = f.dict.readFlush()
				f.step = (*decompressor).huffmanBlock
				f.stepState = stateInit
				return
			}
			goto readLiteral
		case v == 256:
			// End of block.
			f.finishBlock()
			return
		// otherwise, reference to older data
		// Each case below computes the base length for its group of
		// symbols; n extra bits read afterwards are added to that base.
		case v < 265:
			// Symbols 257..264 give lengths 3..10 directly.
			length = v - (257 - 3)
			n = 0
		case v < 269:
			// Symbols 265..268 give bases 11, 13, 15, 17.
			length = v*2 - (265*2 - 11)
			n = 1
		case v < 273:
			// Symbols 269..272 give bases 19, 23, 27, 31.
			length = v*4 - (269*4 - 19)
			n = 2
		case v < 277:
			// Symbols 273..276 give bases 35, 43, 51, 59.
			length = v*8 - (273*8 - 35)
			n = 3
		case v < 281:
			// Symbols 277..280 give bases 67, 83, 99, 115.
			length = v*16 - (277*16 - 67)
			n = 4
		case v < 285:
			// Symbols 281..284 give bases 131, 163, 195, 227.
			length = v*32 - (281*32 - 131)
			n = 5
		case v < maxNumLit:
			// Symbol 285 is the fixed length 258.
			length = 258
			n = 0
		default:
			f.err = CorruptInputError(f.roffset)
			return
		}
		if n > 0 {
			for f.nb < n {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			length += int(f.b & uint32(1<<n-1))
			f.b >>= n
			f.nb -= n
		}

		var dist int
		if f.hd == nil {
			// Fixed Huffman block: the distance symbol is a plain 5-bit
			// value whose bits arrive in reverse order.
			for f.nb < 5 {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			// Shifting the 5 bits to the top of a byte before reversing
			// leaves the reversed 5-bit value in the low bits.
			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
			f.b >>= 5
			f.nb -= 5
		} else {
			if dist, err = f.huffSym(f.hd); err != nil {
				f.err = err
				return
			}
		}

		// Convert the distance symbol into an actual distance.
		switch {
		case dist < 4:
			// Symbols 0..3 are the distances 1..4.
			dist++
		case dist < maxNumDist:
			nb := uint(dist-2) >> 1
			// have 1 bit in bottom of dist, need nb more.
			extra := (dist & 1) << nb
			for f.nb < nb {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			extra |= int(f.b & uint32(1<<nb-1))
			f.b >>= nb
			f.nb -= nb
			dist = 1<<(nb+1) + 1 + extra
		default:
			f.err = CorruptInputError(f.roffset)
			return
		}

		// No check on length; encoding can be prescient.
		// The distance, however, must not reach back beyond the history
		// that has been produced so far.
		if dist > f.dict.histSize() {
			f.err = CorruptInputError(f.roffset)
			return
		}

		f.copyLen, f.copyDist = length, dist
		goto copyHistory
	}

copyHistory:
	// Perform a backwards copy according to RFC section 3.2.3.
	{
		// Fall back to writeCopy when tryWriteCopy copies nothing.
		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
		if cnt == 0 {
			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
		}
		f.copyLen -= cnt

		if f.dict.availWrite() == 0 || f.copyLen > 0 {
			f.toRead = f.dict.readFlush()
			f.step = (*decompressor).huffmanBlock // We need to continue this work
			f.stepState = stateDict
			return
		}
		goto readLiteral
	}
}
   621  
   622  // Copy a single uncompressed data block from input to output.
   623  func (f *decompressor) dataBlock() {
   624  	// Uncompressed.
   625  	// Discard current half-byte.
   626  	f.nb = 0
   627  	f.b = 0
   628  
   629  	// Length then ones-complement of length.
   630  	nr, err := io.ReadFull(f.r, f.buf[0:4])
   631  	f.roffset += int64(nr)
   632  	if err != nil {
   633  		f.err = noEOF(err)
   634  		return
   635  	}
   636  	n := int(f.buf[0]) | int(f.buf[1])<<8
   637  	nn := int(f.buf[2]) | int(f.buf[3])<<8
   638  	if uint16(nn) != uint16(^n) {
   639  		f.err = CorruptInputError(f.roffset)
   640  		return
   641  	}
   642  
   643  	if n == 0 {
   644  		f.toRead = f.dict.readFlush()
   645  		f.finishBlock()
   646  		return
   647  	}
   648  
   649  	f.copyLen = n
   650  	f.copyData()
   651  }
   652  
   653  // copyData copies f.copyLen bytes from the underlying reader into f.hist.
   654  // It pauses for reads when f.hist is full.
   655  func (f *decompressor) copyData() {
   656  	buf := f.dict.writeSlice()
   657  	if len(buf) > f.copyLen {
   658  		buf = buf[:f.copyLen]
   659  	}
   660  
   661  	cnt, err := io.ReadFull(f.r, buf)
   662  	f.roffset += int64(cnt)
   663  	f.copyLen -= cnt
   664  	f.dict.writeMark(cnt)
   665  	if err != nil {
   666  		f.err = noEOF(err)
   667  		return
   668  	}
   669  
   670  	if f.dict.availWrite() == 0 || f.copyLen > 0 {
   671  		f.toRead = f.dict.readFlush()
   672  		f.step = (*decompressor).copyData
   673  		return
   674  	}
   675  	f.finishBlock()
   676  }
   677  
   678  func (f *decompressor) finishBlock() {
   679  	if f.final {
   680  		if f.dict.availRead() > 0 {
   681  			f.toRead = f.dict.readFlush()
   682  		}
   683  		f.err = io.EOF
   684  	}
   685  	f.step = (*decompressor).nextBlock
   686  }
   687  
   688  // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
   689  func noEOF(e error) error {
   690  	if e == io.EOF {
   691  		return io.ErrUnexpectedEOF
   692  	}
   693  	return e
   694  }
   695  
   696  func (f *decompressor) moreBits() error {
   697  	c, err := f.r.ReadByte()
   698  	if err != nil {
   699  		return noEOF(err)
   700  	}
   701  	f.roffset++
   702  	f.b |= uint32(c) << f.nb
   703  	f.nb += 8
   704  	return nil
   705  }
   706  
// Read the next Huffman-encoded symbol from f according to h.
//
// On success it returns the decoded symbol and removes the bits of its code
// from the pending input. It returns a read error (with io.EOF converted by
// noEOF) if the input ends early, or a CorruptInputError, which is also
// stored in f.err, if the bits do not correspond to any code in h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
	// with single element, huffSym must error on these two edge cases. In both
	// cases, the chunks slice will be 0 for the invalid sequence, leading it
	// to satisfy the n == 0 check below.
	n := uint(h.min)
	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
	// but is smart enough to keep local variables in registers, so use nb and b,
	// inline call to moreBits and reassign b,nb back to f on return.
	nb, b := f.nb, f.b
	for {
		// Make sure at least n bits are available, where n is the best
		// known lower bound on the length of the next code.
		for nb < n {
			c, err := f.r.ReadByte()
			if err != nil {
				f.b = b
				f.nb = nb
				return 0, noEOF(err)
			}
			f.roffset++
			b |= uint32(c) << (nb & 31)
			nb += 8
		}
		// Look up the low huffmanChunkBits bits in the primary table.
		chunk := h.chunks[b&(huffmanNumChunks-1)]
		n = uint(chunk & huffmanCountMask)
		if n > huffmanChunkBits {
			// The chunk is a link: use the following bits to index the
			// overflow table it points to.
			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
			n = uint(chunk & huffmanCountMask)
		}
		if n <= nb {
			if n == 0 {
				// A zero chunk means no code matches these bits.
				f.b = b
				f.nb = nb
				f.err = CorruptInputError(f.roffset)
				return 0, f.err
			}
			// Consume the n bits of the code and return its symbol.
			f.b = b >> (n & 31)
			f.nb = nb - n
			return int(chunk >> huffmanValueShift), nil
		}
		// Otherwise the code is longer than the bits read so far; loop to
		// read more bits and look it up again.
	}
}
   749  
   750  func (f *decompressor) makeReader(r io.Reader) {
   751  	if rr, ok := r.(Reader); ok {
   752  		f.rBuf = nil
   753  		f.r = rr
   754  		return
   755  	}
   756  	// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
   757  	if f.rBuf != nil {
   758  		f.rBuf.Reset(r)
   759  	} else {
   760  		// bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
   761  		f.rBuf = bufio.NewReader(r)
   762  	}
   763  	f.r = f.rBuf
   764  }
   765  
   766  func fixedHuffmanDecoderInit() {
   767  	fixedOnce.Do(func() {
   768  		// These come from the RFC section 3.2.6.
   769  		var bits [288]int
   770  		for i := 0; i < 144; i++ {
   771  			bits[i] = 8
   772  		}
   773  		for i := 144; i < 256; i++ {
   774  			bits[i] = 9
   775  		}
   776  		for i := 256; i < 280; i++ {
   777  			bits[i] = 7
   778  		}
   779  		for i := 280; i < 288; i++ {
   780  			bits[i] = 8
   781  		}
   782  		fixedHuffmanDecoder.init(bits[:])
   783  	})
   784  }
   785  
   786  func (f *decompressor) Reset(r io.Reader, dict []byte) error {
   787  	*f = decompressor{
   788  		rBuf:     f.rBuf,
   789  		bits:     f.bits,
   790  		codebits: f.codebits,
   791  		dict:     f.dict,
   792  		step:     (*decompressor).nextBlock,
   793  	}
   794  	f.makeReader(r)
   795  	f.dict.init(maxMatchOffset, dict)
   796  	return nil
   797  }
   798  
   799  // NewReader returns a new ReadCloser that can be used
   800  // to read the uncompressed version of r.
   801  // If r does not also implement [io.ByteReader],
   802  // the decompressor may read more data than necessary from r.
   803  // The reader returns [io.EOF] after the final block in the DEFLATE stream has
   804  // been encountered. Any trailing data after the final block is ignored.
   805  //
   806  // The [io.ReadCloser] returned by NewReader also implements [Resetter].
   807  func NewReader(r io.Reader) io.ReadCloser {
   808  	fixedHuffmanDecoderInit()
   809  
   810  	var f decompressor
   811  	f.makeReader(r)
   812  	f.bits = new([maxNumLit + maxNumDist]int)
   813  	f.codebits = new([numCodes]int)
   814  	f.step = (*decompressor).nextBlock
   815  	f.dict.init(maxMatchOffset, nil)
   816  	return &f
   817  }
   818  
   819  // NewReaderDict is like [NewReader] but initializes the reader
   820  // with a preset dictionary. The returned [Reader] behaves as if
   821  // the uncompressed data stream started with the given dictionary,
   822  // which has already been read. NewReaderDict is typically used
   823  // to read data compressed by NewWriterDict.
   824  //
   825  // The ReadCloser returned by NewReaderDict also implements [Resetter].
   826  func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
   827  	fixedHuffmanDecoderInit()
   828  
   829  	var f decompressor
   830  	f.makeReader(r)
   831  	f.bits = new([maxNumLit + maxNumDist]int)
   832  	f.codebits = new([numCodes]int)
   833  	f.step = (*decompressor).nextBlock
   834  	f.dict.init(maxMatchOffset, dict)
   835  	return &f
   836  }
   837
View as plain text