The Go Programming Language

Source file src/pkg/compress/flate/inflate.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package flate implements the DEFLATE compressed data format, described in
     6	// RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
     7	// formats.
     8	package flate
     9	
    10	import (
    11		"bufio"
    12		"io"
    13		"os"
    14		"strconv"
    15	)
    16	
    17	const (
    18		maxCodeLen = 16    // max length of Huffman code
    19		maxHist    = 32768 // max history required
    20		maxLit     = 286
    21		maxDist    = 32
    22		numCodes   = 19 // number of codes in Huffman meta-code
    23	)
    24	
    25	// A CorruptInputError reports the presence of corrupt input at a given offset.
    26	type CorruptInputError int64
    27	
    28	func (e CorruptInputError) String() string {
    29		return "flate: corrupt input before offset " + strconv.Itoa64(int64(e))
    30	}
    31	
    32	// An InternalError reports an error in the flate code itself.
    33	type InternalError string
    34	
    35	func (e InternalError) String() string { return "flate: internal error: " + string(e) }
    36	
    37	// A ReadError reports an error encountered while reading input.
    38	type ReadError struct {
    39		Offset int64    // byte offset where error occurred
    40		Error  os.Error // error returned by underlying Read
    41	}
    42	
    43	func (e *ReadError) String() string {
    44		return "flate: read error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String()
    45	}
    46	
    47	// A WriteError reports an error encountered while writing output.
    48	type WriteError struct {
    49		Offset int64    // byte offset where error occurred
    50		Error  os.Error // error returned by underlying Write
    51	}
    52	
    53	func (e *WriteError) String() string {
    54		return "flate: write error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String()
    55	}
    56	
    57	// Huffman decoder is based on
    58	// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
    59	// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
    60	type huffmanDecoder struct {
    61		// min, max code length
    62		min, max int
    63	
    64		// limit[i] = largest code word of length i
    65		// Given code v of length n,
    66		// need more bits if v > limit[n].
    67		limit [maxCodeLen + 1]int
    68	
    69		// base[i] = smallest code word of length i - seq number
    70		base [maxCodeLen + 1]int
    71	
    72		// codes[seq number] = output code.
    73		// Given code v of length n, value is
    74		// codes[v - base[n]].
    75		codes []int
    76	}
    77	
    78	// Initialize Huffman decoding tables from array of code lengths.
    79	func (h *huffmanDecoder) init(bits []int) bool {
    80		// Count number of codes of each length,
    81		// compute min and max length.
    82		var count [maxCodeLen + 1]int
    83		var min, max int
    84		for _, n := range bits {
    85			if n == 0 {
    86				continue
    87			}
    88			if min == 0 || n < min {
    89				min = n
    90			}
    91			if n > max {
    92				max = n
    93			}
    94			count[n]++
    95		}
    96		if max == 0 {
    97			return false
    98		}
    99	
   100		h.min = min
   101		h.max = max
   102	
   103		// For each code range, compute
   104		// nextcode (first code of that length),
   105		// limit (last code of that length), and
   106		// base (offset from first code to sequence number).
   107		code := 0
   108		seq := 0
   109		var nextcode [maxCodeLen]int
   110		for i := min; i <= max; i++ {
   111			n := count[i]
   112			nextcode[i] = code
   113			h.base[i] = code - seq
   114			code += n
   115			seq += n
   116			h.limit[i] = code - 1
   117			code <<= 1
   118		}
   119	
   120		// Make array mapping sequence numbers to codes.
   121		if len(h.codes) < len(bits) {
   122			h.codes = make([]int, len(bits))
   123		}
   124		for i, n := range bits {
   125			if n == 0 {
   126				continue
   127			}
   128			code := nextcode[n]
   129			nextcode[n]++
   130			seq := code - h.base[n]
   131			h.codes[seq] = i
   132		}
   133		return true
   134	}
   135	
   136	// Hard-coded Huffman tables for DEFLATE algorithm.
   137	// See RFC 1951, section 3.2.6.
   138	var fixedHuffmanDecoder = huffmanDecoder{
   139		7, 9,
   140		[maxCodeLen + 1]int{7: 23, 199, 511},
   141		[maxCodeLen + 1]int{7: 0, 24, 224},
   142		[]int{
   143			// length 7: 256-279
   144			256, 257, 258, 259, 260, 261, 262,
   145			263, 264, 265, 266, 267, 268, 269,
   146			270, 271, 272, 273, 274, 275, 276,
   147			277, 278, 279,
   148	
   149			// length 8: 0-143
   150			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
   151			12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
   152			22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   153			32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
   154			42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
   155			52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
   156			62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
   157			72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
   158			82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
   159			92, 93, 94, 95, 96, 97, 98, 99, 100,
   160			101, 102, 103, 104, 105, 106, 107, 108,
   161			109, 110, 111, 112, 113, 114, 115, 116,
   162			117, 118, 119, 120, 121, 122, 123, 124,
   163			125, 126, 127, 128, 129, 130, 131, 132,
   164			133, 134, 135, 136, 137, 138, 139, 140,
   165			141, 142, 143,
   166	
   167			// length 8: 280-287
   168			280, 281, 282, 283, 284, 285, 286, 287,
   169	
   170			// length 9: 144-255
   171			144, 145, 146, 147, 148, 149, 150, 151,
   172			152, 153, 154, 155, 156, 157, 158, 159,
   173			160, 161, 162, 163, 164, 165, 166, 167,
   174			168, 169, 170, 171, 172, 173, 174, 175,
   175			176, 177, 178, 179, 180, 181, 182, 183,
   176			184, 185, 186, 187, 188, 189, 190, 191,
   177			192, 193, 194, 195, 196, 197, 198, 199,
   178			200, 201, 202, 203, 204, 205, 206, 207,
   179			208, 209, 210, 211, 212, 213, 214, 215,
   180			216, 217, 218, 219, 220, 221, 222, 223,
   181			224, 225, 226, 227, 228, 229, 230, 231,
   182			232, 233, 234, 235, 236, 237, 238, 239,
   183			240, 241, 242, 243, 244, 245, 246, 247,
   184			248, 249, 250, 251, 252, 253, 254, 255,
   185		},
   186	}
   187	
   188	// The actual read interface needed by NewReader.
   189	// If the passed in io.Reader does not also have ReadByte,
   190	// the NewReader will introduce its own buffering.
   191	type Reader interface {
   192		io.Reader
   193		ReadByte() (c byte, err os.Error)
   194	}
   195	
   196	// Decompress state.
   197	type decompressor struct {
   198		// Input source.
   199		r       Reader
   200		roffset int64
   201		woffset int64
   202	
   203		// Input bits, in top of b.
   204		b  uint32
   205		nb uint
   206	
   207		// Huffman decoders for literal/length, distance.
   208		h1, h2 huffmanDecoder
   209	
   210		// Length arrays used to define Huffman codes.
   211		bits     [maxLit + maxDist]int
   212		codebits [numCodes]int
   213	
   214		// Output history, buffer.
   215		hist  [maxHist]byte
   216		hp    int  // current output position in buffer
   217		hw    int  // have written hist[0:hw] already
   218		hfull bool // buffer has filled at least once
   219	
   220		// Temporary buffer (avoids repeated allocation).
   221		buf [4]byte
   222	
   223		// Next step in the decompression,
   224		// and decompression state.
   225		step     func(*decompressor)
   226		final    bool
   227		err      os.Error
   228		toRead   []byte
   229		hl, hd   *huffmanDecoder
   230		copyLen  int
   231		copyDist int
   232	}
   233	
   234	func (f *decompressor) nextBlock() {
   235		if f.final {
   236			if f.hw != f.hp {
   237				f.flush((*decompressor).nextBlock)
   238				return
   239			}
   240			f.err = os.EOF
   241			return
   242		}
   243		for f.nb < 1+2 {
   244			if f.err = f.moreBits(); f.err != nil {
   245				return
   246			}
   247		}
   248		f.final = f.b&1 == 1
   249		f.b >>= 1
   250		typ := f.b & 3
   251		f.b >>= 2
   252		f.nb -= 1 + 2
   253		switch typ {
   254		case 0:
   255			f.dataBlock()
   256		case 1:
   257			// compressed, fixed Huffman tables
   258			f.hl = &fixedHuffmanDecoder
   259			f.hd = nil
   260			f.huffmanBlock()
   261		case 2:
   262			// compressed, dynamic Huffman tables
   263			if f.err = f.readHuffman(); f.err != nil {
   264				break
   265			}
   266			f.hl = &f.h1
   267			f.hd = &f.h2
   268			f.huffmanBlock()
   269		default:
   270			// 3 is reserved.
   271			f.err = CorruptInputError(f.roffset)
   272		}
   273	}
   274	
   275	func (f *decompressor) Read(b []byte) (int, os.Error) {
   276		for {
   277			if len(f.toRead) > 0 {
   278				n := copy(b, f.toRead)
   279				f.toRead = f.toRead[n:]
   280				return n, nil
   281			}
   282			if f.err != nil {
   283				return 0, f.err
   284			}
   285			f.step(f)
   286		}
   287		panic("unreachable")
   288	}
   289	
   290	func (f *decompressor) Close() os.Error {
   291		if f.err == os.EOF {
   292			return nil
   293		}
   294		return f.err
   295	}
   296	
   297	// RFC 1951 section 3.2.7.
   298	// Compression with dynamic Huffman codes
   299	
   300	var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
   301	
   302	func (f *decompressor) readHuffman() os.Error {
   303		// HLIT[5], HDIST[5], HCLEN[4].
   304		for f.nb < 5+5+4 {
   305			if err := f.moreBits(); err != nil {
   306				return err
   307			}
   308		}
   309		nlit := int(f.b&0x1F) + 257
   310		f.b >>= 5
   311		ndist := int(f.b&0x1F) + 1
   312		f.b >>= 5
   313		nclen := int(f.b&0xF) + 4
   314		f.b >>= 4
   315		f.nb -= 5 + 5 + 4
   316	
   317		// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
   318		for i := 0; i < nclen; i++ {
   319			for f.nb < 3 {
   320				if err := f.moreBits(); err != nil {
   321					return err
   322				}
   323			}
   324			f.codebits[codeOrder[i]] = int(f.b & 0x7)
   325			f.b >>= 3
   326			f.nb -= 3
   327		}
   328		for i := nclen; i < len(codeOrder); i++ {
   329			f.codebits[codeOrder[i]] = 0
   330		}
   331		if !f.h1.init(f.codebits[0:]) {
   332			return CorruptInputError(f.roffset)
   333		}
   334	
   335		// HLIT + 257 code lengths, HDIST + 1 code lengths,
   336		// using the code length Huffman code.
   337		for i, n := 0, nlit+ndist; i < n; {
   338			x, err := f.huffSym(&f.h1)
   339			if err != nil {
   340				return err
   341			}
   342			if x < 16 {
   343				// Actual length.
   344				f.bits[i] = x
   345				i++
   346				continue
   347			}
   348			// Repeat previous length or zero.
   349			var rep int
   350			var nb uint
   351			var b int
   352			switch x {
   353			default:
   354				return InternalError("unexpected length code")
   355			case 16:
   356				rep = 3
   357				nb = 2
   358				if i == 0 {
   359					return CorruptInputError(f.roffset)
   360				}
   361				b = f.bits[i-1]
   362			case 17:
   363				rep = 3
   364				nb = 3
   365				b = 0
   366			case 18:
   367				rep = 11
   368				nb = 7
   369				b = 0
   370			}
   371			for f.nb < nb {
   372				if err := f.moreBits(); err != nil {
   373					return err
   374				}
   375			}
   376			rep += int(f.b & uint32(1<<nb-1))
   377			f.b >>= nb
   378			f.nb -= nb
   379			if i+rep > n {
   380				return CorruptInputError(f.roffset)
   381			}
   382			for j := 0; j < rep; j++ {
   383				f.bits[i] = b
   384				i++
   385			}
   386		}
   387	
   388		if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
   389			return CorruptInputError(f.roffset)
   390		}
   391	
   392		return nil
   393	}
   394	
   395	// Decode a single Huffman block from f.
   396	// hl and hd are the Huffman states for the lit/length values
   397	// and the distance values, respectively.  If hd == nil, using the
   398	// fixed distance encoding associated with fixed Huffman blocks.
   399	func (f *decompressor) huffmanBlock() {
   400		for {
   401			v, err := f.huffSym(f.hl)
   402			if err != nil {
   403				f.err = err
   404				return
   405			}
   406			var n uint // number of bits extra
   407			var length int
   408			switch {
   409			case v < 256:
   410				f.hist[f.hp] = byte(v)
   411				f.hp++
   412				if f.hp == len(f.hist) {
   413					// After the flush, continue this loop.
   414					f.flush((*decompressor).huffmanBlock)
   415					return
   416				}
   417				continue
   418			case v == 256:
   419				// Done with huffman block; read next block.
   420				f.step = (*decompressor).nextBlock
   421				return
   422			// otherwise, reference to older data
   423			case v < 265:
   424				length = v - (257 - 3)
   425				n = 0
   426			case v < 269:
   427				length = v*2 - (265*2 - 11)
   428				n = 1
   429			case v < 273:
   430				length = v*4 - (269*4 - 19)
   431				n = 2
   432			case v < 277:
   433				length = v*8 - (273*8 - 35)
   434				n = 3
   435			case v < 281:
   436				length = v*16 - (277*16 - 67)
   437				n = 4
   438			case v < 285:
   439				length = v*32 - (281*32 - 131)
   440				n = 5
   441			default:
   442				length = 258
   443				n = 0
   444			}
   445			if n > 0 {
   446				for f.nb < n {
   447					if err = f.moreBits(); err != nil {
   448						f.err = err
   449						return
   450					}
   451				}
   452				length += int(f.b & uint32(1<<n-1))
   453				f.b >>= n
   454				f.nb -= n
   455			}
   456	
   457			var dist int
   458			if f.hd == nil {
   459				for f.nb < 5 {
   460					if err = f.moreBits(); err != nil {
   461						f.err = err
   462						return
   463					}
   464				}
   465				dist = int(reverseByte[(f.b&0x1F)<<3])
   466				f.b >>= 5
   467				f.nb -= 5
   468			} else {
   469				if dist, err = f.huffSym(f.hd); err != nil {
   470					f.err = err
   471					return
   472				}
   473			}
   474	
   475			switch {
   476			case dist < 4:
   477				dist++
   478			case dist >= 30:
   479				f.err = CorruptInputError(f.roffset)
   480				return
   481			default:
   482				nb := uint(dist-2) >> 1
   483				// have 1 bit in bottom of dist, need nb more.
   484				extra := (dist & 1) << nb
   485				for f.nb < nb {
   486					if err = f.moreBits(); err != nil {
   487						f.err = err
   488						return
   489					}
   490				}
   491				extra |= int(f.b & uint32(1<<nb-1))
   492				f.b >>= nb
   493				f.nb -= nb
   494				dist = 1<<(nb+1) + 1 + extra
   495			}
   496	
   497			// Copy history[-dist:-dist+length] into output.
   498			if dist > len(f.hist) {
   499				f.err = InternalError("bad history distance")
   500				return
   501			}
   502	
   503			// No check on length; encoding can be prescient.
   504			if !f.hfull && dist > f.hp {
   505				f.err = CorruptInputError(f.roffset)
   506				return
   507			}
   508	
   509			p := f.hp - dist
   510			if p < 0 {
   511				p += len(f.hist)
   512			}
   513			for i := 0; i < length; i++ {
   514				f.hist[f.hp] = f.hist[p]
   515				f.hp++
   516				p++
   517				if f.hp == len(f.hist) {
   518					// After flush continue copying out of history.
   519					f.copyLen = length - (i + 1)
   520					f.copyDist = dist
   521					f.flush((*decompressor).copyHuff)
   522					return
   523				}
   524				if p == len(f.hist) {
   525					p = 0
   526				}
   527			}
   528		}
   529		panic("unreached")
   530	}
   531	
   532	func (f *decompressor) copyHuff() {
   533		length := f.copyLen
   534		dist := f.copyDist
   535		p := f.hp - dist
   536		if p < 0 {
   537			p += len(f.hist)
   538		}
   539		for i := 0; i < length; i++ {
   540			f.hist[f.hp] = f.hist[p]
   541			f.hp++
   542			p++
   543			if f.hp == len(f.hist) {
   544				f.copyLen = length - (i + 1)
   545				f.flush((*decompressor).copyHuff)
   546				return
   547			}
   548			if p == len(f.hist) {
   549				p = 0
   550			}
   551		}
   552	
   553		// Continue processing Huffman block.
   554		f.huffmanBlock()
   555	}
   556	
   557	// Copy a single uncompressed data block from input to output.
   558	func (f *decompressor) dataBlock() {
   559		// Uncompressed.
   560		// Discard current half-byte.
   561		f.nb = 0
   562		f.b = 0
   563	
   564		// Length then ones-complement of length.
   565		nr, err := io.ReadFull(f.r, f.buf[0:4])
   566		f.roffset += int64(nr)
   567		if err != nil {
   568			f.err = &ReadError{f.roffset, err}
   569			return
   570		}
   571		n := int(f.buf[0]) | int(f.buf[1])<<8
   572		nn := int(f.buf[2]) | int(f.buf[3])<<8
   573		if uint16(nn) != uint16(^n) {
   574			f.err = CorruptInputError(f.roffset)
   575			return
   576		}
   577	
   578		if n == 0 {
   579			// 0-length block means sync
   580			f.flush((*decompressor).nextBlock)
   581			return
   582		}
   583	
   584		f.copyLen = n
   585		f.copyData()
   586	}
   587	
   588	func (f *decompressor) copyData() {
   589		// Read f.dataLen bytes into history,
   590		// pausing for reads as history fills.
   591		n := f.copyLen
   592		for n > 0 {
   593			m := len(f.hist) - f.hp
   594			if m > n {
   595				m = n
   596			}
   597			m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
   598			f.roffset += int64(m)
   599			if err != nil {
   600				f.err = &ReadError{f.roffset, err}
   601				return
   602			}
   603			n -= m
   604			f.hp += m
   605			if f.hp == len(f.hist) {
   606				f.copyLen = n
   607				f.flush((*decompressor).copyData)
   608				return
   609			}
   610		}
   611		f.step = (*decompressor).nextBlock
   612	}
   613	
   614	func (f *decompressor) setDict(dict []byte) {
   615		if len(dict) > len(f.hist) {
   616			// Will only remember the tail.
   617			dict = dict[len(dict)-len(f.hist):]
   618		}
   619	
   620		f.hp = copy(f.hist[:], dict)
   621		if f.hp == len(f.hist) {
   622			f.hp = 0
   623			f.hfull = true
   624		}
   625		f.hw = f.hp
   626	}
   627	
   628	func (f *decompressor) moreBits() os.Error {
   629		c, err := f.r.ReadByte()
   630		if err != nil {
   631			if err == os.EOF {
   632				err = io.ErrUnexpectedEOF
   633			}
   634			return err
   635		}
   636		f.roffset++
   637		f.b |= uint32(c) << f.nb
   638		f.nb += 8
   639		return nil
   640	}
   641	
   642	// Read the next Huffman-encoded symbol from f according to h.
   643	func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) {
   644		for n := uint(h.min); n <= uint(h.max); n++ {
   645			lim := h.limit[n]
   646			if lim == -1 {
   647				continue
   648			}
   649			for f.nb < n {
   650				if err := f.moreBits(); err != nil {
   651					return 0, err
   652				}
   653			}
   654			v := int(f.b & uint32(1<<n-1))
   655			v <<= 16 - n
   656			v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
   657			if v <= lim {
   658				f.b >>= n
   659				f.nb -= n
   660				return h.codes[v-h.base[n]], nil
   661			}
   662		}
   663		return 0, CorruptInputError(f.roffset)
   664	}
   665	
   666	// Flush any buffered output to the underlying writer.
   667	func (f *decompressor) flush(step func(*decompressor)) {
   668		f.toRead = f.hist[f.hw:f.hp]
   669		f.woffset += int64(f.hp - f.hw)
   670		f.hw = f.hp
   671		if f.hp == len(f.hist) {
   672			f.hp = 0
   673			f.hw = 0
   674			f.hfull = true
   675		}
   676		f.step = step
   677	}
   678	
   679	func makeReader(r io.Reader) Reader {
   680		if rr, ok := r.(Reader); ok {
   681			return rr
   682		}
   683		return bufio.NewReader(r)
   684	}
   685	
   686	// NewReader returns a new ReadCloser that can be used
   687	// to read the uncompressed version of r.  It is the caller's
   688	// responsibility to call Close on the ReadCloser when
   689	// finished reading.
   690	func NewReader(r io.Reader) io.ReadCloser {
   691		var f decompressor
   692		f.r = makeReader(r)
   693		f.step = (*decompressor).nextBlock
   694		return &f
   695	}
   696	
   697	// NewReaderDict is like NewReader but initializes the reader
   698	// with a preset dictionary.  The returned Reader behaves as if
   699	// the uncompressed data stream started with the given dictionary,
   700	// which has already been read.  NewReaderDict is typically used
   701	// to read data compressed by NewWriterDict.
   702	func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
   703		var f decompressor
   704		f.setDict(dict)
   705		f.r = makeReader(r)
   706		f.step = (*decompressor).nextBlock
   707		return &f
   708	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.