The Go Programming Language

Source file src/pkg/compress/flate/deflate.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package flate
     6	
     7	import (
     8		"io"
     9		"math"
    10		"os"
    11	)
    12	
    13	const (
    14		NoCompression      = 0
    15		BestSpeed          = 1
    16		fastCompression    = 3
    17		BestCompression    = 9
    18		DefaultCompression = -1
    19		logWindowSize      = 15
    20		windowSize         = 1 << logWindowSize
    21		windowMask         = windowSize - 1
    22		logMaxOffsetSize   = 15  // Standard DEFLATE
    23		minMatchLength     = 3   // The smallest match that the compressor looks for
    24		maxMatchLength     = 258 // The longest match for the compressor
    25		minOffsetSize      = 1   // The shortest offset that makes any sence
    26	
    27		// The maximum number of tokens we put into a single flat block, just too
    28		// stop things from getting too large.
    29		maxFlateBlockTokens = 1 << 14
    30		maxStoreBlockSize   = 65535
    31		hashBits            = 15
    32		hashSize            = 1 << hashBits
    33		hashMask            = (1 << hashBits) - 1
    34		hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
    35	)
    36	
    37	type compressionLevel struct {
    38		good, lazy, nice, chain, fastSkipHashing int
    39	}
    40	
    41	var levels = []compressionLevel{
    42		{}, // 0
    43		// For levels 1-3 we don't bother trying with lazy matches
    44		{3, 0, 8, 4, 4},
    45		{3, 0, 16, 8, 5},
    46		{3, 0, 32, 32, 6},
    47		// Levels 4-9 use increasingly more lazy matching
    48		// and increasingly stringent conditions for "good enough".
    49		{4, 4, 16, 16, math.MaxInt32},
    50		{8, 16, 32, 32, math.MaxInt32},
    51		{8, 16, 128, 128, math.MaxInt32},
    52		{8, 32, 128, 256, math.MaxInt32},
    53		{32, 128, 258, 1024, math.MaxInt32},
    54		{32, 258, 258, 4096, math.MaxInt32},
    55	}
    56	
    57	type compressor struct {
    58		compressionLevel
    59	
    60		w *huffmanBitWriter
    61	
    62		// compression algorithm
    63		fill func(*compressor, []byte) int // copy data to window
    64		step func(*compressor)             // process window
    65		sync bool                          // requesting flush
    66	
    67		// Input hash chains
    68		// hashHead[hashValue] contains the largest inputIndex with the specified hash value
    69		// If hashHead[hashValue] is within the current window, then
    70		// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
    71		// with the same hash value.
    72		chainHead int
    73		hashHead  []int
    74		hashPrev  []int
    75	
    76		// input window: unprocessed data is window[index:windowEnd]
    77		index         int
    78		window        []byte
    79		windowEnd     int
    80		blockStart    int  // window index where current tokens start
    81		byteAvailable bool // if true, still need to process window[index-1].
    82	
    83		// queued output tokens: tokens[:ti]
    84		tokens []token
    85		ti     int
    86	
    87		// deflate state
    88		length         int
    89		offset         int
    90		hash           int
    91		maxInsertIndex int
    92		err            os.Error
    93	}
    94	
    95	func (d *compressor) fillDeflate(b []byte) int {
    96		if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
    97			// shift the window by windowSize
    98			copy(d.window, d.window[windowSize:2*windowSize])
    99			d.index -= windowSize
   100			d.windowEnd -= windowSize
   101			if d.blockStart >= windowSize {
   102				d.blockStart -= windowSize
   103			} else {
   104				d.blockStart = math.MaxInt32
   105			}
   106			for i, h := range d.hashHead {
   107				v := h - windowSize
   108				if v < -1 {
   109					v = -1
   110				}
   111				d.hashHead[i] = v
   112			}
   113			for i, h := range d.hashPrev {
   114				v := -h - windowSize
   115				if v < -1 {
   116					v = -1
   117				}
   118				d.hashPrev[i] = v
   119			}
   120		}
   121		n := copy(d.window[d.windowEnd:], b)
   122		d.windowEnd += n
   123		return n
   124	}
   125	
   126	func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
   127		if index > 0 || eof {
   128			var window []byte
   129			if d.blockStart <= index {
   130				window = d.window[d.blockStart:index]
   131			}
   132			d.blockStart = index
   133			d.w.writeBlock(tokens, eof, window)
   134			return d.w.err
   135		}
   136		return nil
   137	}
   138	
   139	// Try to find a match starting at index whose length is greater than prevSize.
   140	// We only look at chainCount possibilities before giving up.
   141	func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
   142		minMatchLook := maxMatchLength
   143		if lookahead < minMatchLook {
   144			minMatchLook = lookahead
   145		}
   146	
   147		win := d.window[0 : pos+minMatchLook]
   148	
   149		// We quit when we get a match that's at least nice long
   150		nice := len(win) - pos
   151		if d.nice < nice {
   152			nice = d.nice
   153		}
   154	
   155		// If we've got a match that's good enough, only look in 1/4 the chain.
   156		tries := d.chain
   157		length = prevLength
   158		if length >= d.good {
   159			tries >>= 2
   160		}
   161	
   162		w0 := win[pos]
   163		w1 := win[pos+1]
   164		wEnd := win[pos+length]
   165		minIndex := pos - windowSize
   166	
   167		for i := prevHead; tries > 0; tries-- {
   168			if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
   169				// The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
   170	
   171				n := 3
   172				for pos+n < len(win) && win[i+n] == win[pos+n] {
   173					n++
   174				}
   175				if n > length && (n > 3 || pos-i <= 4096) {
   176					length = n
   177					offset = pos - i
   178					ok = true
   179					if n >= nice {
   180						// The match is good enough that we don't try to find a better one.
   181						break
   182					}
   183					wEnd = win[pos+n]
   184				}
   185			}
   186			if i == minIndex {
   187				// hashPrev[i & windowMask] has already been overwritten, so stop now.
   188				break
   189			}
   190			if i = d.hashPrev[i&windowMask]; i < minIndex || i < 0 {
   191				break
   192			}
   193		}
   194		return
   195	}
   196	
   197	func (d *compressor) writeStoredBlock(buf []byte) os.Error {
   198		if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
   199			return d.w.err
   200		}
   201		d.w.writeBytes(buf)
   202		return d.w.err
   203	}
   204	
   205	func (d *compressor) initDeflate() {
   206		d.hashHead = make([]int, hashSize)
   207		d.hashPrev = make([]int, windowSize)
   208		d.window = make([]byte, 2*windowSize)
   209		fillInts(d.hashHead, -1)
   210		d.tokens = make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
   211		d.length = minMatchLength - 1
   212		d.offset = 0
   213		d.byteAvailable = false
   214		d.index = 0
   215		d.ti = 0
   216		d.hash = 0
   217		d.chainHead = -1
   218	}
   219	
   220	func (d *compressor) deflate() {
   221		if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
   222			return
   223		}
   224	
   225		d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
   226		if d.index < d.maxInsertIndex {
   227			d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
   228		}
   229	
   230	Loop:
   231		for {
   232			if d.index > d.windowEnd {
   233				panic("index > windowEnd")
   234			}
   235			lookahead := d.windowEnd - d.index
   236			if lookahead < minMatchLength+maxMatchLength {
   237				if !d.sync {
   238					break Loop
   239				}
   240				if d.index > d.windowEnd {
   241					panic("index > windowEnd")
   242				}
   243				if lookahead == 0 {
   244					// Flush current output block if any.
   245					if d.byteAvailable {
   246						// There is still one pending token that needs to be flushed
   247						d.tokens[d.ti] = literalToken(uint32(d.window[d.index-1]))
   248						d.ti++
   249						d.byteAvailable = false
   250					}
   251					if d.ti > 0 {
   252						if d.err = d.writeBlock(d.tokens[0:d.ti], d.index, false); d.err != nil {
   253							return
   254						}
   255						d.ti = 0
   256					}
   257					break Loop
   258				}
   259			}
   260			if d.index < d.maxInsertIndex {
   261				// Update the hash
   262				d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
   263				d.chainHead = d.hashHead[d.hash]
   264				d.hashPrev[d.index&windowMask] = d.chainHead
   265				d.hashHead[d.hash] = d.index
   266			}
   267			prevLength := d.length
   268			prevOffset := d.offset
   269			d.length = minMatchLength - 1
   270			d.offset = 0
   271			minIndex := d.index - windowSize
   272			if minIndex < 0 {
   273				minIndex = 0
   274			}
   275	
   276			if d.chainHead >= minIndex &&
   277				(d.fastSkipHashing != 0 && lookahead > minMatchLength-1 ||
   278					d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) {
   279				if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok {
   280					d.length = newLength
   281					d.offset = newOffset
   282				}
   283			}
   284			if d.fastSkipHashing != 0 && d.length >= minMatchLength ||
   285				d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength {
   286				// There was a match at the previous step, and the current match is
   287				// not better. Output the previous match.
   288				if d.fastSkipHashing != 0 {
   289					d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))
   290				} else {
   291					d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
   292				}
   293				d.ti++
   294				// Insert in the hash table all strings up to the end of the match.
   295				// index and index-1 are already inserted. If there is not enough
   296				// lookahead, the last two strings are not inserted into the hash
   297				// table.
   298				if d.length <= d.fastSkipHashing {
   299					var newIndex int
   300					if d.fastSkipHashing != 0 {
   301						newIndex = d.index + d.length
   302					} else {
   303						newIndex = prevLength - 1
   304					}
   305					for d.index++; d.index < newIndex; d.index++ {
   306						if d.index < d.maxInsertIndex {
   307							d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
   308							// Get previous value with the same hash.
   309							// Our chain should point to the previous value.
   310							d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
   311							// Set the head of the hash chain to us.
   312							d.hashHead[d.hash] = d.index
   313						}
   314					}
   315					if d.fastSkipHashing == 0 {
   316						d.byteAvailable = false
   317						d.length = minMatchLength - 1
   318					}
   319				} else {
   320					// For matches this long, we don't bother inserting each individual
   321					// item into the table.
   322					d.index += d.length
   323					d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
   324				}
   325				if d.ti == maxFlateBlockTokens {
   326					// The block includes the current character
   327					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
   328						return
   329					}
   330					d.ti = 0
   331				}
   332			} else {
   333				if d.fastSkipHashing != 0 || d.byteAvailable {
   334					i := d.index - 1
   335					if d.fastSkipHashing != 0 {
   336						i = d.index
   337					}
   338					d.tokens[d.ti] = literalToken(uint32(d.window[i]))
   339					d.ti++
   340					if d.ti == maxFlateBlockTokens {
   341						if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
   342							return
   343						}
   344						d.ti = 0
   345					}
   346				}
   347				d.index++
   348				if d.fastSkipHashing == 0 {
   349					d.byteAvailable = true
   350				}
   351			}
   352		}
   353	}
   354	
   355	func (d *compressor) fillStore(b []byte) int {
   356		n := copy(d.window[d.windowEnd:], b)
   357		d.windowEnd += n
   358		return n
   359	}
   360	
   361	func (d *compressor) store() {
   362		if d.windowEnd > 0 {
   363			d.err = d.writeStoredBlock(d.window[:d.windowEnd])
   364		}
   365		d.windowEnd = 0
   366	}
   367	
   368	func (d *compressor) write(b []byte) (n int, err os.Error) {
   369		n = len(b)
   370		b = b[d.fill(d, b):]
   371		for len(b) > 0 {
   372			d.step(d)
   373			b = b[d.fill(d, b):]
   374		}
   375		return n, d.err
   376	}
   377	
   378	func (d *compressor) syncFlush() os.Error {
   379		d.sync = true
   380		d.step(d)
   381		if d.err == nil {
   382			d.w.writeStoredHeader(0, false)
   383			d.w.flush()
   384			d.err = d.w.err
   385		}
   386		d.sync = false
   387		return d.err
   388	}
   389	
   390	func (d *compressor) init(w io.Writer, level int) (err os.Error) {
   391		d.w = newHuffmanBitWriter(w)
   392	
   393		switch {
   394		case level == NoCompression:
   395			d.window = make([]byte, maxStoreBlockSize)
   396			d.fill = (*compressor).fillStore
   397			d.step = (*compressor).store
   398		case level == DefaultCompression:
   399			level = 6
   400			fallthrough
   401		case 1 <= level && level <= 9:
   402			d.compressionLevel = levels[level]
   403			d.initDeflate()
   404			d.fill = (*compressor).fillDeflate
   405			d.step = (*compressor).deflate
   406		default:
   407			return WrongValueError{"level", 0, 9, int32(level)}
   408		}
   409		return nil
   410	}
   411	
   412	func (d *compressor) close() os.Error {
   413		d.sync = true
   414		d.step(d)
   415		if d.err != nil {
   416			return d.err
   417		}
   418		if d.w.writeStoredHeader(0, true); d.w.err != nil {
   419			return d.w.err
   420		}
   421		d.w.flush()
   422		return d.w.err
   423	}
   424	
   425	// NewWriter returns a new Writer compressing
   426	// data at the given level.  Following zlib, levels
   427	// range from 1 (BestSpeed) to 9 (BestCompression);
   428	// higher levels typically run slower but compress more.
   429	// Level 0 (NoCompression) does not attempt any
   430	// compression; it only adds the necessary DEFLATE framing.
   431	func NewWriter(w io.Writer, level int) *Writer {
   432		const logWindowSize = logMaxOffsetSize
   433		var dw Writer
   434		dw.d.init(w, level)
   435		return &dw
   436	}
   437	
   438	// NewWriterDict is like NewWriter but initializes the new
   439	// Writer with a preset dictionary.  The returned Writer behaves
   440	// as if the dictionary had been written to it without producing
   441	// any compressed output.  The compressed data written to w
   442	// can only be decompressed by a Reader initialized with the
   443	// same dictionary.
   444	func NewWriterDict(w io.Writer, level int, dict []byte) *Writer {
   445		dw := &dictWriter{w, false}
   446		zw := NewWriter(dw, level)
   447		zw.Write(dict)
   448		zw.Flush()
   449		dw.enabled = true
   450		return zw
   451	}
   452	
   453	type dictWriter struct {
   454		w       io.Writer
   455		enabled bool
   456	}
   457	
   458	func (w *dictWriter) Write(b []byte) (n int, err os.Error) {
   459		if w.enabled {
   460			return w.w.Write(b)
   461		}
   462		return len(b), nil
   463	}
   464	
   465	// A Writer takes data written to it and writes the compressed
   466	// form of that data to an underlying writer (see NewWriter).
   467	type Writer struct {
   468		d compressor
   469	}
   470	
   471	// Write writes data to w, which will eventually write the
   472	// compressed form of data to its underlying writer.
   473	func (w *Writer) Write(data []byte) (n int, err os.Error) {
   474		return w.d.write(data)
   475	}
   476	
   477	// Flush flushes any pending compressed data to the underlying writer.
   478	// It is useful mainly in compressed network protocols, to ensure that
   479	// a remote reader has enough data to reconstruct a packet.
   480	// Flush does not return until the data has been written.
   481	// If the underlying writer returns an error, Flush returns that error.
   482	//
   483	// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
   484	func (w *Writer) Flush() os.Error {
   485		// For more about flushing:
   486		// http://www.bolet.org/~pornin/deflate-flush.html
   487		return w.d.syncFlush()
   488	}
   489	
   490	// Close flushes and closes the writer.
   491	func (w *Writer) Close() os.Error {
   492		return w.d.close()
   493	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.