...
Run Format

Source file src/compress/lzw/writer.go

Documentation: compress/lzw

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package lzw
  
  import (
  	"bufio"
  	"errors"
  	"fmt"
  	"io"
  )
  
  // A writer is a buffered, flushable writer.
  type writer interface {
  	io.ByteWriter
  	Flush() error
  }
  
  // An errWriteCloser is an io.WriteCloser that always returns a given error.
  type errWriteCloser struct {
  	err error
  }
  
  func (e *errWriteCloser) Write([]byte) (int, error) {
  	return 0, e.err
  }
  
  func (e *errWriteCloser) Close() error {
  	return e.err
  }
  
  const (
  	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
  	// type conversions when shifting bits.
  	maxCode     = 1<<12 - 1
  	invalidCode = 1<<32 - 1
  	// There are 1<<12 possible codes, which is an upper bound on the number of
  	// valid hash table entries at any given point in time. tableSize is 4x that.
  	tableSize = 4 * 1 << 12
  	tableMask = tableSize - 1
  	// A hash table entry is a uint32. Zero is an invalid entry since the
  	// lower 12 bits of a valid entry must be a non-literal code.
  	invalidEntry = 0
  )
  
  // encoder is LZW compressor.
  type encoder struct {
  	// w is the writer that compressed bytes are written to.
  	w writer
  	// order, write, bits, nBits and width are the state for
  	// converting a code stream into a byte stream.
  	order Order
  	write func(*encoder, uint32) error
  	bits  uint32
  	nBits uint
  	width uint
  	// litWidth is the width in bits of literal codes.
  	litWidth uint
  	// hi is the code implied by the next code emission.
  	// overflow is the code at which hi overflows the code width.
  	hi, overflow uint32
  	// savedCode is the accumulated code at the end of the most recent Write
  	// call. It is equal to invalidCode if there was no such call.
  	savedCode uint32
  	// err is the first error encountered during writing. Closing the encoder
  	// will make any future Write calls return errClosed
  	err error
  	// table is the hash table from 20-bit keys to 12-bit values. Each table
  	// entry contains key<<12|val and collisions resolve by linear probing.
  	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
  	// The values are a 12-bit code.
  	table [tableSize]uint32
  }
  
  // writeLSB writes the code c for "Least Significant Bits first" data.
  func (e *encoder) writeLSB(c uint32) error {
  	e.bits |= c << e.nBits
  	e.nBits += e.width
  	for e.nBits >= 8 {
  		if err := e.w.WriteByte(uint8(e.bits)); err != nil {
  			return err
  		}
  		e.bits >>= 8
  		e.nBits -= 8
  	}
  	return nil
  }
  
  // writeMSB writes the code c for "Most Significant Bits first" data.
  func (e *encoder) writeMSB(c uint32) error {
  	e.bits |= c << (32 - e.width - e.nBits)
  	e.nBits += e.width
  	for e.nBits >= 8 {
  		if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil {
  			return err
  		}
  		e.bits <<= 8
  		e.nBits -= 8
  	}
  	return nil
  }
  
  // errOutOfCodes is an internal error that means that the encoder has run out
  // of unused codes and a clear code needs to be sent next.
  var errOutOfCodes = errors.New("lzw: out of codes")
  
  // incHi increments e.hi and checks for both overflow and running out of
  // unused codes. In the latter case, incHi sends a clear code, resets the
  // encoder state and returns errOutOfCodes.
  func (e *encoder) incHi() error {
  	e.hi++
  	if e.hi == e.overflow {
  		e.width++
  		e.overflow <<= 1
  	}
  	if e.hi == maxCode {
  		clear := uint32(1) << e.litWidth
  		if err := e.write(e, clear); err != nil {
  			return err
  		}
  		e.width = e.litWidth + 1
  		e.hi = clear + 1
  		e.overflow = clear << 1
  		for i := range e.table {
  			e.table[i] = invalidEntry
  		}
  		return errOutOfCodes
  	}
  	return nil
  }
  
  // Write writes a compressed representation of p to e's underlying writer.
  func (e *encoder) Write(p []byte) (n int, err error) {
  	if e.err != nil {
  		return 0, e.err
  	}
  	if len(p) == 0 {
  		return 0, nil
  	}
  	if maxLit := uint8(1<<e.litWidth - 1); maxLit != 0xff {
  		for _, x := range p {
  			if x > maxLit {
  				e.err = errors.New("lzw: input byte too large for the litWidth")
  				return 0, e.err
  			}
  		}
  	}
  	n = len(p)
  	code := e.savedCode
  	if code == invalidCode {
  		// The first code sent is always a literal code.
  		code, p = uint32(p[0]), p[1:]
  	}
  loop:
  	for _, x := range p {
  		literal := uint32(x)
  		key := code<<8 | literal
  		// If there is a hash table hit for this key then we continue the loop
  		// and do not emit a code yet.
  		hash := (key>>12 ^ key) & tableMask
  		for h, t := hash, e.table[hash]; t != invalidEntry; {
  			if key == t>>12 {
  				code = t & maxCode
  				continue loop
  			}
  			h = (h + 1) & tableMask
  			t = e.table[h]
  		}
  		// Otherwise, write the current code, and literal becomes the start of
  		// the next emitted code.
  		if e.err = e.write(e, code); e.err != nil {
  			return 0, e.err
  		}
  		code = literal
  		// Increment e.hi, the next implied code. If we run out of codes, reset
  		// the encoder state (including clearing the hash table) and continue.
  		if err1 := e.incHi(); err1 != nil {
  			if err1 == errOutOfCodes {
  				continue
  			}
  			e.err = err1
  			return 0, e.err
  		}
  		// Otherwise, insert key -> e.hi into the map that e.table represents.
  		for {
  			if e.table[hash] == invalidEntry {
  				e.table[hash] = (key << 12) | e.hi
  				break
  			}
  			hash = (hash + 1) & tableMask
  		}
  	}
  	e.savedCode = code
  	return n, nil
  }
  
  // Close closes the encoder, flushing any pending output. It does not close or
  // flush e's underlying writer.
  func (e *encoder) Close() error {
  	if e.err != nil {
  		if e.err == errClosed {
  			return nil
  		}
  		return e.err
  	}
  	// Make any future calls to Write return errClosed.
  	e.err = errClosed
  	// Write the savedCode if valid.
  	if e.savedCode != invalidCode {
  		if err := e.write(e, e.savedCode); err != nil {
  			return err
  		}
  		if err := e.incHi(); err != nil && err != errOutOfCodes {
  			return err
  		}
  	}
  	// Write the eof code.
  	eof := uint32(1)<<e.litWidth + 1
  	if err := e.write(e, eof); err != nil {
  		return err
  	}
  	// Write the final bits.
  	if e.nBits > 0 {
  		if e.order == MSB {
  			e.bits >>= 24
  		}
  		if err := e.w.WriteByte(uint8(e.bits)); err != nil {
  			return err
  		}
  	}
  	return e.w.Flush()
  }
  
  // NewWriter creates a new io.WriteCloser.
  // Writes to the returned io.WriteCloser are compressed and written to w.
  // It is the caller's responsibility to call Close on the WriteCloser when
  // finished writing.
  // The number of bits to use for literal codes, litWidth, must be in the
  // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
  func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
  	var write func(*encoder, uint32) error
  	switch order {
  	case LSB:
  		write = (*encoder).writeLSB
  	case MSB:
  		write = (*encoder).writeMSB
  	default:
  		return &errWriteCloser{errors.New("lzw: unknown order")}
  	}
  	if litWidth < 2 || 8 < litWidth {
  		return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)}
  	}
  	bw, ok := w.(writer)
  	if !ok {
  		bw = bufio.NewWriter(w)
  	}
  	lw := uint(litWidth)
  	return &encoder{
  		w:         bw,
  		order:     order,
  		write:     write,
  		width:     1 + lw,
  		litWidth:  lw,
  		hi:        1<<lw + 1,
  		overflow:  1 << (lw + 1),
  		savedCode: invalidCode,
  	}
  }
  

View as plain text