...
Run Format

Source file src/archive/zip/writer.go

Documentation: archive/zip

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package zip
     6  
     7  import (
     8  	"bufio"
     9  	"encoding/binary"
    10  	"errors"
    11  	"hash"
    12  	"hash/crc32"
    13  	"io"
    14  	"strings"
    15  	"unicode/utf8"
    16  )
    17  
// Errors returned by writeHeader when a FileHeader field is longer than
// the 16-bit length field used to describe it in the local file header.
var (
	errLongName  = errors.New("zip: FileHeader.Name too long")
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
    22  
// Writer implements a zip file writer.
type Writer struct {
	cw          *countWriter          // output sink; its count is used as the current offset in the archive
	dir         []*header             // entries recorded by CreateHeader, written as the central directory by Close
	last        *fileWriter           // writer for the most recently created file entry; nil after a directory entry
	closed      bool                  // set once Close starts writing the central directory
	compressors map[uint16]Compressor // compressors registered on this Writer, keyed by method ID
	comment     string                // end-of-central-directory comment stored by SetComment

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
    36  
// header pairs a FileHeader with the position of its entry in the archive.
// CreateHeader builds one per entry and Close serializes them into the
// central directory.
type header struct {
	*FileHeader
	offset uint64 // Writer byte count captured when the entry was created
}
    41  
    42  // NewWriter returns a new Writer writing a zip file to w.
    43  func NewWriter(w io.Writer) *Writer {
    44  	return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
    45  }
    46  
    47  // SetOffset sets the offset of the beginning of the zip data within the
    48  // underlying writer. It should be used when the zip data is appended to an
    49  // existing file, such as a binary executable.
    50  // It must be called before any data is written.
    51  func (w *Writer) SetOffset(n int64) {
    52  	if w.cw.count != 0 {
    53  		panic("zip: SetOffset called after data was written")
    54  	}
    55  	w.cw.count = n
    56  }
    57  
    58  // Flush flushes any buffered data to the underlying writer.
    59  // Calling Flush is not normally necessary; calling Close is sufficient.
    60  func (w *Writer) Flush() error {
    61  	return w.cw.w.(*bufio.Writer).Flush()
    62  }
    63  
    64  // SetComment sets the end-of-central-directory comment field.
    65  // It can only be called before Close.
    66  func (w *Writer) SetComment(comment string) error {
    67  	if len(comment) > uint16max {
    68  		return errors.New("zip: Writer.Comment too long")
    69  	}
    70  	w.comment = comment
    71  	return nil
    72  }
    73  
// Close finishes writing the zip file by writing the central directory.
// It does not close the underlying writer.
//
// If the most recently created file entry is still open, Close finalizes
// it first. It then writes one central directory header per recorded
// entry, the zip64 end-of-central-directory record and locator when the
// entry count, directory size, or directory offset reach the limits of
// the classic end record, the end record itself followed by the archive
// comment, and finally flushes the buffered output. Calling Close again
// after the central directory has been started returns an error.
func (w *Writer) Close() error {
	// Finalize the pending file entry so its CRC and sizes are known.
	if w.last != nil && !w.last.closed {
		if err := w.last.close(); err != nil {
			return err
		}
		w.last = nil
	}
	if w.closed {
		return errors.New("zip: writer closed twice")
	}
	w.closed = true

	// write central directory
	start := w.cw.count
	for _, h := range w.dir {
		var buf [directoryHeaderLen]byte
		b := writeBuf(buf[:])
		b.uint32(uint32(directoryHeaderSignature))
		b.uint16(h.CreatorVersion)
		b.uint16(h.ReaderVersion)
		b.uint16(h.Flags)
		b.uint16(h.Method)
		b.uint16(h.ModifiedTime)
		b.uint16(h.ModifiedDate)
		b.uint32(h.CRC32)
		if h.isZip64() || h.offset >= uint32max {
			// the file needs a zip64 header. store maxint in both
			// 32 bit size fields (and offset later) to signal that the
			// zip64 extra header should be used.
			b.uint32(uint32max) // compressed size
			b.uint32(uint32max) // uncompressed size

			// append a zip64 extra block to Extra
			// (this inner buf shadows the directory header buffer above
			// only within this branch; h.Extra itself is extended in place)
			var buf [28]byte // 2x uint16 + 3x uint64
			eb := writeBuf(buf[:])
			eb.uint16(zip64ExtraID)
			eb.uint16(24) // size = 3x uint64
			eb.uint64(h.UncompressedSize64)
			eb.uint64(h.CompressedSize64)
			eb.uint64(h.offset)
			h.Extra = append(h.Extra, buf[:]...)
		} else {
			b.uint32(h.CompressedSize)
			b.uint32(h.UncompressedSize)
		}

		// Variable-length field sizes; the lengths are truncated to 16 bits.
		b.uint16(uint16(len(h.Name)))
		b.uint16(uint16(len(h.Extra)))
		b.uint16(uint16(len(h.Comment)))
		b = b[4:] // skip disk number start and internal file attr (2x uint16)
		b.uint32(h.ExternalAttrs)
		// Offsets that do not fit in 32 bits are replaced by the maximum
		// value; the real offset is carried by the zip64 extra block.
		if h.offset > uint32max {
			b.uint32(uint32max)
		} else {
			b.uint32(uint32(h.offset))
		}
		// Fixed-size header first, then name, extra data, and comment.
		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Name); err != nil {
			return err
		}
		if _, err := w.cw.Write(h.Extra); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
			return err
		}
	}
	end := w.cw.count

	// Values for the end-of-central-directory records, derived from the
	// byte counts observed before and after the directory was written.
	records := uint64(len(w.dir))
	size := uint64(end - start)
	offset := uint64(start)

	if f := w.testHookCloseSizeOffset; f != nil {
		f(size, offset)
	}

	if records >= uint16max || size >= uint32max || offset >= uint32max {
		var buf [directory64EndLen + directory64LocLen]byte
		b := writeBuf(buf[:])

		// zip64 end of central directory record
		b.uint32(directory64EndSignature)
		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
		b.uint16(zipVersion45)           // version made by
		b.uint16(zipVersion45)           // version needed to extract
		b.uint32(0)                      // number of this disk
		b.uint32(0)                      // number of the disk with the start of the central directory
		b.uint64(records)                // total number of entries in the central directory on this disk
		b.uint64(records)                // total number of entries in the central directory
		b.uint64(size)                   // size of the central directory
		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number

		// zip64 end of central directory locator
		b.uint32(directory64LocSignature)
		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
		b.uint32(1)           // total number of disks

		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}

		// store max values in the regular end record to signal
		// that the zip64 values should be used instead
		records = uint16max
		size = uint32max
		offset = uint32max
	}

	// write end record
	var buf [directoryEndLen]byte
	b := writeBuf(buf[:])
	b.uint32(uint32(directoryEndSignature))
	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
	b.uint16(uint16(records))        // number of entries this disk
	b.uint16(uint16(records))        // number of entries total
	b.uint32(uint32(size))           // size of directory
	b.uint32(uint32(offset))         // start of directory
	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
	if _, err := w.cw.Write(buf[:]); err != nil {
		return err
	}
	if _, err := io.WriteString(w.cw, w.comment); err != nil {
		return err
	}

	// Push everything still sitting in the buffer to the underlying writer.
	return w.cw.w.(*bufio.Writer).Flush()
}
   207  
   208  // Create adds a file to the zip file using the provided name.
   209  // It returns a Writer to which the file contents should be written.
   210  // The file contents will be compressed using the Deflate method.
   211  // The name must be a relative path: it must not start with a drive
   212  // letter (e.g. C:) or leading slash, and only forward slashes are
   213  // allowed. To create a directory instead of a file, add a trailing
   214  // slash to the name.
   215  // The file's contents must be written to the io.Writer before the next
   216  // call to Create, CreateHeader, or Close.
   217  func (w *Writer) Create(name string) (io.Writer, error) {
   218  	header := &FileHeader{
   219  		Name:   name,
   220  		Method: Deflate,
   221  	}
   222  	return w.CreateHeader(header)
   223  }
   224  
   225  // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
   226  // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
   227  // or any other common encoding).
   228  func detectUTF8(s string) (valid, require bool) {
   229  	for i := 0; i < len(s); {
   230  		r, size := utf8.DecodeRuneInString(s[i:])
   231  		i += size
   232  		// Officially, ZIP uses CP-437, but many readers use the system's
   233  		// local character encoding. Most encoding are compatible with a large
   234  		// subset of CP-437, which itself is ASCII-like.
   235  		//
   236  		// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
   237  		// characters with localized currency and overline characters.
   238  		if r < 0x20 || r > 0x7d || r == 0x5c {
   239  			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
   240  				return false, false
   241  			}
   242  			require = true
   243  		}
   244  	}
   245  	return true, require
   246  }
   247  
// CreateHeader adds a file to the zip archive using the provided FileHeader
// for the file metadata. Writer takes ownership of fh and may mutate
// its fields. The caller must not modify fh after calling CreateHeader.
//
// This returns a Writer to which the file contents should be written.
// The file's contents must be written to the io.Writer before the next
// call to Create, CreateHeader, or Close.
//
// A name ending in "/" creates a directory entry: the method is forced to
// Store, the sizes are cleared, and the returned writer rejects any
// non-empty write. An error is returned if the previous entry cannot be
// finalized, if fh is the same pointer as the most recently added entry,
// if no compressor is available for fh.Method (ErrAlgorithm), if the
// compressor cannot be created, or if writing the local header fails.
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
	// Finalize the previous file entry before starting a new one.
	if w.last != nil && !w.last.closed {
		if err := w.last.close(); err != nil {
			return nil, err
		}
	}
	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
		// See https://golang.org/issue/11144 confusion.
		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
	}

	// The ZIP format has a sad state of affairs regarding character encoding.
	// Officially, the name and comment fields are supposed to be encoded
	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
	// flag bit is set. However, there are several problems:
	//
	//	* Many ZIP readers still do not support UTF-8.
	//	* If the UTF-8 flag is cleared, several readers simply interpret the
	//	name and comment fields as whatever the local system encoding is.
	//
	// In order to avoid breaking readers without UTF-8 support,
	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
	// However, if the strings require multibyte UTF-8 encoding and are
	// valid UTF-8 strings, then we set the UTF-8 bit.
	//
	// For the case where the user explicitly wants to specify the encoding
	// as UTF-8, they will need to set the flag bit themselves.
	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
	switch {
	case fh.NonUTF8:
		// The caller opted out: clear the UTF-8 flag bit (0x800).
		fh.Flags &^= 0x800
	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
		// At least one field needs UTF-8 and both fields are valid UTF-8.
		fh.Flags |= 0x800
	}

	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
	fh.ReaderVersion = zipVersion20

	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
	if !fh.Modified.IsZero() {
		// Contrary to the FileHeader.SetModTime method, we intentionally
		// do not convert to UTC, because we assume the user intends to encode
		// the date using the specified timezone. A user may want this control
		// because many legacy ZIP readers interpret the timestamp according
		// to the local timezone.
		//
		// The timezone is only non-UTC if a user sets the Modified
		// field directly themselves. All other approaches set UTC.
		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)

		// Use "extended timestamp" format since this is what Info-ZIP uses.
		// Nearly every major ZIP implementation uses a different format,
		// but at least most seem to be able to understand the other formats.
		//
		// This format happens to be identical for both local and central header
		// if modification time is the only timestamp being encoded.
		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
		mt := uint32(fh.Modified.Unix()) // Unix seconds, truncated to 32 bits
		eb := writeBuf(mbuf[:])
		eb.uint16(extTimeExtraID)
		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
		eb.uint8(1)   // Flags: ModTime
		eb.uint32(mt) // ModTime
		fh.Extra = append(fh.Extra, mbuf[:]...)
	}

	var (
		ow io.Writer   // writer handed back to the caller
		fw *fileWriter // stays nil for directory entries
	)
	// The entry begins at the number of bytes counted so far.
	h := &header{
		FileHeader: fh,
		offset:     uint64(w.cw.count),
	}

	if strings.HasSuffix(fh.Name, "/") {
		// Set the compression method to Store to ensure data length is truly zero,
		// which the writeHeader method always encodes for the size fields.
		// This is necessary as most compression formats have non-zero lengths
		// even when compressing an empty string.
		fh.Method = Store
		fh.Flags &^= 0x8 // we will not write a data descriptor

		// Explicitly clear sizes as they have no meaning for directories.
		fh.CompressedSize = 0
		fh.CompressedSize64 = 0
		fh.UncompressedSize = 0
		fh.UncompressedSize64 = 0

		ow = dirWriter{}
	} else {
		fh.Flags |= 0x8 // we will write a data descriptor

		// Data flows rawCount -> comp -> compCount -> w.cw, so both the
		// uncompressed and the compressed byte counts are tracked.
		fw = &fileWriter{
			zipw:      w.cw,
			compCount: &countWriter{w: w.cw},
			crc32:     crc32.NewIEEE(),
		}
		comp := w.compressor(fh.Method)
		if comp == nil {
			return nil, ErrAlgorithm
		}
		var err error
		fw.comp, err = comp(fw.compCount)
		if err != nil {
			return nil, err
		}
		fw.rawCount = &countWriter{w: fw.comp}
		fw.header = h
		ow = fw
	}
	// The entry is recorded in the directory before its local header is
	// written, so it remains listed even if writeHeader fails.
	w.dir = append(w.dir, h)
	if err := writeHeader(w.cw, fh); err != nil {
		return nil, err
	}
	// If we're creating a directory, fw is nil.
	w.last = fw
	return ow, nil
}
   375  
   376  func writeHeader(w io.Writer, h *FileHeader) error {
   377  	const maxUint16 = 1<<16 - 1
   378  	if len(h.Name) > maxUint16 {
   379  		return errLongName
   380  	}
   381  	if len(h.Extra) > maxUint16 {
   382  		return errLongExtra
   383  	}
   384  
   385  	var buf [fileHeaderLen]byte
   386  	b := writeBuf(buf[:])
   387  	b.uint32(uint32(fileHeaderSignature))
   388  	b.uint16(h.ReaderVersion)
   389  	b.uint16(h.Flags)
   390  	b.uint16(h.Method)
   391  	b.uint16(h.ModifiedTime)
   392  	b.uint16(h.ModifiedDate)
   393  	b.uint32(0) // since we are writing a data descriptor crc32,
   394  	b.uint32(0) // compressed size,
   395  	b.uint32(0) // and uncompressed size should be zero
   396  	b.uint16(uint16(len(h.Name)))
   397  	b.uint16(uint16(len(h.Extra)))
   398  	if _, err := w.Write(buf[:]); err != nil {
   399  		return err
   400  	}
   401  	if _, err := io.WriteString(w, h.Name); err != nil {
   402  		return err
   403  	}
   404  	_, err := w.Write(h.Extra)
   405  	return err
   406  }
   407  
   408  // RegisterCompressor registers or overrides a custom compressor for a specific
   409  // method ID. If a compressor for a given method is not found, Writer will
   410  // default to looking up the compressor at the package level.
   411  func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
   412  	if w.compressors == nil {
   413  		w.compressors = make(map[uint16]Compressor)
   414  	}
   415  	w.compressors[method] = comp
   416  }
   417  
   418  func (w *Writer) compressor(method uint16) Compressor {
   419  	comp := w.compressors[method]
   420  	if comp == nil {
   421  		comp = compressor(method)
   422  	}
   423  	return comp
   424  }
   425  
   426  type dirWriter struct{}
   427  
   428  func (dirWriter) Write(b []byte) (int, error) {
   429  	if len(b) == 0 {
   430  		return 0, nil
   431  	}
   432  	return 0, errors.New("zip: write to directory")
   433  }
   434  
// fileWriter is the writer CreateHeader returns for a non-directory entry.
// It checksums and counts the data written to it, passes the data through
// the entry's compressor, and on close records the results in the embedded
// header and emits the data descriptor.
type fileWriter struct {
	*header
	zipw      io.Writer      // receives the data descriptor written by close
	rawCount  *countWriter   // counts uncompressed bytes on their way into comp
	comp      io.WriteCloser // compressor created for the entry's method
	compCount *countWriter   // counts the bytes emitted by comp
	crc32     hash.Hash32    // checksum over the data passed to Write
	closed    bool           // set by close; later writes are rejected
}
   444  
   445  func (w *fileWriter) Write(p []byte) (int, error) {
   446  	if w.closed {
   447  		return 0, errors.New("zip: write to closed file")
   448  	}
   449  	w.crc32.Write(p)
   450  	return w.rawCount.Write(p)
   451  }
   452  
   453  func (w *fileWriter) close() error {
   454  	if w.closed {
   455  		return errors.New("zip: file closed twice")
   456  	}
   457  	w.closed = true
   458  	if err := w.comp.Close(); err != nil {
   459  		return err
   460  	}
   461  
   462  	// update FileHeader
   463  	fh := w.header.FileHeader
   464  	fh.CRC32 = w.crc32.Sum32()
   465  	fh.CompressedSize64 = uint64(w.compCount.count)
   466  	fh.UncompressedSize64 = uint64(w.rawCount.count)
   467  
   468  	if fh.isZip64() {
   469  		fh.CompressedSize = uint32max
   470  		fh.UncompressedSize = uint32max
   471  		fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
   472  	} else {
   473  		fh.CompressedSize = uint32(fh.CompressedSize64)
   474  		fh.UncompressedSize = uint32(fh.UncompressedSize64)
   475  	}
   476  
   477  	// Write data descriptor. This is more complicated than one would
   478  	// think, see e.g. comments in zipfile.c:putextended() and
   479  	// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
   480  	// The approach here is to write 8 byte sizes if needed without
   481  	// adding a zip64 extra in the local header (too late anyway).
   482  	var buf []byte
   483  	if fh.isZip64() {
   484  		buf = make([]byte, dataDescriptor64Len)
   485  	} else {
   486  		buf = make([]byte, dataDescriptorLen)
   487  	}
   488  	b := writeBuf(buf)
   489  	b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
   490  	b.uint32(fh.CRC32)
   491  	if fh.isZip64() {
   492  		b.uint64(fh.CompressedSize64)
   493  		b.uint64(fh.UncompressedSize64)
   494  	} else {
   495  		b.uint32(fh.CompressedSize)
   496  		b.uint32(fh.UncompressedSize)
   497  	}
   498  	_, err := w.zipw.Write(buf)
   499  	return err
   500  }
   501  
   502  type countWriter struct {
   503  	w     io.Writer
   504  	count int64
   505  }
   506  
   507  func (w *countWriter) Write(p []byte) (int, error) {
   508  	n, err := w.w.Write(p)
   509  	w.count += int64(n)
   510  	return n, err
   511  }
   512  
   513  type nopCloser struct {
   514  	io.Writer
   515  }
   516  
   517  func (w nopCloser) Close() error {
   518  	return nil
   519  }
   520  
   521  type writeBuf []byte
   522  
   523  func (b *writeBuf) uint8(v uint8) {
   524  	(*b)[0] = v
   525  	*b = (*b)[1:]
   526  }
   527  
   528  func (b *writeBuf) uint16(v uint16) {
   529  	binary.LittleEndian.PutUint16(*b, v)
   530  	*b = (*b)[2:]
   531  }
   532  
   533  func (b *writeBuf) uint32(v uint32) {
   534  	binary.LittleEndian.PutUint32(*b, v)
   535  	*b = (*b)[4:]
   536  }
   537  
   538  func (b *writeBuf) uint64(v uint64) {
   539  	binary.LittleEndian.PutUint64(*b, v)
   540  	*b = (*b)[8:]
   541  }
   542  

View as plain text