The Go Programming Language

Source file src/pkg/archive/tar/reader.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package tar
     6	
     7	// TODO(dsymonds):
     8	//   - pax extensions
     9	
    10	import (
    11		"bytes"
    12		"io"
    13		"io/ioutil"
    14		"os"
    15		"strconv"
    16	)
    17	
var (
	// HeaderError is the error Next reports when a header block cannot be
	// accepted: its checksum does not match, one of its numeric fields
	// fails to parse, or a single zero block is followed by a non-zero block.
	HeaderError = os.NewError("invalid tar header")
)
    21	
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
//
// Once Next has recorded an error it keeps returning that error without
// reading any further from the underlying stream.
//
// Example:
//	tr := tar.NewReader(r)
//	for {
//		hdr, err := tr.Next()
//		if err == os.EOF {
//			// end of tar archive
//			break
//		}
//		if err != nil {
//			// handle error
//		}
//		io.Copy(data, tr)
//	}
type Reader struct {
	r   io.Reader // underlying archive stream
	err os.Error  // last error recorded by Next or Read; Next is a no-op while it is non-nil
	nb  int64     // number of unread bytes for current file entry
	pad int64     // amount of padding (ignored) after current file entry
}
    46	
    47	// NewReader creates a new Reader reading from r.
    48	func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
    49	
    50	// Next advances to the next entry in the tar archive.
    51	func (tr *Reader) Next() (*Header, os.Error) {
    52		var hdr *Header
    53		if tr.err == nil {
    54			tr.skipUnread()
    55		}
    56		if tr.err == nil {
    57			hdr = tr.readHeader()
    58		}
    59		return hdr, tr.err
    60	}
    61	
    62	// Parse bytes as a NUL-terminated C-style string.
    63	// If a NUL byte is not found then the whole slice is returned as a string.
    64	func cString(b []byte) string {
    65		n := 0
    66		for n < len(b) && b[n] != 0 {
    67			n++
    68		}
    69		return string(b[0:n])
    70	}
    71	
    72	func (tr *Reader) octal(b []byte) int64 {
    73		// Removing leading spaces.
    74		for len(b) > 0 && b[0] == ' ' {
    75			b = b[1:]
    76		}
    77		// Removing trailing NULs and spaces.
    78		for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') {
    79			b = b[0 : len(b)-1]
    80		}
    81		x, err := strconv.Btoui64(cString(b), 8)
    82		if err != nil {
    83			tr.err = err
    84		}
    85		return int64(x)
    86	}
    87	
    88	// Skip any unread bytes in the existing file entry, as well as any alignment padding.
    89	func (tr *Reader) skipUnread() {
    90		nr := tr.nb + tr.pad // number of bytes to skip
    91		tr.nb, tr.pad = 0, 0
    92		if sr, ok := tr.r.(io.Seeker); ok {
    93			if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
    94				return
    95			}
    96		}
    97		_, tr.err = io.Copyn(ioutil.Discard, tr.r, nr)
    98	}
    99	
   100	func (tr *Reader) verifyChecksum(header []byte) bool {
   101		if tr.err != nil {
   102			return false
   103		}
   104	
   105		given := tr.octal(header[148:156])
   106		unsigned, signed := checksum(header)
   107		return given == unsigned || given == signed
   108	}
   109	
// readHeader reads the next blockSize-byte block from tr.r and decodes it
// into a Header.
//
// It returns nil and leaves the reason in tr.err when:
//   - the block cannot be read in full (tr.err is the io.ReadFull error);
//   - the block and the one after it are both all zeros, which marks the
//     end of the archive (tr.err is os.EOF);
//   - a zero block is followed by a non-zero block (tr.err is HeaderError);
//   - the checksum does not verify, or a numeric field fails to parse
//     (tr.err is HeaderError).
//
// On success it sets tr.nb to the entry's size and tr.pad to the number of
// bytes that round that size up to a multiple of blockSize.
func (tr *Reader) readHeader() *Header {
	header := make([]byte, blockSize)
	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
		return nil
	}

	// Two blocks of zero bytes marks the end of the archive.
	if bytes.Equal(header, zeroBlock[0:blockSize]) {
		// header is reused as the buffer for the second block.
		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
			return nil
		}
		if bytes.Equal(header, zeroBlock[0:blockSize]) {
			tr.err = os.EOF
		} else {
			tr.err = HeaderError // zero block and then non-zero block
		}
		return nil
	}

	// verifyChecksum may leave a parse error in tr.err; either way a
	// failed verification is reported as HeaderError.
	if !tr.verifyChecksum(header) {
		tr.err = HeaderError
		return nil
	}

	// Unpack.
	// NOTE(review): slicer is defined elsewhere; from its use here each
	// next(n) call appears to hand back the next n bytes of header and
	// advance, so the calls below must stay in on-disk field order.
	hdr := new(Header)
	s := slicer(header)

	hdr.Name = cString(s.next(100))
	hdr.Mode = tr.octal(s.next(8))
	hdr.Uid = int(tr.octal(s.next(8)))
	hdr.Gid = int(tr.octal(s.next(8)))
	hdr.Size = tr.octal(s.next(12))
	hdr.Mtime = tr.octal(s.next(12))
	s.next(8) // chksum (already checked by verifyChecksum)
	hdr.Typeflag = s.next(1)[0]
	hdr.Linkname = cString(s.next(100))

	// The remainder of the header depends on the value of magic.
	// The original (v7) version of tar had no explicit magic field,
	// so its magic bytes, like the rest of the block, are NULs.
	magic := string(s.next(8)) // contains version field as well.
	var format string // stays "" when magic is not recognised; no extended fields are read then
	switch magic {
	case "ustar\x0000": // POSIX tar (1003.1-1988)
		// A "tar\x00" trailer in the last four bytes of the block
		// selects the star layout instead of plain POSIX.
		if string(header[508:512]) == "tar\x00" {
			format = "star"
		} else {
			format = "posix"
		}
	case "ustar  \x00": // old GNU tar
		format = "gnu"
	}

	switch format {
	case "posix", "gnu", "star":
		hdr.Uname = cString(s.next(32))
		hdr.Gname = cString(s.next(32))
		// Always consume both device fields to keep s aligned, but only
		// parse them for character and block device entries.
		devmajor := s.next(8)
		devminor := s.next(8)
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = tr.octal(devmajor)
			hdr.Devminor = tr.octal(devminor)
		}
		var prefix string
		switch format {
		case "posix", "gnu":
			prefix = cString(s.next(155))
		case "star":
			// star uses a shorter prefix followed by two 12-byte
			// time fields.
			prefix = cString(s.next(131))
			hdr.Atime = tr.octal(s.next(12))
			hdr.Ctime = tr.octal(s.next(12))
		}
		// A non-empty prefix is the leading part of the entry's path.
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}

	// octal records parse failures in tr.err; surface any of them to the
	// caller as a single HeaderError.
	if tr.err != nil {
		tr.err = HeaderError
		return nil
	}

	// Maximum value of hdr.Size is 64 GB (12 octal digits),
	// so there's no risk of int64 overflowing.
	tr.nb = int64(hdr.Size)
	tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two

	return hdr
}
   200	
   201	// Read reads from the current entry in the tar archive.
   202	// It returns 0, os.EOF when it reaches the end of that entry,
   203	// until Next is called to advance to the next entry.
   204	func (tr *Reader) Read(b []byte) (n int, err os.Error) {
   205		if tr.nb == 0 {
   206			// file consumed
   207			return 0, os.EOF
   208		}
   209	
   210		if int64(len(b)) > tr.nb {
   211			b = b[0:tr.nb]
   212		}
   213		n, err = tr.r.Read(b)
   214		tr.nb -= int64(n)
   215	
   216		if err == os.EOF && tr.nb > 0 {
   217			err = io.ErrUnexpectedEOF
   218		}
   219		tr.err = err
   220		return
   221	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.