archive.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package archive implements reading of archive files generated by the Go
     6  // toolchain.
     7  package archive
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"cmd/internal/bio"
    13  	"cmd/internal/goobj"
    14  	"errors"
    15  	"fmt"
    16  	"io"
    17  	"log"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  	"unicode/utf8"
    23  )
    24  
    25  /*
    26  The archive format is:
    27  
    28  First, on a line by itself
    29  	!<arch>
    30  
    31  Then zero or more file records. Each file record has a fixed-size one-line header
    32  followed by data bytes followed by an optional padding byte. The header is:
    33  
    34  	%-16s%-12d%-6d%-6d%-8o%-10d`
    35  	name mtime uid gid mode size
    36  
    37  (note the trailing backquote). The %-16s here means at most 16 *bytes* of
    38  the name, and if shorter, space padded on the right.
    39  */
    40  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	// Offset is the byte offset in the underlying file at which the data starts.
	Offset int64
	// Size is the length of the data in bytes.
	Size   int64
}
    48  
// An Archive pairs an archive file with the entries known for it,
// either parsed from the file by Parse or appended with AddEntry.
type Archive struct {
	// f is the underlying file, as handed to New or Parse.
	f       *os.File
	// Entries lists the file records in the order they were parsed or added.
	Entries []Entry
}

// File returns the underlying file of the archive.
func (a *Archive) File() *os.File { return a.f }
    55  
// An Entry describes one file record of an archive.
// The embedded Data locates the record's contents (not its header)
// in the underlying file.
type Entry struct {
	// Name is the record name with trailing padding spaces removed.
	Name  string
	// Type classifies the record; see the EntryType constants.
	Type  EntryType
	// Mtime, Uid, Gid and Mode mirror the header fields of the same name.
	// Parse fills them in only when called in verbose mode; otherwise
	// they are left at their zero values.
	Mtime int64
	Uid   int
	Gid   int
	Mode  os.FileMode
	Data
	Obj *GoObj // nil if this entry is not a Go object file
}
    66  
// An EntryType classifies the contents of an archive entry.
type EntryType int

const (
	// EntryPkgDef marks the "__.PKGDEF" record.
	EntryPkgDef EntryType = iota
	// EntryGoObj marks a record whose data starts with the Go object header.
	EntryGoObj
	// EntryNativeObj marks any other record; its data is not interpreted.
	EntryNativeObj
	// EntrySentinelNonObj marks a zero-size "preferlinkext" or
	// "dynimportfail" record, which carries no object data.
	EntrySentinelNonObj
)
    75  
    76  func (e *Entry) String() string {
    77  	return fmt.Sprintf("%s %6d/%-6d %12d %s %s",
    78  		(e.Mode & 0777).String(),
    79  		e.Uid,
    80  		e.Gid,
    81  		e.Size,
    82  		time.Unix(e.Mtime, 0).Format(timeFormat),
    83  		e.Name)
    84  }
    85  
// A GoObj describes a Go object file found by the parser.
// The embedded Data locates the part of the object that follows the
// textual header.
type GoObj struct {
	// TextHeader holds the textual header, up to and including the
	// terminating "\n!\n" line.
	TextHeader []byte
	// Arch is the fourth whitespace-separated field of the textual
	// header, or empty if the header has fewer than four fields.
	Arch       string
	Data
}
    91  
const (
	// entryHeader is the Fprintf format AddEntry uses to write a record
	// header: name, mtime, uid, gid, mode (octal), size, then the
	// backquote-newline terminator.
	entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n"
	// In entryHeader the first entry, the name, is always printed as 16 bytes right-padded.
	// entryLen is the resulting header length in bytes (60).
	entryLen   = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1
	// timeFormat is the layout Entry.String uses for modification times.
	timeFormat = "Jan _2 15:04 2006"
)
    98  
var (
	// archiveHeader is the 8-byte line that starts every archive file.
	archiveHeader = []byte("!<arch>\n")
	// archiveMagic is the 2-byte terminator that ends every record header.
	archiveMagic  = []byte("`\n")
	// goobjHeader is the prefix that identifies a Go object file.
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Errors reported by the parser.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   109  
   110  type ErrGoObjOtherVersion struct{ magic []byte }
   111  
   112  func (e ErrGoObjOtherVersion) Error() string {
   113  	return fmt.Sprintf("go object of a different version: %q", e.magic)
   114  }
   115  
// An objReader is an object file reader.
// It tracks its own position so that reads can be bounded by the file
// size, and it remembers the first error it encounters.
type objReader struct {
	// a is the archive being populated.
	a      *Archive
	// b is the buffered reader over the underlying file.
	b      *bio.Reader
	// err is the first error recorded by the error method, if any.
	err    error
	// offset is the file offset of the next unread byte.
	offset int64
	// limit is the end-of-file offset measured by init; reads at or
	// beyond it fail with io.ErrUnexpectedEOF.
	limit  int64
	// tmp is scratch space for headers and for discarding skipped bytes.
	tmp    [256]byte
}
   125  
   126  func (r *objReader) init(f *os.File) {
   127  	r.a = &Archive{f, nil}
   128  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   129  	r.limit, _ = f.Seek(0, io.SeekEnd)
   130  	f.Seek(r.offset, io.SeekStart)
   131  	r.b = bio.NewReader(f)
   132  }
   133  
   134  // error records that an error occurred.
   135  // It returns only the first error, so that an error
   136  // caused by an earlier error does not discard information
   137  // about the earlier error.
   138  func (r *objReader) error(err error) error {
   139  	if r.err == nil {
   140  		if err == io.EOF {
   141  			err = io.ErrUnexpectedEOF
   142  		}
   143  		r.err = err
   144  	}
   145  	// panic("corrupt") // useful for debugging
   146  	return r.err
   147  }
   148  
   149  // peek returns the next n bytes without advancing the reader.
   150  func (r *objReader) peek(n int) ([]byte, error) {
   151  	if r.err != nil {
   152  		return nil, r.err
   153  	}
   154  	if r.offset >= r.limit {
   155  		r.error(io.ErrUnexpectedEOF)
   156  		return nil, r.err
   157  	}
   158  	b, err := r.b.Peek(n)
   159  	if err != nil {
   160  		if err != bufio.ErrBufferFull {
   161  			r.error(err)
   162  		}
   163  	}
   164  	return b, err
   165  }
   166  
   167  // readByte reads and returns a byte from the input file.
   168  // On I/O error or EOF, it records the error but returns byte 0.
   169  // A sequence of 0 bytes will eventually terminate any
   170  // parsing state in the object file. In particular, it ends the
   171  // reading of a varint.
   172  func (r *objReader) readByte() byte {
   173  	if r.err != nil {
   174  		return 0
   175  	}
   176  	if r.offset >= r.limit {
   177  		r.error(io.ErrUnexpectedEOF)
   178  		return 0
   179  	}
   180  	b, err := r.b.ReadByte()
   181  	if err != nil {
   182  		if err == io.EOF {
   183  			err = io.ErrUnexpectedEOF
   184  		}
   185  		r.error(err)
   186  		b = 0
   187  	} else {
   188  		r.offset++
   189  	}
   190  	return b
   191  }
   192  
   193  // readFull reads exactly len(b) bytes from the input file.
   194  // If an error occurs, read returns the error but also
   195  // records it, so it is safe for callers to ignore the result
   196  // as long as delaying the report is not a problem.
   197  func (r *objReader) readFull(b []byte) error {
   198  	if r.err != nil {
   199  		return r.err
   200  	}
   201  	if r.offset+int64(len(b)) > r.limit {
   202  		return r.error(io.ErrUnexpectedEOF)
   203  	}
   204  	n, err := io.ReadFull(r.b, b)
   205  	r.offset += int64(n)
   206  	if err != nil {
   207  		return r.error(err)
   208  	}
   209  	return nil
   210  }
   211  
   212  // skip skips n bytes in the input.
   213  func (r *objReader) skip(n int64) {
   214  	if n < 0 {
   215  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   216  	}
   217  	if n < int64(len(r.tmp)) {
   218  		// Since the data is so small, a just reading from the buffered
   219  		// reader is better than flushing the buffer and seeking.
   220  		r.readFull(r.tmp[:n])
   221  	} else if n <= int64(r.b.Buffered()) {
   222  		// Even though the data is not small, it has already been read.
   223  		// Advance the buffer instead of seeking.
   224  		for n > int64(len(r.tmp)) {
   225  			r.readFull(r.tmp[:])
   226  			n -= int64(len(r.tmp))
   227  		}
   228  		r.readFull(r.tmp[:n])
   229  	} else {
   230  		// Seek, giving up buffered data.
   231  		r.b.MustSeek(r.offset+n, io.SeekStart)
   232  		r.offset += n
   233  	}
   234  }
   235  
   236  // New writes to f to make a new archive.
   237  func New(f *os.File) (*Archive, error) {
   238  	_, err := f.Write(archiveHeader)
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  	return &Archive{f: f}, nil
   243  }
   244  
   245  // Parse parses an object file or archive from f.
   246  func Parse(f *os.File, verbose bool) (*Archive, error) {
   247  	var r objReader
   248  	r.init(f)
   249  	t, err := r.peek(8)
   250  	if err != nil {
   251  		if err == io.EOF {
   252  			err = io.ErrUnexpectedEOF
   253  		}
   254  		return nil, err
   255  	}
   256  
   257  	switch {
   258  	default:
   259  		return nil, errNotObject
   260  
   261  	case bytes.Equal(t, archiveHeader):
   262  		if err := r.parseArchive(verbose); err != nil {
   263  			return nil, err
   264  		}
   265  	case bytes.Equal(t, goobjHeader):
   266  		off := r.offset
   267  		o := &GoObj{}
   268  		if err := r.parseObject(o, r.limit-off); err != nil {
   269  			return nil, err
   270  		}
   271  		r.a.Entries = []Entry{{
   272  			Name: f.Name(),
   273  			Type: EntryGoObj,
   274  			Data: Data{off, r.limit - off},
   275  			Obj:  o,
   276  		}}
   277  	}
   278  
   279  	return r.a, nil
   280  }
   281  
   282  // trimSpace removes trailing spaces from b and returns the corresponding string.
   283  // This effectively parses the form used in archive headers.
   284  func trimSpace(b []byte) string {
   285  	return string(bytes.TrimRight(b, " "))
   286  }
   287  
   288  // parseArchive parses a Unix archive of Go object files.
   289  func (r *objReader) parseArchive(verbose bool) error {
   290  	r.readFull(r.tmp[:8]) // consume header (already checked)
   291  	for r.offset < r.limit {
   292  		if err := r.readFull(r.tmp[:60]); err != nil {
   293  			return err
   294  		}
   295  		data := r.tmp[:60]
   296  
   297  		// Each file is preceded by this text header (slice indices in first column):
   298  		//	 0:16	name
   299  		//	16:28 date
   300  		//	28:34 uid
   301  		//	34:40 gid
   302  		//	40:48 mode
   303  		//	48:58 size
   304  		//	58:60 magic - `\n
   305  		// We only care about name, size, and magic, unless in verbose mode.
   306  		// The fields are space-padded on the right.
   307  		// The size is in decimal.
   308  		// The file data - size bytes - follows the header.
   309  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   310  		// byte sits between the file data and the next header.
   311  		// The file data that follows is padded to an even number of bytes:
   312  		// if size is odd, an extra padding byte is inserted betw the next header.
   313  		if len(data) < 60 {
   314  			return errTruncatedArchive
   315  		}
   316  		if !bytes.Equal(data[58:60], archiveMagic) {
   317  			return errCorruptArchive
   318  		}
   319  		name := trimSpace(data[0:16])
   320  		var err error
   321  		get := func(start, end, base, bitsize int) int64 {
   322  			if err != nil {
   323  				return 0
   324  			}
   325  			var v int64
   326  			v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize)
   327  			return v
   328  		}
   329  		size := get(48, 58, 10, 64)
   330  		var (
   331  			mtime    int64
   332  			uid, gid int
   333  			mode     os.FileMode
   334  		)
   335  		if verbose {
   336  			mtime = get(16, 28, 10, 64)
   337  			uid = int(get(28, 34, 10, 32))
   338  			gid = int(get(34, 40, 10, 32))
   339  			mode = os.FileMode(get(40, 48, 8, 32))
   340  		}
   341  		if err != nil {
   342  			return errCorruptArchive
   343  		}
   344  		data = data[60:]
   345  		fsize := size + size&1
   346  		if fsize < 0 || fsize < size {
   347  			return errCorruptArchive
   348  		}
   349  		switch name {
   350  		case "__.PKGDEF":
   351  			r.a.Entries = append(r.a.Entries, Entry{
   352  				Name:  name,
   353  				Type:  EntryPkgDef,
   354  				Mtime: mtime,
   355  				Uid:   uid,
   356  				Gid:   gid,
   357  				Mode:  mode,
   358  				Data:  Data{r.offset, size},
   359  			})
   360  			r.skip(size)
   361  		case "preferlinkext", "dynimportfail":
   362  			if size == 0 {
   363  				// These are not actual objects, but rather sentinel
   364  				// entries put into the archive by the Go command to
   365  				// be read by the linker. See #62036.
   366  				r.a.Entries = append(r.a.Entries, Entry{
   367  					Name:  name,
   368  					Type:  EntrySentinelNonObj,
   369  					Mtime: mtime,
   370  					Uid:   uid,
   371  					Gid:   gid,
   372  					Mode:  mode,
   373  					Data:  Data{r.offset, size},
   374  				})
   375  				break
   376  			}
   377  			fallthrough
   378  		default:
   379  			var typ EntryType
   380  			var o *GoObj
   381  			offset := r.offset
   382  			p, err := r.peek(8)
   383  			if err != nil {
   384  				return err
   385  			}
   386  			if bytes.Equal(p, goobjHeader) {
   387  				typ = EntryGoObj
   388  				o = &GoObj{}
   389  				err := r.parseObject(o, size)
   390  				if err != nil {
   391  					return err
   392  				}
   393  			} else {
   394  				typ = EntryNativeObj
   395  				r.skip(size)
   396  			}
   397  			r.a.Entries = append(r.a.Entries, Entry{
   398  				Name:  name,
   399  				Type:  typ,
   400  				Mtime: mtime,
   401  				Uid:   uid,
   402  				Gid:   gid,
   403  				Mode:  mode,
   404  				Data:  Data{offset, size},
   405  				Obj:   o,
   406  			})
   407  		}
   408  		if size&1 != 0 {
   409  			r.skip(1)
   410  		}
   411  	}
   412  	return nil
   413  }
   414  
   415  // parseObject parses a single Go object file.
   416  // The object file consists of a textual header ending in "\n!\n"
   417  // and then the part we want to parse begins.
   418  // The format of that part is defined in a comment at the top
   419  // of cmd/internal/goobj/objfile.go.
   420  func (r *objReader) parseObject(o *GoObj, size int64) error {
   421  	h := make([]byte, 0, 256)
   422  	var c1, c2, c3 byte
   423  	for {
   424  		c1, c2, c3 = c2, c3, r.readByte()
   425  		h = append(h, c3)
   426  		// The new export format can contain 0 bytes.
   427  		// Don't consider them errors, only look for r.err != nil.
   428  		if r.err != nil {
   429  			return errCorruptObject
   430  		}
   431  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   432  			break
   433  		}
   434  	}
   435  	o.TextHeader = h
   436  	hs := strings.Fields(string(h))
   437  	if len(hs) >= 4 {
   438  		o.Arch = hs[3]
   439  	}
   440  	o.Offset = r.offset
   441  	o.Size = size - int64(len(h))
   442  
   443  	p, err := r.peek(8)
   444  	if err != nil {
   445  		return err
   446  	}
   447  	if !bytes.Equal(p, []byte(goobj.Magic)) {
   448  		if bytes.HasPrefix(p, []byte("\x00go1")) && bytes.HasSuffix(p, []byte("ld")) {
   449  			return r.error(ErrGoObjOtherVersion{p[1:]}) // strip the \x00 byte
   450  		}
   451  		return r.error(errCorruptObject)
   452  	}
   453  	r.skip(o.Size)
   454  	return nil
   455  }
   456  
   457  // AddEntry adds an entry to the end of a, with the content from r.
   458  func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) {
   459  	off, err := a.f.Seek(0, io.SeekEnd)
   460  	if err != nil {
   461  		log.Fatal(err)
   462  	}
   463  	n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size)
   464  	if err != nil || n != entryLen {
   465  		log.Fatal("writing entry header: ", err)
   466  	}
   467  	n1, _ := io.CopyN(a.f, r, size)
   468  	if n1 != size {
   469  		log.Fatal(err)
   470  	}
   471  	if (off+size)&1 != 0 {
   472  		a.f.Write([]byte{0}) // pad to even byte
   473  	}
   474  	a.Entries = append(a.Entries, Entry{
   475  		Name:  name,
   476  		Type:  typ,
   477  		Mtime: mtime,
   478  		Uid:   uid,
   479  		Gid:   gid,
   480  		Mode:  mode,
   481  		Data:  Data{off + entryLen, size},
   482  	})
   483  }
   484  
   485  // exactly16Bytes truncates the string if necessary so it is at most 16 bytes long,
   486  // then pads the result with spaces to be exactly 16 bytes.
   487  // Fmt uses runes for its width calculation, but we need bytes in the entry header.
   488  func exactly16Bytes(s string) string {
   489  	for len(s) > 16 {
   490  		_, wid := utf8.DecodeLastRuneInString(s)
   491  		s = s[:len(s)-wid]
   492  	}
   493  	const sixteenSpaces = "                "
   494  	s += sixteenSpaces[:16-len(s)]
   495  	return s
   496  }
   497  
   498  // architecture-independent object file output
   499  const HeaderSize = 60
   500  
   501  func ReadHeader(b *bufio.Reader, name string) int {
   502  	var buf [HeaderSize]byte
   503  	if _, err := io.ReadFull(b, buf[:]); err != nil {
   504  		return -1
   505  	}
   506  	aname := strings.Trim(string(buf[0:16]), " ")
   507  	if !strings.HasPrefix(aname, name) {
   508  		return -1
   509  	}
   510  	asize := strings.Trim(string(buf[48:58]), " ")
   511  	i, _ := strconv.Atoi(asize)
   512  	return i
   513  }
   514  
   515  func FormatHeader(arhdr []byte, name string, size int64) {
   516  	copy(arhdr[:], fmt.Sprintf("%-16s%-12d%-6d%-6d%-8o%-10d`\n", name, 0, 0, 0, 0644, size))
   517  }
   518
View as plain text