Source file src/go/build/read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"go/ast"
    13  	"go/parser"
    14  	"go/scanner"
    15  	"go/token"
    16  	"io"
    17  	"strconv"
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  )
    22  
// importReader is a small hand-written scanner over the beginning of a Go
// source file. Its methods read the package clause and import declarations,
// and can afterwards scan the remaining input for //go:embed comments.
// The first error encountered is kept in err; once it is set, peekByte
// returns 0 instead of reading further.
type importReader struct {
	// b is the buffered input being scanned.
	b    *bufio.Reader
	// buf holds the bytes consumed by readByte, in input order.
	// readByteNoBuf drains it from the front before reading from b again.
	buf  []byte
	// peek is the byte most recently returned by peekByte that has not been
	// consumed yet; 0 means there is no pending byte.
	peek byte
	// err is the first error recorded: a read error from b, errNUL, or errSyntax.
	err  error
	// eof is set once a read from b has returned io.EOF.
	eof  bool
	// nerr counts peekByte calls made after err was set; peekByte panics
	// when it exceeds 10000 so that a caller cannot loop forever.
	nerr int
	// pos is the position of the next byte readByteNoBuf will return.
	pos  token.Position
}
    32  
// bom is the UTF-8 encoding of the byte order mark U+FEFF.
// newImportReader discards it when it appears at the very start of the input.
var bom = []byte{0xef, 0xbb, 0xbf}
    34  
    35  func newImportReader(name string, r io.Reader) *importReader {
    36  	b := bufio.NewReader(r)
    37  	// Remove leading UTF-8 BOM.
    38  	// Per https://golang.org/ref/spec#Source_code_representation:
    39  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    40  	// if it is the first Unicode code point in the source text.
    41  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    42  		b.Discard(3)
    43  	}
    44  	return &importReader{
    45  		b: b,
    46  		pos: token.Position{
    47  			Filename: name,
    48  			Line:     1,
    49  			Column:   1,
    50  		},
    51  	}
    52  }
    53  
    54  func isIdent(c byte) bool {
    55  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    56  }
    57  
var (
	// errSyntax is the error syntaxError records when the input does not
	// have the form the reader expects.
	errSyntax = errors.New("syntax error")
	// errNUL is the error recorded when a NUL byte is read from the input.
	errNUL    = errors.New("unexpected NUL in input")
)
    62  
    63  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    64  func (r *importReader) syntaxError() {
    65  	if r.err == nil {
    66  		r.err = errSyntax
    67  	}
    68  }
    69  
    70  // readByte reads the next byte from the input, saves it in buf, and returns it.
    71  // If an error occurs, readByte records the error in r.err and returns 0.
    72  func (r *importReader) readByte() byte {
    73  	c, err := r.b.ReadByte()
    74  	if err == nil {
    75  		r.buf = append(r.buf, c)
    76  		if c == 0 {
    77  			err = errNUL
    78  		}
    79  	}
    80  	if err != nil {
    81  		if err == io.EOF {
    82  			r.eof = true
    83  		} else if r.err == nil {
    84  			r.err = err
    85  		}
    86  		c = 0
    87  	}
    88  	return c
    89  }
    90  
    91  // readByteNoBuf is like readByte but doesn't buffer the byte.
    92  // It exhausts r.buf before reading from r.b.
    93  func (r *importReader) readByteNoBuf() byte {
    94  	var c byte
    95  	var err error
    96  	if len(r.buf) > 0 {
    97  		c = r.buf[0]
    98  		r.buf = r.buf[1:]
    99  	} else {
   100  		c, err = r.b.ReadByte()
   101  		if err == nil && c == 0 {
   102  			err = errNUL
   103  		}
   104  	}
   105  
   106  	if err != nil {
   107  		if err == io.EOF {
   108  			r.eof = true
   109  		} else if r.err == nil {
   110  			r.err = err
   111  		}
   112  		return 0
   113  	}
   114  	r.pos.Offset++
   115  	if c == '\n' {
   116  		r.pos.Line++
   117  		r.pos.Column = 1
   118  	} else {
   119  		r.pos.Column++
   120  	}
   121  	return c
   122  }
   123  
   124  // peekByte returns the next byte from the input reader but does not advance beyond it.
   125  // If skipSpace is set, peekByte skips leading spaces and comments.
   126  func (r *importReader) peekByte(skipSpace bool) byte {
   127  	if r.err != nil {
   128  		if r.nerr++; r.nerr > 10000 {
   129  			panic("go/build: import reader looping")
   130  		}
   131  		return 0
   132  	}
   133  
   134  	// Use r.peek as first input byte.
   135  	// Don't just return r.peek here: it might have been left by peekByte(false)
   136  	// and this might be peekByte(true).
   137  	c := r.peek
   138  	if c == 0 {
   139  		c = r.readByte()
   140  	}
   141  	for r.err == nil && !r.eof {
   142  		if skipSpace {
   143  			// For the purposes of this reader, semicolons are never necessary to
   144  			// understand the input and are treated as spaces.
   145  			switch c {
   146  			case ' ', '\f', '\t', '\r', '\n', ';':
   147  				c = r.readByte()
   148  				continue
   149  
   150  			case '/':
   151  				c = r.readByte()
   152  				if c == '/' {
   153  					for c != '\n' && r.err == nil && !r.eof {
   154  						c = r.readByte()
   155  					}
   156  				} else if c == '*' {
   157  					var c1 byte
   158  					for (c != '*' || c1 != '/') && r.err == nil {
   159  						if r.eof {
   160  							r.syntaxError()
   161  						}
   162  						c, c1 = c1, r.readByte()
   163  					}
   164  				} else {
   165  					r.syntaxError()
   166  				}
   167  				c = r.readByte()
   168  				continue
   169  			}
   170  		}
   171  		break
   172  	}
   173  	r.peek = c
   174  	return r.peek
   175  }
   176  
   177  // nextByte is like peekByte but advances beyond the returned byte.
   178  func (r *importReader) nextByte(skipSpace bool) byte {
   179  	c := r.peekByte(skipSpace)
   180  	r.peek = 0
   181  	return c
   182  }
   183  
// goEmbed is the text findEmbed expects directly after "//" in a
// //go:embed comment.
var goEmbed = []byte("go:embed")
   185  
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// A comment only counts if nothing but spaces and tabs precede it on its
// line and "//go:embed" is followed by a space or tab. On a true return the
// reader is positioned just past that space or tab. String, raw string, and
// rune literals as well as /* */ comments are skipped over, so text inside
// them is not matched. Input is consumed with readByteNoBuf, which keeps
// r.pos up to date.
//
// first should be true only on the first call; see the comment below.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	//
	// startLine reports whether only spaces and tabs have been seen
	// since the most recent newline.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
		// Cases that have already read one byte too far jump back to
		// Reswitch so that byte is dispatched without reading another.
	Reswitch:
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Skip an interpreted string literal.
			startLine = false
			for r.err == nil {
				if r.eof {
					// Unterminated literal: recording the error also ends this loop.
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					// Consume the escaped byte so that \" does not end the literal.
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					// Closing quote: dispatch on the byte that follows it.
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			// Reached only after an error was recorded inside the literal.
			goto Reswitch

		case '`':
			// Skip a raw string literal; there are no escapes to handle.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Skip a rune literal, handling backslash escapes as for '"'.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// A lone '/', not a comment: dispatch on the byte after it.
				startLine = false
				goto Reswitch

			case '*':
				// Skip a block comment. c holds the previous byte and c1
				// the newest one; the loop stops once they read "*/".
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
				// Not a //go:embed comment: discard the rest of the line.
				// c is the last byte read and may already be the newline.
			SkipSlashSlash:
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
   307  
   308  // readKeyword reads the given keyword from the input.
   309  // If the keyword is not present, readKeyword records a syntax error.
   310  func (r *importReader) readKeyword(kw string) {
   311  	r.peekByte(true)
   312  	for i := 0; i < len(kw); i++ {
   313  		if r.nextByte(false) != kw[i] {
   314  			r.syntaxError()
   315  			return
   316  		}
   317  	}
   318  	if isIdent(r.peekByte(false)) {
   319  		r.syntaxError()
   320  	}
   321  }
   322  
   323  // readIdent reads an identifier from the input.
   324  // If an identifier is not present, readIdent records a syntax error.
   325  func (r *importReader) readIdent() {
   326  	c := r.peekByte(true)
   327  	if !isIdent(c) {
   328  		r.syntaxError()
   329  		return
   330  	}
   331  	for isIdent(r.peekByte(false)) {
   332  		r.peek = 0
   333  	}
   334  }
   335  
   336  // readString reads a quoted string literal from the input.
   337  // If an identifier is not present, readString records a syntax error.
   338  func (r *importReader) readString() {
   339  	switch r.nextByte(true) {
   340  	case '`':
   341  		for r.err == nil {
   342  			if r.nextByte(false) == '`' {
   343  				break
   344  			}
   345  			if r.eof {
   346  				r.syntaxError()
   347  			}
   348  		}
   349  	case '"':
   350  		for r.err == nil {
   351  			c := r.nextByte(false)
   352  			if c == '"' {
   353  				break
   354  			}
   355  			if r.eof || c == '\n' {
   356  				r.syntaxError()
   357  			}
   358  			if c == '\\' {
   359  				r.nextByte(false)
   360  			}
   361  		}
   362  	default:
   363  		r.syntaxError()
   364  	}
   365  }
   366  
   367  // readImport reads an import clause - optional identifier followed by quoted string -
   368  // from the input.
   369  func (r *importReader) readImport() {
   370  	c := r.peekByte(true)
   371  	if c == '.' {
   372  		r.peek = 0
   373  	} else if isIdent(c) {
   374  		r.readIdent()
   375  	}
   376  	r.readString()
   377  }
   378  
   379  // readComments is like io.ReadAll, except that it only reads the leading
   380  // block of comments in the file.
   381  func readComments(f io.Reader) ([]byte, error) {
   382  	r := newImportReader("", f)
   383  	r.peekByte(true)
   384  	if r.err == nil && !r.eof {
   385  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   386  		r.buf = r.buf[:len(r.buf)-1]
   387  	}
   388  	return r.buf, r.err
   389  }
   390  
   391  // readGoInfo expects a Go file as input and reads the file up to and including the import section.
   392  // It records what it learned in *info.
   393  // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
   394  // info.imports and info.embeds.
   395  //
   396  // It only returns an error if there are problems reading the file,
   397  // not for syntax errors in the file itself.
   398  func readGoInfo(f io.Reader, info *fileInfo) error {
   399  	r := newImportReader(info.name, f)
   400  
   401  	r.readKeyword("package")
   402  	r.readIdent()
   403  	for r.peekByte(true) == 'i' {
   404  		r.readKeyword("import")
   405  		if r.peekByte(true) == '(' {
   406  			r.nextByte(false)
   407  			for r.peekByte(true) != ')' && r.err == nil {
   408  				r.readImport()
   409  			}
   410  			r.nextByte(false)
   411  		} else {
   412  			r.readImport()
   413  		}
   414  	}
   415  
   416  	info.header = r.buf
   417  
   418  	// If we stopped successfully before EOF, we read a byte that told us we were done.
   419  	// Return all but that last byte, which would cause a syntax error if we let it through.
   420  	if r.err == nil && !r.eof {
   421  		info.header = r.buf[:len(r.buf)-1]
   422  	}
   423  
   424  	// If we stopped for a syntax error, consume the whole file so that
   425  	// we are sure we don't change the errors that go/parser returns.
   426  	if r.err == errSyntax {
   427  		r.err = nil
   428  		for r.err == nil && !r.eof {
   429  			r.readByte()
   430  		}
   431  		info.header = r.buf
   432  	}
   433  	if r.err != nil {
   434  		return r.err
   435  	}
   436  
   437  	if info.fset == nil {
   438  		return nil
   439  	}
   440  
   441  	// Parse file header & record imports.
   442  	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
   443  	if info.parseErr != nil {
   444  		return nil
   445  	}
   446  
   447  	hasEmbed := false
   448  	for _, decl := range info.parsed.Decls {
   449  		d, ok := decl.(*ast.GenDecl)
   450  		if !ok {
   451  			continue
   452  		}
   453  		for _, dspec := range d.Specs {
   454  			spec, ok := dspec.(*ast.ImportSpec)
   455  			if !ok {
   456  				continue
   457  			}
   458  			quoted := spec.Path.Value
   459  			path, err := strconv.Unquote(quoted)
   460  			if err != nil {
   461  				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
   462  			}
   463  			if !isValidImport(path) {
   464  				// The parser used to return a parse error for invalid import paths, but
   465  				// no longer does, so check for and create the error here instead.
   466  				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
   467  				info.imports = nil
   468  				return nil
   469  			}
   470  			if path == "embed" {
   471  				hasEmbed = true
   472  			}
   473  
   474  			doc := spec.Doc
   475  			if doc == nil && len(d.Specs) == 1 {
   476  				doc = d.Doc
   477  			}
   478  			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
   479  		}
   480  	}
   481  
   482  	// Extract directives.
   483  	for _, group := range info.parsed.Comments {
   484  		if group.Pos() >= info.parsed.Package {
   485  			break
   486  		}
   487  		for _, c := range group.List {
   488  			if strings.HasPrefix(c.Text, "//go:") {
   489  				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
   490  			}
   491  		}
   492  	}
   493  
   494  	// If the file imports "embed",
   495  	// we have to look for //go:embed comments
   496  	// in the remainder of the file.
   497  	// The compiler will enforce the mapping of comments to
   498  	// declared variables. We just need to know the patterns.
   499  	// If there were //go:embed comments earlier in the file
   500  	// (near the package statement or imports), the compiler
   501  	// will reject them. They can be (and have already been) ignored.
   502  	if hasEmbed {
   503  		var line []byte
   504  		for first := true; r.findEmbed(first); first = false {
   505  			line = line[:0]
   506  			pos := r.pos
   507  			for {
   508  				c := r.readByteNoBuf()
   509  				if c == '\n' || r.err != nil || r.eof {
   510  					break
   511  				}
   512  				line = append(line, c)
   513  			}
   514  			// Add args if line is well-formed.
   515  			// Ignore badly-formed lines - the compiler will report them when it finds them,
   516  			// and we can pretend they are not there to help go list succeed with what it knows.
   517  			embs, err := parseGoEmbed(string(line), pos)
   518  			if err == nil {
   519  				info.embeds = append(info.embeds, embs...)
   520  			}
   521  		}
   522  	}
   523  
   524  	return nil
   525  }
   526  
   527  // isValidImport checks if the import is a valid import using the more strict
   528  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
   529  // It was ported from the function of the same name that was removed from the
   530  // parser in CL 424855, when the parser stopped doing these checks.
   531  func isValidImport(s string) bool {
   532  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
   533  	for _, r := range s {
   534  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
   535  			return false
   536  		}
   537  	}
   538  	return s != ""
   539  }
   540  
   541  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   542  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   543  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
   544  // this version calculates position information as well.
   545  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   546  	trimBytes := func(n int) {
   547  		pos.Offset += n
   548  		pos.Column += utf8.RuneCountInString(args[:n])
   549  		args = args[n:]
   550  	}
   551  	trimSpace := func() {
   552  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   553  		trimBytes(len(args) - len(trim))
   554  	}
   555  
   556  	var list []fileEmbed
   557  	for trimSpace(); args != ""; trimSpace() {
   558  		var path string
   559  		pathPos := pos
   560  	Switch:
   561  		switch args[0] {
   562  		default:
   563  			i := len(args)
   564  			for j, c := range args {
   565  				if unicode.IsSpace(c) {
   566  					i = j
   567  					break
   568  				}
   569  			}
   570  			path = args[:i]
   571  			trimBytes(i)
   572  
   573  		case '`':
   574  			var ok bool
   575  			path, _, ok = strings.Cut(args[1:], "`")
   576  			if !ok {
   577  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   578  			}
   579  			trimBytes(1 + len(path) + 1)
   580  
   581  		case '"':
   582  			i := 1
   583  			for ; i < len(args); i++ {
   584  				if args[i] == '\\' {
   585  					i++
   586  					continue
   587  				}
   588  				if args[i] == '"' {
   589  					q, err := strconv.Unquote(args[:i+1])
   590  					if err != nil {
   591  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   592  					}
   593  					path = q
   594  					trimBytes(i + 1)
   595  					break Switch
   596  				}
   597  			}
   598  			if i >= len(args) {
   599  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   600  			}
   601  		}
   602  
   603  		if args != "" {
   604  			r, _ := utf8.DecodeRuneInString(args)
   605  			if !unicode.IsSpace(r) {
   606  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   607  			}
   608  		}
   609  		list = append(list, fileEmbed{path, pathPos})
   610  	}
   611  	return list, nil
   612  }
   613  

View as plain text