...

Source file src/go/scanner/scanner.go

Documentation: go/scanner

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package scanner implements a scanner for Go source text.
// It takes a []byte as source which can then be tokenized
// through repeated calls to the Scan method.
//
package scanner

import (
	"bytes"
	"fmt"
	"go/token"
	"path/filepath"
	"strconv"
	"unicode"
	"unicode/utf8"
)

// An ErrorHandler may be provided to Scanner.Init. If a syntax error is
// encountered and a handler was installed, the handler is called with a
// position and an error message. The position points to the beginning of
// the offending token.
//
type ErrorHandler func(pos token.Position, msg string)
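
// An illustrative handler (a sketch, not part of the original source; the
// printErrors name is hypothetical): it simply prints each error with its
// position. Any function with this signature may be passed to Init.
var printErrors ErrorHandler = func(pos token.Position, msg string) {
	fmt.Printf("%s: %s\n", pos, msg)
}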

// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
type Scanner struct {
	// immutable state
	file *token.File  // source file handle
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character
	offset     int  // character offset
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // current line offset
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}

const bom = 0xFEFF // byte order mark, only permitted as very first character

// Read the next Unicode char into s.ch.
// s.ch < 0 means end-of-file.
//
func (s *Scanner) next() {
	if s.rdOffset < len(s.src) {
		s.offset = s.rdOffset
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		r, w := rune(s.src[s.rdOffset]), 1
		switch {
		case r == 0:
			s.error(s.offset, "illegal character NUL")
		case r >= utf8.RuneSelf:
			// not ASCII
			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
			if r == utf8.RuneError && w == 1 {
				s.error(s.offset, "illegal UTF-8 encoding")
			} else if r == bom && s.offset > 0 {
				s.error(s.offset, "illegal byte order mark")
			}
		}
		s.rdOffset += w
		s.ch = r
	} else {
		s.offset = len(s.src)
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		s.ch = -1 // eof
	}
}

// peek returns the byte following the most recently read character without
// advancing the scanner. If the scanner is at EOF, peek returns 0.
func (s *Scanner) peek() byte {
	if s.rdOffset < len(s.src) {
		return s.src[s.rdOffset]
	}
	return 0
}

// A mode value is a set of flags (or 0).
// They control scanner behavior.
//
type Mode uint

const (
	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
	dontInsertSemis                  // do not automatically insert semicolons - for testing only
)
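
// Illustrative note (not part of the original source): a client that wants
// comments returned as COMMENT tokens passes ScanComments to Init, e.g.
//
//	s.Init(file, src, nil, ScanComments)
//
// while a mode of 0 skips comments; semicolons are inserted in both cases.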

// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same source,
// as line information which is already present is ignored. Init causes
// a panic if the file size does not match the src size.
//
// Calls to Scan will invoke the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.err = err
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertSemi = false
	s.ErrorCount = 0

	s.next()
	if s.ch == bom {
		s.next() // ignore BOM at file beginning
	}
}
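
// Illustrative sketch (not part of the original source; the newScanner name
// is hypothetical): the token.File passed to Init must come from a file set
// and must have exactly len(src) bytes, or Init panics.
func newScanner(fset *token.FileSet, name string, src []byte) *Scanner {
	file := fset.AddFile(name, fset.Base(), len(src))
	s := new(Scanner)
	s.Init(file, src, nil /* no error handler */, 0)
	return s
}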

func (s *Scanner) error(offs int, msg string) {
	if s.err != nil {
		s.err(s.file.Position(s.file.Pos(offs)), msg)
	}
	s.ErrorCount++
}

func (s *Scanner) scanComment() string {
	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
	offs := s.offset - 1 // position of initial '/'
	next := -1           // position immediately following the comment; < 0 means invalid comment
	numCR := 0

	if s.ch == '/' {
		//-style comment
		// (the final '\n' is not considered part of the comment)
		s.next()
		for s.ch != '\n' && s.ch >= 0 {
			if s.ch == '\r' {
				numCR++
			}
			s.next()
		}
		// if we are at '\n', the position following the comment is afterwards
		next = s.offset
		if s.ch == '\n' {
			next++
		}
		goto exit
	}

	/*-style comment */
	s.next()
	for s.ch >= 0 {
		ch := s.ch
		if ch == '\r' {
			numCR++
		}
		s.next()
		if ch == '*' && s.ch == '/' {
			s.next()
			next = s.offset
			goto exit
		}
	}

	s.error(offs, "comment not terminated")

exit:
	lit := s.src[offs:s.offset]

	// On Windows, a (//-comment) line may end in "\r\n".
	// Remove the final '\r' before analyzing the text for
	// line directives (matching the compiler). Remove any
	// other '\r' afterwards (matching the pre-existing be-
	// havior of the scanner).
	if numCR > 0 && len(lit) >= 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
		lit = lit[:len(lit)-1]
		numCR--
	}

	// interpret line directives
	// (//line directives must start at the beginning of the current line)
	if next >= 0 /* implies valid comment */ && (lit[1] == '*' || offs == s.lineOffset) && bytes.HasPrefix(lit[2:], prefix) {
		s.updateLineInfo(next, offs, lit)
	}

	if numCR > 0 {
		lit = stripCR(lit, lit[1] == '*')
	}

	return string(lit)
}

var prefix = []byte("line ")

// updateLineInfo parses the incoming comment text at offset offs
// as a line directive. If successful, it updates the line info table
// for the position next per the line directive.
func (s *Scanner) updateLineInfo(next, offs int, text []byte) {
	// extract comment text
	if text[1] == '*' {
		text = text[:len(text)-2] // lop off trailing "*/"
	}
	text = text[7:] // lop off leading "//line " or "/*line "
	offs += 7

	i, n, ok := trailingDigits(text)
	if i == 0 {
		return // ignore (not a line directive)
	}
	// i > 0

	if !ok {
		// text has a suffix :xxx but xxx is not a number
		s.error(offs+i, "invalid line number: "+string(text[i:]))
		return
	}

	var line, col int
	i2, n2, ok2 := trailingDigits(text[:i-1])
	if ok2 {
		//line filename:line:col
		i, i2 = i2, i
		line, col = n2, n
		if col == 0 {
			s.error(offs+i2, "invalid column number: "+string(text[i2:]))
			return
		}
		text = text[:i2-1] // lop off ":col"
	} else {
		//line filename:line
		line = n
	}

	if line == 0 {
		s.error(offs+i, "invalid line number: "+string(text[i:]))
		return
	}

	// If we have a column (//line filename:line:col form),
	// an empty filename means to use the previous filename.
	filename := string(text[:i-1]) // lop off ":line", and trim white space
	if filename == "" && ok2 {
		filename = s.file.Position(s.file.Pos(offs)).Filename
	} else if filename != "" {
		// Put a relative filename in the current directory.
		// This is for compatibility with earlier releases.
		// See issue 26671.
		filename = filepath.Clean(filename)
		if !filepath.IsAbs(filename) {
			filename = filepath.Join(s.dir, filename)
		}
	}

	s.file.AddLineColumnInfo(next, filename, line, col)
}
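
// Worked example (illustrative, not part of the original source): for the
// directive comment
//
//	//line /tmp/gen.go:42:5
//
// text is first trimmed to "/tmp/gen.go:42:5"; trailingDigits splits off the
// trailing 5 (the column) and then the 42 (the line), and AddLineColumnInfo
// records that the position immediately following the comment corresponds to
// /tmp/gen.go, line 42, column 5.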

func trailingDigits(text []byte) (int, int, bool) {
	i := bytes.LastIndexByte(text, ':') // look from right (Windows filenames may contain ':')
	if i < 0 {
		return 0, 0, false // no ":"
	}
	// i >= 0
	n, err := strconv.ParseUint(string(text[i+1:]), 10, 0)
	return i + 1, int(n), err == nil
}

func (s *Scanner) findLineEnd() bool {
	// initial '/' already consumed

	defer func(offs int) {
		// reset scanner state to where it was upon calling findLineEnd
		s.ch = '/'
		s.offset = offs
		s.rdOffset = offs + 1
		s.next() // consume initial '/' again
	}(s.offset - 1)

	// read ahead until a newline, EOF, or non-comment token is found
	for s.ch == '/' || s.ch == '*' {
		if s.ch == '/' {
			//-style comment always contains a newline
			return true
		}
		/*-style comment: look for newline */
		s.next()
		for s.ch >= 0 {
			ch := s.ch
			if ch == '\n' {
				return true
			}
			s.next()
			if ch == '*' && s.ch == '/' {
				s.next()
				break
			}
		}
		s.skipWhitespace() // s.insertSemi is set
		if s.ch < 0 || s.ch == '\n' {
			return true
		}
		if s.ch != '/' {
			// non-comment token
			return false
		}
		s.next() // consume '/'
	}

	return false
}

func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
}

func (s *Scanner) scanIdentifier() string {
	offs := s.offset
	for isLetter(s.ch) || isDigit(s.ch) {
		s.next()
	}
	return string(s.src[offs:s.offset])
}

func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}

func (s *Scanner) scanMantissa(base int) {
	for digitVal(s.ch) < base {
		s.next()
	}
}

func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
	// digitVal(s.ch) < 10
	offs := s.offset
	tok := token.INT

	if seenDecimalPoint {
		offs--
		tok = token.FLOAT
		s.scanMantissa(10)
		goto exponent
	}

	if s.ch == '0' {
		// int or float
		offs := s.offset
		s.next()
		if s.ch == 'x' || s.ch == 'X' {
			// hexadecimal int
			s.next()
			s.scanMantissa(16)
			if s.offset-offs <= 2 {
				// only scanned "0x" or "0X"
				s.error(offs, "illegal hexadecimal number")
			}
		} else {
			// octal int or float
			seenDecimalDigit := false
			s.scanMantissa(8)
			if s.ch == '8' || s.ch == '9' {
				// illegal octal int or float
				seenDecimalDigit = true
				s.scanMantissa(10)
			}
			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
				goto fraction
			}
			// octal int
			if seenDecimalDigit {
				s.error(offs, "illegal octal number")
			}
		}
		goto exit
	}

	// decimal int or float
	s.scanMantissa(10)

fraction:
	if s.ch == '.' {
		tok = token.FLOAT
		s.next()
		s.scanMantissa(10)
	}

exponent:
	if s.ch == 'e' || s.ch == 'E' {
		tok = token.FLOAT
		s.next()
		if s.ch == '-' || s.ch == '+' {
			s.next()
		}
		if digitVal(s.ch) < 10 {
			s.scanMantissa(10)
		} else {
			s.error(offs, "illegal floating-point exponent")
		}
	}

	if s.ch == 'i' {
		tok = token.IMAG
		s.next()
	}

exit:
	return tok, string(s.src[offs:s.offset])
}
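
// Illustrative note (not part of the original source): with this logic the
// scanner classifies, for example, "42", "0755", and "0x1F" as token.INT,
// "3.14", ".25", and "1e-3" as token.FLOAT, and "2i" or "0.5i" as token.IMAG;
// a bare "0x" or an octal literal containing 8 or 9 is reported as an error.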

// scanEscape parses an escape sequence where quote is the accepted
// escaped quote. In case of a syntax error, it stops at the offending
// character (without consuming it) and returns false. Otherwise
// it returns true.
func (s *Scanner) scanEscape(quote rune) bool {
	offs := s.offset

	var n int
	var base, max uint32
	switch s.ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		s.next()
		return true
	case '0', '1', '2', '3', '4', '5', '6', '7':
		n, base, max = 3, 8, 255
	case 'x':
		s.next()
		n, base, max = 2, 16, 255
	case 'u':
		s.next()
		n, base, max = 4, 16, unicode.MaxRune
	case 'U':
		s.next()
		n, base, max = 8, 16, unicode.MaxRune
	default:
		msg := "unknown escape sequence"
		if s.ch < 0 {
			msg = "escape sequence not terminated"
		}
		s.error(offs, msg)
		return false
	}

	var x uint32
	for n > 0 {
		d := uint32(digitVal(s.ch))
		if d >= base {
			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
			if s.ch < 0 {
				msg = "escape sequence not terminated"
			}
			s.error(s.offset, msg)
			return false
		}
		x = x*base + d
		s.next()
		n--
	}

	if x > max || 0xD800 <= x && x < 0xE000 {
		s.error(offs, "escape sequence is invalid Unicode code point")
		return false
	}

	return true
}
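
// Illustrative note (not part of the original source): the accepted escape
// forms and their value limits are, for example,
//
//	\n \t \\ \'    single-character escapes
//	\101           3 octal digits, value <= 255
//	\x41           2 hex digits, value <= 255
//	\u00e9         4 hex digits, up to unicode.MaxRune
//	\U0001F600     8 hex digits, up to unicode.MaxRune
//
// and values in the surrogate range U+D800..U+DFFF are rejected.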

func (s *Scanner) scanRune() string {
	// '\'' opening already consumed
	offs := s.offset - 1

	valid := true
	n := 0
	for {
		ch := s.ch
		if ch == '\n' || ch < 0 {
			// only report error if we don't have one already
			if valid {
				s.error(offs, "rune literal not terminated")
				valid = false
			}
			break
		}
		s.next()
		if ch == '\'' {
			break
		}
		n++
		if ch == '\\' {
			if !s.scanEscape('\'') {
				valid = false
			}
			// continue to read to closing quote
		}
	}

	if valid && n != 1 {
		s.error(offs, "illegal rune literal")
	}

	return string(s.src[offs:s.offset])
}

func (s *Scanner) scanString() string {
	// '"' opening already consumed
	offs := s.offset - 1

	for {
		ch := s.ch
		if ch == '\n' || ch < 0 {
			s.error(offs, "string literal not terminated")
			break
		}
		s.next()
		if ch == '"' {
			break
		}
		if ch == '\\' {
			s.scanEscape('"')
		}
	}

	return string(s.src[offs:s.offset])
}

func stripCR(b []byte, comment bool) []byte {
	c := make([]byte, len(b))
	i := 0
	for j, ch := range b {
		// In a /*-style comment, don't strip \r from *\r/ (incl.
		// sequences of \r from *\r\r...\r/) since the resulting
		// */ would terminate the comment too early unless the \r
		// is immediately following the opening /* in which case
		// it's ok because /*/ is not closed yet (issue #11151).
		if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
			c[i] = ch
			i++
		}
	}
	return c[:i]
}

func (s *Scanner) scanRawString() string {
	// '`' opening already consumed
	offs := s.offset - 1

	hasCR := false
	for {
		ch := s.ch
		if ch < 0 {
			s.error(offs, "raw string literal not terminated")
			break
		}
		s.next()
		if ch == '`' {
			break
		}
		if ch == '\r' {
			hasCR = true
		}
	}

	lit := s.src[offs:s.offset]
	if hasCR {
		lit = stripCR(lit, false)
	}

	return string(lit)
}

func (s *Scanner) skipWhitespace() {
	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
		s.next()
	}
}

// Helper functions for scanning multi-byte tokens such as >> += >>= .
// Different routines recognize different length tok_i based on matches
// of ch_i. If a token ends in '=', the result is tok1 or tok3
// respectively. Otherwise, the result is tok0 if there was no other
// matching character, or tok2 if the matching character was ch2.

func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	return tok0
}

func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		return tok2
	}
	return tok0
}

func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		if s.ch == '=' {
			s.next()
			return tok3
		}
		return tok2
	}
	return tok0
}
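
// Illustrative note (not part of the original source): for '>', Scan calls
//
//	s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
//
// which yields GTR for ">", GEQ for ">=", SHR for ">>", and SHR_ASSIGN for
// ">>=", consuming the extra characters as needed.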

// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// token.EOF.
//
// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
// has the corresponding value.
//
// If the returned token is a keyword, the literal string is the keyword.
//
// If the returned token is token.SEMICOLON, the corresponding
// literal string is ";" if the semicolon was present in the source,
// and "\n" if the semicolon was inserted because of a newline or
// at EOF.
//
// If the returned token is token.ILLEGAL, the literal string is the
// offending character.
//
// In all other cases, Scan returns an empty literal string.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
//
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
	s.skipWhitespace()

	// current token start
	pos = s.file.Pos(s.offset)

	// determine token value
	insertSemi := false
	switch ch := s.ch; {
	case isLetter(ch):
		lit = s.scanIdentifier()
		if len(lit) > 1 {
			// keywords are longer than one letter - avoid lookup otherwise
			tok = token.Lookup(lit)
			switch tok {
			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
				insertSemi = true
			}
		} else {
			insertSemi = true
			tok = token.IDENT
		}
	case '0' <= ch && ch <= '9':
		insertSemi = true
		tok, lit = s.scanNumber(false)
	default:
		s.next() // always make progress
		switch ch {
		case -1:
			if s.insertSemi {
				s.insertSemi = false // EOF consumed
				return pos, token.SEMICOLON, "\n"
			}
			tok = token.EOF
		case '\n':
			// we only reach here if s.insertSemi was
			// set in the first place and exited early
			// from s.skipWhitespace()
			s.insertSemi = false // newline consumed
			return pos, token.SEMICOLON, "\n"
		case '"':
			insertSemi = true
			tok = token.STRING
			lit = s.scanString()
		case '\'':
			insertSemi = true
			tok = token.CHAR
			lit = s.scanRune()
		case '`':
			insertSemi = true
			tok = token.STRING
			lit = s.scanRawString()
		case ':':
			tok = s.switch2(token.COLON, token.DEFINE)
		case '.':
			if '0' <= s.ch && s.ch <= '9' {
				insertSemi = true
				tok, lit = s.scanNumber(true)
			} else {
				tok = token.PERIOD
				if s.ch == '.' && s.peek() == '.' {
					s.next()
					s.next() // consume last '.'
					tok = token.ELLIPSIS
				}
			}
		case ',':
			tok = token.COMMA
		case ';':
			tok = token.SEMICOLON
			lit = ";"
		case '(':
			tok = token.LPAREN
		case ')':
			insertSemi = true
			tok = token.RPAREN
		case '[':
			tok = token.LBRACK
		case ']':
			insertSemi = true
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			insertSemi = true
			tok = token.RBRACE
		case '+':
			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
			if tok == token.INC {
				insertSemi = true
			}
		case '-':
			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
			if tok == token.DEC {
				insertSemi = true
			}
		case '*':
			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
		case '/':
			if s.ch == '/' || s.ch == '*' {
				// comment
				if s.insertSemi && s.findLineEnd() {
					// reset position to the beginning of the comment
					s.ch = '/'
					s.offset = s.file.Offset(pos)
					s.rdOffset = s.offset + 1
					s.insertSemi = false // newline consumed
					return pos, token.SEMICOLON, "\n"
				}
				comment := s.scanComment()
				if s.mode&ScanComments == 0 {
					// skip comment
					s.insertSemi = false // newline consumed
					goto scanAgain
				}
				tok = token.COMMENT
				lit = comment
			} else {
				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
			}
		case '%':
			tok = s.switch2(token.REM, token.REM_ASSIGN)
		case '^':
			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
		case '<':
			if s.ch == '-' {
				s.next()
				tok = token.ARROW
			} else {
				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
			}
		case '>':
			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
		case '=':
			tok = s.switch2(token.ASSIGN, token.EQL)
		case '!':
			tok = s.switch2(token.NOT, token.NEQ)
		case '&':
			if s.ch == '^' {
				s.next()
				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
			} else {
				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
			}
		case '|':
			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
		default:
			// next reports unexpected BOMs - don't repeat
			if ch != bom {
				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
			}
			insertSemi = s.insertSemi // preserve insertSemi info
			tok = token.ILLEGAL
			lit = string(ch)
		}
	}
	if s.mode&dontInsertSemis == 0 {
		s.insertSemi = insertSemi
	}

	return
}
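
// Typical use (an illustrative sketch, not part of the original source; the
// printTokens name is hypothetical): register the source with a token.FileSet,
// call Init, then call Scan until token.EOF. Because Scan recovers from syntax
// errors, a client must also check s.ErrorCount (or count handler calls).
func printTokens(src []byte) {
	fset := token.NewFileSet()
	file := fset.AddFile("example.go", fset.Base(), len(src))

	var s Scanner
	s.Init(file, src, nil /* no error handler */, ScanComments)
	for {
		pos, tok, lit := s.Scan()
		if tok == token.EOF {
			break
		}
		fmt.Printf("%s\t%s\t%q\n", fset.Position(pos), tok, lit)
	}
}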
   851  
