...
Run Format

Source file src/go/scanner/scanner.go

Documentation: go/scanner

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package scanner implements a scanner for Go source text.
     6  // It takes a []byte as source which can then be tokenized
     7  // through repeated calls to the Scan method.
     8  //
     9  package scanner
    10  
    11  import (
    12  	"bytes"
    13  	"fmt"
    14  	"go/token"
    15  	"path/filepath"
    16  	"strconv"
    17  	"unicode"
    18  	"unicode/utf8"
    19  )
    20  
    21  // An ErrorHandler may be provided to Scanner.Init. If a syntax error is
    22  // encountered and a handler was installed, the handler is called with a
    23  // position and an error message. The position points to the beginning of
    24  // the offending token.
    25  //
    26  type ErrorHandler func(pos token.Position, msg string)
    27  
    28  // A Scanner holds the scanner's internal state while processing
    29  // a given text. It can be allocated as part of another data
    30  // structure but must be initialized via Init before use.
    31  //
    32  type Scanner struct {
    33  	// immutable state
    34  	file *token.File  // source file handle
    35  	dir  string       // directory portion of file.Name()
    36  	src  []byte       // source
    37  	err  ErrorHandler // error reporting; or nil
    38  	mode Mode         // scanning mode
    39  
    40  	// scanning state
    41  	ch         rune // current character
    42  	offset     int  // character offset
    43  	rdOffset   int  // reading offset (position after current character)
    44  	lineOffset int  // current line offset
    45  	insertSemi bool // insert a semicolon before next newline
    46  
    47  	// public state - ok to modify
    48  	ErrorCount int // number of errors encountered
    49  }
    50  
    51  const bom = 0xFEFF // byte order mark, only permitted as very first character
    52  
    53  // Read the next Unicode char into s.ch.
    54  // s.ch < 0 means end-of-file.
    55  //
    56  func (s *Scanner) next() {
    57  	if s.rdOffset < len(s.src) {
    58  		s.offset = s.rdOffset
    59  		if s.ch == '\n' {
    60  			s.lineOffset = s.offset
    61  			s.file.AddLine(s.offset)
    62  		}
    63  		r, w := rune(s.src[s.rdOffset]), 1
    64  		switch {
    65  		case r == 0:
    66  			s.error(s.offset, "illegal character NUL")
    67  		case r >= utf8.RuneSelf:
    68  			// not ASCII
    69  			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
    70  			if r == utf8.RuneError && w == 1 {
    71  				s.error(s.offset, "illegal UTF-8 encoding")
    72  			} else if r == bom && s.offset > 0 {
    73  				s.error(s.offset, "illegal byte order mark")
    74  			}
    75  		}
    76  		s.rdOffset += w
    77  		s.ch = r
    78  	} else {
    79  		s.offset = len(s.src)
    80  		if s.ch == '\n' {
    81  			s.lineOffset = s.offset
    82  			s.file.AddLine(s.offset)
    83  		}
    84  		s.ch = -1 // eof
    85  	}
    86  }
    87  
    88  // A mode value is a set of flags (or 0).
    89  // They control scanner behavior.
    90  //
    91  type Mode uint
    92  
    93  const (
    94  	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
    95  	dontInsertSemis                  // do not automatically insert semicolons - for testing only
    96  )
    97  
    98  // Init prepares the scanner s to tokenize the text src by setting the
    99  // scanner at the beginning of src. The scanner uses the file set file
   100  // for position information and it adds line information for each line.
   101  // It is ok to re-use the same file when re-scanning the same file as
   102  // line information which is already present is ignored. Init causes a
   103  // panic if the file size does not match the src size.
   104  //
   105  // Calls to Scan will invoke the error handler err if they encounter a
   106  // syntax error and err is not nil. Also, for each error encountered,
   107  // the Scanner field ErrorCount is incremented by one. The mode parameter
   108  // determines how comments are handled.
   109  //
   110  // Note that Init may call err if there is an error in the first character
   111  // of the file.
   112  //
   113  func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
   114  	// Explicitly initialize all fields since a scanner may be reused.
   115  	if file.Size() != len(src) {
   116  		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
   117  	}
   118  	s.file = file
   119  	s.dir, _ = filepath.Split(file.Name())
   120  	s.src = src
   121  	s.err = err
   122  	s.mode = mode
   123  
   124  	s.ch = ' '
   125  	s.offset = 0
   126  	s.rdOffset = 0
   127  	s.lineOffset = 0
   128  	s.insertSemi = false
   129  	s.ErrorCount = 0
   130  
   131  	s.next()
   132  	if s.ch == bom {
   133  		s.next() // ignore BOM at file beginning
   134  	}
   135  }
   136  
   137  func (s *Scanner) error(offs int, msg string) {
   138  	if s.err != nil {
   139  		s.err(s.file.Position(s.file.Pos(offs)), msg)
   140  	}
   141  	s.ErrorCount++
   142  }
   143  
   144  func (s *Scanner) scanComment() string {
   145  	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
   146  	offs := s.offset - 1 // position of initial '/'
   147  	next := -1           // position immediately following the comment; < 0 means invalid comment
   148  	numCR := 0
   149  
   150  	if s.ch == '/' {
   151  		//-style comment
   152  		// (the final '\n' is not considered part of the comment)
   153  		s.next()
   154  		for s.ch != '\n' && s.ch >= 0 {
   155  			if s.ch == '\r' {
   156  				numCR++
   157  			}
   158  			s.next()
   159  		}
   160  		// if we are at '\n', the position following the comment is afterwards
   161  		next = s.offset
   162  		if s.ch == '\n' {
   163  			next++
   164  		}
   165  		goto exit
   166  	}
   167  
   168  	/*-style comment */
   169  	s.next()
   170  	for s.ch >= 0 {
   171  		ch := s.ch
   172  		if ch == '\r' {
   173  			numCR++
   174  		}
   175  		s.next()
   176  		if ch == '*' && s.ch == '/' {
   177  			s.next()
   178  			next = s.offset
   179  			goto exit
   180  		}
   181  	}
   182  
   183  	s.error(offs, "comment not terminated")
   184  
   185  exit:
   186  	lit := s.src[offs:s.offset]
   187  
   188  	// On Windows, a (//-comment) line may end in "\r\n".
   189  	// Remove the final '\r' before analyzing the text for
   190  	// line directives (matching the compiler). Remove any
   191  	// other '\r' afterwards (matching the pre-existing be-
   192  	// havior of the scanner).
   193  	if numCR > 0 && len(lit) >= 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
   194  		lit = lit[:len(lit)-1]
   195  		numCR--
   196  	}
   197  
   198  	// interpret line directives
   199  	// (//line directives must start at the beginning of the current line)
   200  	if next >= 0 /* implies valid comment */ && (lit[1] == '*' || offs == s.lineOffset) && bytes.HasPrefix(lit[2:], prefix) {
   201  		s.updateLineInfo(next, offs, lit)
   202  	}
   203  
   204  	if numCR > 0 {
   205  		lit = stripCR(lit, lit[1] == '*')
   206  	}
   207  
   208  	return string(lit)
   209  }
   210  
   211  var prefix = []byte("line ")
   212  
   213  // updateLineInfo parses the incoming comment text at offset offs
   214  // as a line directive. If successful, it updates the line info table
   215  // for the position next per the line directive.
   216  func (s *Scanner) updateLineInfo(next, offs int, text []byte) {
   217  	// extract comment text
   218  	if text[1] == '*' {
   219  		text = text[:len(text)-2] // lop off trailing "*/"
   220  	}
   221  	text = text[7:] // lop off leading "//line " or "/*line "
   222  	offs += 7
   223  
   224  	i, n, ok := trailingDigits(text)
   225  	if i == 0 {
   226  		return // ignore (not a line directive)
   227  	}
   228  	// i > 0
   229  
   230  	if !ok {
   231  		// text has a suffix :xxx but xxx is not a number
   232  		s.error(offs+i, "invalid line number: "+string(text[i:]))
   233  		return
   234  	}
   235  
   236  	var line, col int
   237  	i2, n2, ok2 := trailingDigits(text[:i-1])
   238  	if ok2 {
   239  		//line filename:line:col
   240  		i, i2 = i2, i
   241  		line, col = n2, n
   242  		if col == 0 {
   243  			s.error(offs+i2, "invalid column number: "+string(text[i2:]))
   244  			return
   245  		}
   246  		text = text[:i2-1] // lop off ":col"
   247  	} else {
   248  		//line filename:line
   249  		line = n
   250  	}
   251  
   252  	if line == 0 {
   253  		s.error(offs+i, "invalid line number: "+string(text[i:]))
   254  		return
   255  	}
   256  
   257  	// If we have a column (//line filename:line:col form),
   258  	// an empty filename means to use the previous filename.
   259  	filename := string(text[:i-1]) // lop off ":line", and trim white space
   260  	if filename == "" && ok2 {
   261  		filename = s.file.Position(s.file.Pos(offs)).Filename
   262  	} else if filename != "" {
   263  		// Put a relative filename in the current directory.
   264  		// This is for compatibility with earlier releases.
   265  		// See issue 26671.
   266  		filename = filepath.Clean(filename)
   267  		if !filepath.IsAbs(filename) {
   268  			filename = filepath.Join(s.dir, filename)
   269  		}
   270  	}
   271  
   272  	s.file.AddLineColumnInfo(next, filename, line, col)
   273  }
   274  
   275  func trailingDigits(text []byte) (int, int, bool) {
   276  	i := bytes.LastIndexByte(text, ':') // look from right (Windows filenames may contain ':')
   277  	if i < 0 {
   278  		return 0, 0, false // no ":"
   279  	}
   280  	// i >= 0
   281  	n, err := strconv.ParseUint(string(text[i+1:]), 10, 0)
   282  	return i + 1, int(n), err == nil
   283  }
   284  
   285  func (s *Scanner) findLineEnd() bool {
   286  	// initial '/' already consumed
   287  
   288  	defer func(offs int) {
   289  		// reset scanner state to where it was upon calling findLineEnd
   290  		s.ch = '/'
   291  		s.offset = offs
   292  		s.rdOffset = offs + 1
   293  		s.next() // consume initial '/' again
   294  	}(s.offset - 1)
   295  
   296  	// read ahead until a newline, EOF, or non-comment token is found
   297  	for s.ch == '/' || s.ch == '*' {
   298  		if s.ch == '/' {
   299  			//-style comment always contains a newline
   300  			return true
   301  		}
   302  		/*-style comment: look for newline */
   303  		s.next()
   304  		for s.ch >= 0 {
   305  			ch := s.ch
   306  			if ch == '\n' {
   307  				return true
   308  			}
   309  			s.next()
   310  			if ch == '*' && s.ch == '/' {
   311  				s.next()
   312  				break
   313  			}
   314  		}
   315  		s.skipWhitespace() // s.insertSemi is set
   316  		if s.ch < 0 || s.ch == '\n' {
   317  			return true
   318  		}
   319  		if s.ch != '/' {
   320  			// non-comment token
   321  			return false
   322  		}
   323  		s.next() // consume '/'
   324  	}
   325  
   326  	return false
   327  }
   328  
   329  func isLetter(ch rune) bool {
   330  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
   331  }
   332  
   333  func isDigit(ch rune) bool {
   334  	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
   335  }
   336  
   337  func (s *Scanner) scanIdentifier() string {
   338  	offs := s.offset
   339  	for isLetter(s.ch) || isDigit(s.ch) {
   340  		s.next()
   341  	}
   342  	return string(s.src[offs:s.offset])
   343  }
   344  
   345  func digitVal(ch rune) int {
   346  	switch {
   347  	case '0' <= ch && ch <= '9':
   348  		return int(ch - '0')
   349  	case 'a' <= ch && ch <= 'f':
   350  		return int(ch - 'a' + 10)
   351  	case 'A' <= ch && ch <= 'F':
   352  		return int(ch - 'A' + 10)
   353  	}
   354  	return 16 // larger than any legal digit val
   355  }
   356  
   357  func (s *Scanner) scanMantissa(base int) {
   358  	for digitVal(s.ch) < base {
   359  		s.next()
   360  	}
   361  }
   362  
   363  func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
   364  	// digitVal(s.ch) < 10
   365  	offs := s.offset
   366  	tok := token.INT
   367  
   368  	if seenDecimalPoint {
   369  		offs--
   370  		tok = token.FLOAT
   371  		s.scanMantissa(10)
   372  		goto exponent
   373  	}
   374  
   375  	if s.ch == '0' {
   376  		// int or float
   377  		offs := s.offset
   378  		s.next()
   379  		if s.ch == 'x' || s.ch == 'X' {
   380  			// hexadecimal int
   381  			s.next()
   382  			s.scanMantissa(16)
   383  			if s.offset-offs <= 2 {
   384  				// only scanned "0x" or "0X"
   385  				s.error(offs, "illegal hexadecimal number")
   386  			}
   387  		} else {
   388  			// octal int or float
   389  			seenDecimalDigit := false
   390  			s.scanMantissa(8)
   391  			if s.ch == '8' || s.ch == '9' {
   392  				// illegal octal int or float
   393  				seenDecimalDigit = true
   394  				s.scanMantissa(10)
   395  			}
   396  			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
   397  				goto fraction
   398  			}
   399  			// octal int
   400  			if seenDecimalDigit {
   401  				s.error(offs, "illegal octal number")
   402  			}
   403  		}
   404  		goto exit
   405  	}
   406  
   407  	// decimal int or float
   408  	s.scanMantissa(10)
   409  
   410  fraction:
   411  	if s.ch == '.' {
   412  		tok = token.FLOAT
   413  		s.next()
   414  		s.scanMantissa(10)
   415  	}
   416  
   417  exponent:
   418  	if s.ch == 'e' || s.ch == 'E' {
   419  		tok = token.FLOAT
   420  		s.next()
   421  		if s.ch == '-' || s.ch == '+' {
   422  			s.next()
   423  		}
   424  		if digitVal(s.ch) < 10 {
   425  			s.scanMantissa(10)
   426  		} else {
   427  			s.error(offs, "illegal floating-point exponent")
   428  		}
   429  	}
   430  
   431  	if s.ch == 'i' {
   432  		tok = token.IMAG
   433  		s.next()
   434  	}
   435  
   436  exit:
   437  	return tok, string(s.src[offs:s.offset])
   438  }
   439  
   440  // scanEscape parses an escape sequence where rune is the accepted
   441  // escaped quote. In case of a syntax error, it stops at the offending
   442  // character (without consuming it) and returns false. Otherwise
   443  // it returns true.
   444  func (s *Scanner) scanEscape(quote rune) bool {
   445  	offs := s.offset
   446  
   447  	var n int
   448  	var base, max uint32
   449  	switch s.ch {
   450  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   451  		s.next()
   452  		return true
   453  	case '0', '1', '2', '3', '4', '5', '6', '7':
   454  		n, base, max = 3, 8, 255
   455  	case 'x':
   456  		s.next()
   457  		n, base, max = 2, 16, 255
   458  	case 'u':
   459  		s.next()
   460  		n, base, max = 4, 16, unicode.MaxRune
   461  	case 'U':
   462  		s.next()
   463  		n, base, max = 8, 16, unicode.MaxRune
   464  	default:
   465  		msg := "unknown escape sequence"
   466  		if s.ch < 0 {
   467  			msg = "escape sequence not terminated"
   468  		}
   469  		s.error(offs, msg)
   470  		return false
   471  	}
   472  
   473  	var x uint32
   474  	for n > 0 {
   475  		d := uint32(digitVal(s.ch))
   476  		if d >= base {
   477  			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
   478  			if s.ch < 0 {
   479  				msg = "escape sequence not terminated"
   480  			}
   481  			s.error(s.offset, msg)
   482  			return false
   483  		}
   484  		x = x*base + d
   485  		s.next()
   486  		n--
   487  	}
   488  
   489  	if x > max || 0xD800 <= x && x < 0xE000 {
   490  		s.error(offs, "escape sequence is invalid Unicode code point")
   491  		return false
   492  	}
   493  
   494  	return true
   495  }
   496  
   497  func (s *Scanner) scanRune() string {
   498  	// '\'' opening already consumed
   499  	offs := s.offset - 1
   500  
   501  	valid := true
   502  	n := 0
   503  	for {
   504  		ch := s.ch
   505  		if ch == '\n' || ch < 0 {
   506  			// only report error if we don't have one already
   507  			if valid {
   508  				s.error(offs, "rune literal not terminated")
   509  				valid = false
   510  			}
   511  			break
   512  		}
   513  		s.next()
   514  		if ch == '\'' {
   515  			break
   516  		}
   517  		n++
   518  		if ch == '\\' {
   519  			if !s.scanEscape('\'') {
   520  				valid = false
   521  			}
   522  			// continue to read to closing quote
   523  		}
   524  	}
   525  
   526  	if valid && n != 1 {
   527  		s.error(offs, "illegal rune literal")
   528  	}
   529  
   530  	return string(s.src[offs:s.offset])
   531  }
   532  
   533  func (s *Scanner) scanString() string {
   534  	// '"' opening already consumed
   535  	offs := s.offset - 1
   536  
   537  	for {
   538  		ch := s.ch
   539  		if ch == '\n' || ch < 0 {
   540  			s.error(offs, "string literal not terminated")
   541  			break
   542  		}
   543  		s.next()
   544  		if ch == '"' {
   545  			break
   546  		}
   547  		if ch == '\\' {
   548  			s.scanEscape('"')
   549  		}
   550  	}
   551  
   552  	return string(s.src[offs:s.offset])
   553  }
   554  
   555  func stripCR(b []byte, comment bool) []byte {
   556  	c := make([]byte, len(b))
   557  	i := 0
   558  	for j, ch := range b {
   559  		// In a /*-style comment, don't strip \r from *\r/ (incl.
   560  		// sequences of \r from *\r\r...\r/) since the resulting
   561  		// */ would terminate the comment too early unless the \r
   562  		// is immediately following the opening /* in which case
   563  		// it's ok because /*/ is not closed yet (issue #11151).
   564  		if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
   565  			c[i] = ch
   566  			i++
   567  		}
   568  	}
   569  	return c[:i]
   570  }
   571  
   572  func (s *Scanner) scanRawString() string {
   573  	// '`' opening already consumed
   574  	offs := s.offset - 1
   575  
   576  	hasCR := false
   577  	for {
   578  		ch := s.ch
   579  		if ch < 0 {
   580  			s.error(offs, "raw string literal not terminated")
   581  			break
   582  		}
   583  		s.next()
   584  		if ch == '`' {
   585  			break
   586  		}
   587  		if ch == '\r' {
   588  			hasCR = true
   589  		}
   590  	}
   591  
   592  	lit := s.src[offs:s.offset]
   593  	if hasCR {
   594  		lit = stripCR(lit, false)
   595  	}
   596  
   597  	return string(lit)
   598  }
   599  
   600  func (s *Scanner) skipWhitespace() {
   601  	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
   602  		s.next()
   603  	}
   604  }
   605  
   606  // Helper functions for scanning multi-byte tokens such as >> += >>= .
   607  // Different routines recognize different length tok_i based on matches
   608  // of ch_i. If a token ends in '=', the result is tok1 or tok3
   609  // respectively. Otherwise, the result is tok0 if there was no other
   610  // matching character, or tok2 if the matching character was ch2.
   611  
   612  func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
   613  	if s.ch == '=' {
   614  		s.next()
   615  		return tok1
   616  	}
   617  	return tok0
   618  }
   619  
   620  func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
   621  	if s.ch == '=' {
   622  		s.next()
   623  		return tok1
   624  	}
   625  	if s.ch == ch2 {
   626  		s.next()
   627  		return tok2
   628  	}
   629  	return tok0
   630  }
   631  
   632  func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
   633  	if s.ch == '=' {
   634  		s.next()
   635  		return tok1
   636  	}
   637  	if s.ch == ch2 {
   638  		s.next()
   639  		if s.ch == '=' {
   640  			s.next()
   641  			return tok3
   642  		}
   643  		return tok2
   644  	}
   645  	return tok0
   646  }
   647  
   648  // Scan scans the next token and returns the token position, the token,
   649  // and its literal string if applicable. The source end is indicated by
   650  // token.EOF.
   651  //
   652  // If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
   653  // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
   654  // has the corresponding value.
   655  //
   656  // If the returned token is a keyword, the literal string is the keyword.
   657  //
   658  // If the returned token is token.SEMICOLON, the corresponding
   659  // literal string is ";" if the semicolon was present in the source,
   660  // and "\n" if the semicolon was inserted because of a newline or
   661  // at EOF.
   662  //
   663  // If the returned token is token.ILLEGAL, the literal string is the
   664  // offending character.
   665  //
   666  // In all other cases, Scan returns an empty literal string.
   667  //
   668  // For more tolerant parsing, Scan will return a valid token if
   669  // possible even if a syntax error was encountered. Thus, even
   670  // if the resulting token sequence contains no illegal tokens,
   671  // a client may not assume that no error occurred. Instead it
   672  // must check the scanner's ErrorCount or the number of calls
   673  // of the error handler, if there was one installed.
   674  //
   675  // Scan adds line information to the file added to the file
   676  // set with Init. Token positions are relative to that file
   677  // and thus relative to the file set.
   678  //
   679  func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
   680  scanAgain:
   681  	s.skipWhitespace()
   682  
   683  	// current token start
   684  	pos = s.file.Pos(s.offset)
   685  
   686  	// determine token value
   687  	insertSemi := false
   688  	switch ch := s.ch; {
   689  	case isLetter(ch):
   690  		lit = s.scanIdentifier()
   691  		if len(lit) > 1 {
   692  			// keywords are longer than one letter - avoid lookup otherwise
   693  			tok = token.Lookup(lit)
   694  			switch tok {
   695  			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
   696  				insertSemi = true
   697  			}
   698  		} else {
   699  			insertSemi = true
   700  			tok = token.IDENT
   701  		}
   702  	case '0' <= ch && ch <= '9':
   703  		insertSemi = true
   704  		tok, lit = s.scanNumber(false)
   705  	default:
   706  		s.next() // always make progress
   707  		switch ch {
   708  		case -1:
   709  			if s.insertSemi {
   710  				s.insertSemi = false // EOF consumed
   711  				return pos, token.SEMICOLON, "\n"
   712  			}
   713  			tok = token.EOF
   714  		case '\n':
   715  			// we only reach here if s.insertSemi was
   716  			// set in the first place and exited early
   717  			// from s.skipWhitespace()
   718  			s.insertSemi = false // newline consumed
   719  			return pos, token.SEMICOLON, "\n"
   720  		case '"':
   721  			insertSemi = true
   722  			tok = token.STRING
   723  			lit = s.scanString()
   724  		case '\'':
   725  			insertSemi = true
   726  			tok = token.CHAR
   727  			lit = s.scanRune()
   728  		case '`':
   729  			insertSemi = true
   730  			tok = token.STRING
   731  			lit = s.scanRawString()
   732  		case ':':
   733  			tok = s.switch2(token.COLON, token.DEFINE)
   734  		case '.':
   735  			if '0' <= s.ch && s.ch <= '9' {
   736  				insertSemi = true
   737  				tok, lit = s.scanNumber(true)
   738  			} else if s.ch == '.' {
   739  				s.next()
   740  				if s.ch == '.' {
   741  					s.next()
   742  					tok = token.ELLIPSIS
   743  				}
   744  			} else {
   745  				tok = token.PERIOD
   746  			}
   747  		case ',':
   748  			tok = token.COMMA
   749  		case ';':
   750  			tok = token.SEMICOLON
   751  			lit = ";"
   752  		case '(':
   753  			tok = token.LPAREN
   754  		case ')':
   755  			insertSemi = true
   756  			tok = token.RPAREN
   757  		case '[':
   758  			tok = token.LBRACK
   759  		case ']':
   760  			insertSemi = true
   761  			tok = token.RBRACK
   762  		case '{':
   763  			tok = token.LBRACE
   764  		case '}':
   765  			insertSemi = true
   766  			tok = token.RBRACE
   767  		case '+':
   768  			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
   769  			if tok == token.INC {
   770  				insertSemi = true
   771  			}
   772  		case '-':
   773  			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
   774  			if tok == token.DEC {
   775  				insertSemi = true
   776  			}
   777  		case '*':
   778  			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
   779  		case '/':
   780  			if s.ch == '/' || s.ch == '*' {
   781  				// comment
   782  				if s.insertSemi && s.findLineEnd() {
   783  					// reset position to the beginning of the comment
   784  					s.ch = '/'
   785  					s.offset = s.file.Offset(pos)
   786  					s.rdOffset = s.offset + 1
   787  					s.insertSemi = false // newline consumed
   788  					return pos, token.SEMICOLON, "\n"
   789  				}
   790  				comment := s.scanComment()
   791  				if s.mode&ScanComments == 0 {
   792  					// skip comment
   793  					s.insertSemi = false // newline consumed
   794  					goto scanAgain
   795  				}
   796  				tok = token.COMMENT
   797  				lit = comment
   798  			} else {
   799  				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
   800  			}
   801  		case '%':
   802  			tok = s.switch2(token.REM, token.REM_ASSIGN)
   803  		case '^':
   804  			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
   805  		case '<':
   806  			if s.ch == '-' {
   807  				s.next()
   808  				tok = token.ARROW
   809  			} else {
   810  				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
   811  			}
   812  		case '>':
   813  			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
   814  		case '=':
   815  			tok = s.switch2(token.ASSIGN, token.EQL)
   816  		case '!':
   817  			tok = s.switch2(token.NOT, token.NEQ)
   818  		case '&':
   819  			if s.ch == '^' {
   820  				s.next()
   821  				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
   822  			} else {
   823  				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
   824  			}
   825  		case '|':
   826  			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
   827  		default:
   828  			// next reports unexpected BOMs - don't repeat
   829  			if ch != bom {
   830  				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
   831  			}
   832  			insertSemi = s.insertSemi // preserve insertSemi info
   833  			tok = token.ILLEGAL
   834  			lit = string(ch)
   835  		}
   836  	}
   837  	if s.mode&dontInsertSemis == 0 {
   838  		s.insertSemi = insertSemi
   839  	}
   840  
   841  	return
   842  }
   843  

View as plain text