scanner_test.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"os"
    11  	"strings"
    12  	"testing"
    13  )
    14  
    15  // errh is a default error handler for basic tests.
    16  func errh(line, col uint, msg string) {
    17  	panic(fmt.Sprintf("%d:%d: %s", line, col, msg))
    18  }
    19  
    20  // Don't bother with other tests if TestSmoke doesn't pass.
    21  func TestSmoke(t *testing.T) {
    22  	const src = "if (+foo\t+=..123/***/0.9_0e-0i'a'`raw`\"string\"..f;//$"
    23  	tokens := []token{_If, _Lparen, _Operator, _Name, _AssignOp, _Dot, _Literal, _Literal, _Literal, _Literal, _Literal, _Dot, _Dot, _Name, _Semi, _EOF}
    24  
    25  	var got scanner
    26  	got.init(strings.NewReader(src), errh, 0)
    27  	for _, want := range tokens {
    28  		got.next()
    29  		if got.tok != want {
    30  			t.Errorf("%d:%d: got %s; want %s", got.line, got.col, got.tok, want)
    31  			continue
    32  		}
    33  	}
    34  }
    35  
    36  // Once TestSmoke passes, run TestTokens next.
    37  func TestTokens(t *testing.T) {
    38  	var got scanner
    39  	for _, want := range sampleTokens {
    40  		got.init(strings.NewReader(want.src), func(line, col uint, msg string) {
    41  			t.Errorf("%s:%d:%d: %s", want.src, line, col, msg)
    42  		}, 0)
    43  		got.next()
    44  		if got.tok != want.tok {
    45  			t.Errorf("%s: got %s; want %s", want.src, got.tok, want.tok)
    46  			continue
    47  		}
    48  		if (got.tok == _Name || got.tok == _Literal) && got.lit != want.src {
    49  			t.Errorf("%s: got %q; want %q", want.src, got.lit, want.src)
    50  		}
    51  	}
    52  }
    53  
    54  func TestScanner(t *testing.T) {
    55  	if testing.Short() {
    56  		t.Skip("skipping test in short mode")
    57  	}
    58  
    59  	filename := *src_ // can be changed via -src flag
    60  	src, err := os.Open(filename)
    61  	if err != nil {
    62  		t.Fatal(err)
    63  	}
    64  	defer src.Close()
    65  
    66  	var s scanner
    67  	s.init(src, errh, 0)
    68  	for {
    69  		s.next()
    70  		if s.tok == _EOF {
    71  			break
    72  		}
    73  		if !testing.Verbose() {
    74  			continue
    75  		}
    76  		switch s.tok {
    77  		case _Name, _Literal:
    78  			fmt.Printf("%s:%d:%d: %s => %s\n", filename, s.line, s.col, s.tok, s.lit)
    79  		case _Operator:
    80  			fmt.Printf("%s:%d:%d: %s => %s (prec = %d)\n", filename, s.line, s.col, s.tok, s.op, s.prec)
    81  		default:
    82  			fmt.Printf("%s:%d:%d: %s\n", filename, s.line, s.col, s.tok)
    83  		}
    84  	}
    85  }
    86  
    87  func TestEmbeddedTokens(t *testing.T) {
    88  	// make source
    89  	var buf bytes.Buffer
    90  	for i, s := range sampleTokens {
    91  		buf.WriteString("\t\t\t\t"[:i&3])                 // leading indentation
    92  		buf.WriteString(s.src)                            // token
    93  		buf.WriteString("        "[:i&7])                 // trailing spaces
    94  		fmt.Fprintf(&buf, "/*line foo:%d */ // bar\n", i) // comments + newline (don't crash w/o directive handler)
    95  	}
    96  
    97  	// scan source
    98  	var got scanner
    99  	var src string
   100  	got.init(&buf, func(line, col uint, msg string) {
   101  		t.Fatalf("%s:%d:%d: %s", src, line, col, msg)
   102  	}, 0)
   103  	got.next()
   104  	for i, want := range sampleTokens {
   105  		src = want.src
   106  		nlsemi := false
   107  
   108  		if got.line-linebase != uint(i) {
   109  			t.Errorf("%s: got line %d; want %d", src, got.line-linebase, i)
   110  		}
   111  
   112  		if got.tok != want.tok {
   113  			t.Errorf("%s: got tok %s; want %s", src, got.tok, want.tok)
   114  			continue
   115  		}
   116  
   117  		switch want.tok {
   118  		case _Semi:
   119  			if got.lit != "semicolon" {
   120  				t.Errorf("%s: got %s; want semicolon", src, got.lit)
   121  			}
   122  
   123  		case _Name, _Literal:
   124  			if got.lit != want.src {
   125  				t.Errorf("%s: got lit %q; want %q", src, got.lit, want.src)
   126  				continue
   127  			}
   128  			nlsemi = true
   129  
   130  		case _Operator, _AssignOp, _IncOp:
   131  			if got.op != want.op {
   132  				t.Errorf("%s: got op %s; want %s", src, got.op, want.op)
   133  				continue
   134  			}
   135  			if got.prec != want.prec {
   136  				t.Errorf("%s: got prec %d; want %d", src, got.prec, want.prec)
   137  				continue
   138  			}
   139  			nlsemi = want.tok == _IncOp
   140  
   141  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
   142  			nlsemi = true
   143  		}
   144  
   145  		if nlsemi {
   146  			got.next()
   147  			if got.tok != _Semi {
   148  				t.Errorf("%s: got tok %s; want ;", src, got.tok)
   149  				continue
   150  			}
   151  			if got.lit != "newline" {
   152  				t.Errorf("%s: got %s; want newline", src, got.lit)
   153  			}
   154  		}
   155  
   156  		got.next()
   157  	}
   158  
   159  	if got.tok != _EOF {
   160  		t.Errorf("got %q; want _EOF", got.tok)
   161  	}
   162  }
   163  
// sampleTokens is the table of sample inputs shared by TestTokens and
// TestEmbeddedTokens. Each entry gives a source string and the token the
// scanner is expected to produce for it.
var sampleTokens = [...]struct {
	tok  token    // expected token
	src  string   // source text to scan; also the expected literal for names and literals
	op   Operator // expected operator; compared for _Operator, _AssignOp and _IncOp in TestEmbeddedTokens
	prec int      // expected precedence; compared for the same tokens as op
}{
	// name samples
	{_Name, "x", 0, 0},
	{_Name, "X123", 0, 0},
	{_Name, "foo", 0, 0},
	{_Name, "Foo123", 0, 0},
	{_Name, "foo_bar", 0, 0},
	{_Name, "_", 0, 0},
	{_Name, "_foobar", 0, 0},
	{_Name, "a۰۱۸", 0, 0},
	{_Name, "foo६४", 0, 0},
	{_Name, "bar９８７６", 0, 0},
	{_Name, "ŝ", 0, 0},
	{_Name, "ŝfoo", 0, 0},

	// literal samples
	{_Literal, "0", 0, 0},
	{_Literal, "1", 0, 0},
	{_Literal, "12345", 0, 0},
	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
	{_Literal, "01234567", 0, 0},
	{_Literal, "0_1_234_567", 0, 0},
	{_Literal, "0X0", 0, 0},
	{_Literal, "0xcafebabe", 0, 0},
	{_Literal, "0x_cafe_babe", 0, 0},
	{_Literal, "0O0", 0, 0},
	{_Literal, "0o000", 0, 0},
	{_Literal, "0o_000", 0, 0},
	{_Literal, "0B1", 0, 0},
	{_Literal, "0b01100110", 0, 0},
	{_Literal, "0b_0110_0110", 0, 0},
	{_Literal, "0.", 0, 0},
	{_Literal, "0.e0", 0, 0},
	{_Literal, "0.e-1", 0, 0},
	{_Literal, "0.e+123", 0, 0},
	{_Literal, ".0", 0, 0},
	{_Literal, ".0E00", 0, 0},
	{_Literal, ".0E-0123", 0, 0},
	{_Literal, ".0E+12345678901234567890", 0, 0},
	{_Literal, ".45e1", 0, 0},
	{_Literal, "3.14159265", 0, 0},
	{_Literal, "1e0", 0, 0},
	{_Literal, "1e+100", 0, 0},
	{_Literal, "1e-100", 0, 0},
	{_Literal, "2.71828e-1000", 0, 0},
	{_Literal, "0i", 0, 0},
	{_Literal, "1i", 0, 0},
	{_Literal, "012345678901234567889i", 0, 0},
	{_Literal, "123456789012345678890i", 0, 0},
	{_Literal, "0.i", 0, 0},
	{_Literal, ".0i", 0, 0},
	{_Literal, "3.14159265i", 0, 0},
	{_Literal, "1e0i", 0, 0},
	{_Literal, "1e+100i", 0, 0},
	{_Literal, "1e-100i", 0, 0},
	{_Literal, "2.71828e-1000i", 0, 0},
	{_Literal, "'a'", 0, 0},
	{_Literal, "'\\000'", 0, 0},
	{_Literal, "'\\xFF'", 0, 0},
	{_Literal, "'\\uff16'", 0, 0},
	{_Literal, "'\\U0000ff16'", 0, 0},
	{_Literal, "`foobar`", 0, 0},
	{_Literal, "`foo\tbar`", 0, 0},
	{_Literal, "`\r`", 0, 0},

	// operators
	{_Operator, "!", Not, 0},
	{_Operator, "~", Tilde, 0},

	{_Operator, "||", OrOr, precOrOr},

	{_Operator, "&&", AndAnd, precAndAnd},

	{_Operator, "==", Eql, precCmp},
	{_Operator, "!=", Neq, precCmp},
	{_Operator, "<", Lss, precCmp},
	{_Operator, "<=", Leq, precCmp},
	{_Operator, ">", Gtr, precCmp},
	{_Operator, ">=", Geq, precCmp},

	{_Operator, "+", Add, precAdd},
	{_Operator, "-", Sub, precAdd},
	{_Operator, "|", Or, precAdd},
	{_Operator, "^", Xor, precAdd},

	// "*" is expected as its own _Star token rather than _Operator.
	{_Star, "*", Mul, precMul},
	{_Operator, "/", Div, precMul},
	{_Operator, "%", Rem, precMul},
	{_Operator, "&", And, precMul},
	{_Operator, "&^", AndNot, precMul},
	{_Operator, "<<", Shl, precMul},
	{_Operator, ">>", Shr, precMul},

	// assignment operations
	{_AssignOp, "+=", Add, precAdd},
	{_AssignOp, "-=", Sub, precAdd},
	{_AssignOp, "|=", Or, precAdd},
	{_AssignOp, "^=", Xor, precAdd},

	{_AssignOp, "*=", Mul, precMul},
	{_AssignOp, "/=", Div, precMul},
	{_AssignOp, "%=", Rem, precMul},
	{_AssignOp, "&=", And, precMul},
	{_AssignOp, "&^=", AndNot, precMul},
	{_AssignOp, "<<=", Shl, precMul},
	{_AssignOp, ">>=", Shr, precMul},

	// other operations
	{_IncOp, "++", Add, precAdd},
	{_IncOp, "--", Sub, precAdd},
	{_Assign, "=", 0, 0},
	{_Define, ":=", 0, 0},
	{_Arrow, "<-", 0, 0},

	// delimiters
	{_Lparen, "(", 0, 0},
	{_Lbrack, "[", 0, 0},
	{_Lbrace, "{", 0, 0},
	{_Rparen, ")", 0, 0},
	{_Rbrack, "]", 0, 0},
	{_Rbrace, "}", 0, 0},
	{_Comma, ",", 0, 0},
	{_Semi, ";", 0, 0},
	{_Colon, ":", 0, 0},
	{_Dot, ".", 0, 0},
	{_DotDotDot, "...", 0, 0},

	// keywords
	{_Break, "break", 0, 0},
	{_Case, "case", 0, 0},
	{_Chan, "chan", 0, 0},
	{_Const, "const", 0, 0},
	{_Continue, "continue", 0, 0},
	{_Default, "default", 0, 0},
	{_Defer, "defer", 0, 0},
	{_Else, "else", 0, 0},
	{_Fallthrough, "fallthrough", 0, 0},
	{_For, "for", 0, 0},
	{_Func, "func", 0, 0},
	{_Go, "go", 0, 0},
	{_Goto, "goto", 0, 0},
	{_If, "if", 0, 0},
	{_Import, "import", 0, 0},
	{_Interface, "interface", 0, 0},
	{_Map, "map", 0, 0},
	{_Package, "package", 0, 0},
	{_Range, "range", 0, 0},
	{_Return, "return", 0, 0},
	{_Select, "select", 0, 0},
	{_Struct, "struct", 0, 0},
	{_Switch, "switch", 0, 0},
	{_Type, "type", 0, 0},
	{_Var, "var", 0, 0},
}
   323  
   324  func TestComments(t *testing.T) {
   325  	type comment struct {
   326  		line, col uint // 0-based
   327  		text      string
   328  	}
   329  
   330  	for _, test := range []struct {
   331  		src  string
   332  		want comment
   333  	}{
   334  		// no comments
   335  		{"no comment here", comment{0, 0, ""}},
   336  		{" /", comment{0, 0, ""}},
   337  		{"\n /*/", comment{0, 0, ""}},
   338  
   339  		//-style comments
   340  		{"// line comment\n", comment{0, 0, "// line comment"}},
   341  		{"package p // line comment\n", comment{0, 10, "// line comment"}},
   342  		{"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
   343  		{"\n\n//\n", comment{2, 0, "//"}},
   344  		{"//", comment{0, 0, "//"}},
   345  
   346  		/*-style comments */
   347  		{"123/* regular comment */", comment{0, 3, "/* regular comment */"}},
   348  		{"package p /* regular comment", comment{0, 0, ""}},
   349  		{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
   350  		{"\n\n/**/", comment{2, 0, "/**/"}},
   351  		{"/*", comment{0, 0, ""}},
   352  	} {
   353  		var s scanner
   354  		var got comment
   355  		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
   356  			if msg[0] != '/' {
   357  				// error
   358  				if msg != "comment not terminated" {
   359  					t.Errorf("%q: %s", test.src, msg)
   360  				}
   361  				return
   362  			}
   363  			got = comment{line - linebase, col - colbase, msg} // keep last one
   364  		}, comments)
   365  
   366  		for {
   367  			s.next()
   368  			if s.tok == _EOF {
   369  				break
   370  			}
   371  		}
   372  
   373  		want := test.want
   374  		if got.line != want.line || got.col != want.col {
   375  			t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
   376  		}
   377  		if got.text != want.text {
   378  			t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
   379  		}
   380  	}
   381  }
   382  
   383  func TestNumbers(t *testing.T) {
   384  	for _, test := range []struct {
   385  		kind             LitKind
   386  		src, tokens, err string
   387  	}{
   388  		// binaries
   389  		{IntLit, "0b0", "0b0", ""},
   390  		{IntLit, "0b1010", "0b1010", ""},
   391  		{IntLit, "0B1110", "0B1110", ""},
   392  
   393  		{IntLit, "0b", "0b", "binary literal has no digits"},
   394  		{IntLit, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
   395  		{IntLit, "0b01a0", "0b01 a0", ""}, // only accept 0-9
   396  
   397  		{FloatLit, "0b.", "0b.", "invalid radix point in binary literal"},
   398  		{FloatLit, "0b.1", "0b.1", "invalid radix point in binary literal"},
   399  		{FloatLit, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
   400  		{FloatLit, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
   401  		{FloatLit, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},
   402  
   403  		{ImagLit, "0b10i", "0b10i", ""},
   404  		{ImagLit, "0b10.0i", "0b10.0i", "invalid radix point in binary literal"},
   405  
   406  		// octals
   407  		{IntLit, "0o0", "0o0", ""},
   408  		{IntLit, "0o1234", "0o1234", ""},
   409  		{IntLit, "0O1234", "0O1234", ""},
   410  
   411  		{IntLit, "0o", "0o", "octal literal has no digits"},
   412  		{IntLit, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
   413  		{IntLit, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
   414  		{IntLit, "0o12a3", "0o12 a3", ""}, // only accept 0-9
   415  
   416  		{FloatLit, "0o.", "0o.", "invalid radix point in octal literal"},
   417  		{FloatLit, "0o.2", "0o.2", "invalid radix point in octal literal"},
   418  		{FloatLit, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
   419  		{FloatLit, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
   420  		{FloatLit, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},
   421  
   422  		{ImagLit, "0o10i", "0o10i", ""},
   423  		{ImagLit, "0o10e0i", "0o10e0i", "'e' exponent requires decimal mantissa"},
   424  
   425  		// 0-octals
   426  		{IntLit, "0", "0", ""},
   427  		{IntLit, "0123", "0123", ""},
   428  
   429  		{IntLit, "08123", "08123", "invalid digit '8' in octal literal"},
   430  		{IntLit, "01293", "01293", "invalid digit '9' in octal literal"},
   431  		{IntLit, "0F.", "0 F .", ""}, // only accept 0-9
   432  		{IntLit, "0123F.", "0123 F .", ""},
   433  		{IntLit, "0123456x", "0123456 x", ""},
   434  
   435  		// decimals
   436  		{IntLit, "1", "1", ""},
   437  		{IntLit, "1234", "1234", ""},
   438  
   439  		{IntLit, "1f", "1 f", ""}, // only accept 0-9
   440  
   441  		{ImagLit, "0i", "0i", ""},
   442  		{ImagLit, "0678i", "0678i", ""},
   443  
   444  		// decimal floats
   445  		{FloatLit, "0.", "0.", ""},
   446  		{FloatLit, "123.", "123.", ""},
   447  		{FloatLit, "0123.", "0123.", ""},
   448  
   449  		{FloatLit, ".0", ".0", ""},
   450  		{FloatLit, ".123", ".123", ""},
   451  		{FloatLit, ".0123", ".0123", ""},
   452  
   453  		{FloatLit, "0.0", "0.0", ""},
   454  		{FloatLit, "123.123", "123.123", ""},
   455  		{FloatLit, "0123.0123", "0123.0123", ""},
   456  
   457  		{FloatLit, "0e0", "0e0", ""},
   458  		{FloatLit, "123e+0", "123e+0", ""},
   459  		{FloatLit, "0123E-1", "0123E-1", ""},
   460  
   461  		{FloatLit, "0.e+1", "0.e+1", ""},
   462  		{FloatLit, "123.E-10", "123.E-10", ""},
   463  		{FloatLit, "0123.e123", "0123.e123", ""},
   464  
   465  		{FloatLit, ".0e-1", ".0e-1", ""},
   466  		{FloatLit, ".123E+10", ".123E+10", ""},
   467  		{FloatLit, ".0123E123", ".0123E123", ""},
   468  
   469  		{FloatLit, "0.0e1", "0.0e1", ""},
   470  		{FloatLit, "123.123E-10", "123.123E-10", ""},
   471  		{FloatLit, "0123.0123e+456", "0123.0123e+456", ""},
   472  
   473  		{FloatLit, "0e", "0e", "exponent has no digits"},
   474  		{FloatLit, "0E+", "0E+", "exponent has no digits"},
   475  		{FloatLit, "1e+f", "1e+ f", "exponent has no digits"},
   476  		{FloatLit, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
   477  		{FloatLit, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},
   478  
   479  		{ImagLit, "0.i", "0.i", ""},
   480  		{ImagLit, ".123i", ".123i", ""},
   481  		{ImagLit, "123.123i", "123.123i", ""},
   482  		{ImagLit, "123e+0i", "123e+0i", ""},
   483  		{ImagLit, "123.E-10i", "123.E-10i", ""},
   484  		{ImagLit, ".123E+10i", ".123E+10i", ""},
   485  
   486  		// hexadecimals
   487  		{IntLit, "0x0", "0x0", ""},
   488  		{IntLit, "0x1234", "0x1234", ""},
   489  		{IntLit, "0xcafef00d", "0xcafef00d", ""},
   490  		{IntLit, "0XCAFEF00D", "0XCAFEF00D", ""},
   491  
   492  		{IntLit, "0x", "0x", "hexadecimal literal has no digits"},
   493  		{IntLit, "0x1g", "0x1 g", ""},
   494  
   495  		{ImagLit, "0xf00i", "0xf00i", ""},
   496  
   497  		// hexadecimal floats
   498  		{FloatLit, "0x0p0", "0x0p0", ""},
   499  		{FloatLit, "0x12efp-123", "0x12efp-123", ""},
   500  		{FloatLit, "0xABCD.p+0", "0xABCD.p+0", ""},
   501  		{FloatLit, "0x.0189P-0", "0x.0189P-0", ""},
   502  		{FloatLit, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},
   503  
   504  		{FloatLit, "0x.", "0x.", "hexadecimal literal has no digits"},
   505  		{FloatLit, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
   506  		{FloatLit, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
   507  		{FloatLit, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
   508  		{FloatLit, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
   509  		{FloatLit, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
   510  		{FloatLit, "0x0p", "0x0p", "exponent has no digits"},
   511  		{FloatLit, "0xeP-", "0xeP-", "exponent has no digits"},
   512  		{FloatLit, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
   513  		{FloatLit, "0x1.2p1a", "0x1.2p1 a", ""},
   514  
   515  		{ImagLit, "0xf00.bap+12i", "0xf00.bap+12i", ""},
   516  
   517  		// separators
   518  		{IntLit, "0b_1000_0001", "0b_1000_0001", ""},
   519  		{IntLit, "0o_600", "0o_600", ""},
   520  		{IntLit, "0_466", "0_466", ""},
   521  		{IntLit, "1_000", "1_000", ""},
   522  		{FloatLit, "1_000.000_1", "1_000.000_1", ""},
   523  		{ImagLit, "10e+1_2_3i", "10e+1_2_3i", ""},
   524  		{IntLit, "0x_f00d", "0x_f00d", ""},
   525  		{FloatLit, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},
   526  
   527  		{IntLit, "0b__1000", "0b__1000", "'_' must separate successive digits"},
   528  		{IntLit, "0o60___0", "0o60___0", "'_' must separate successive digits"},
   529  		{IntLit, "0466_", "0466_", "'_' must separate successive digits"},
   530  		{FloatLit, "1_.", "1_.", "'_' must separate successive digits"},
   531  		{FloatLit, "0._1", "0._1", "'_' must separate successive digits"},
   532  		{FloatLit, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
   533  		{ImagLit, "10e+12_i", "10e+12_i", "'_' must separate successive digits"},
   534  		{IntLit, "0x___0", "0x___0", "'_' must separate successive digits"},
   535  		{FloatLit, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
   536  	} {
   537  		var s scanner
   538  		var err string
   539  		s.init(strings.NewReader(test.src), func(_, _ uint, msg string) {
   540  			if err == "" {
   541  				err = msg
   542  			}
   543  		}, 0)
   544  
   545  		for i, want := range strings.Split(test.tokens, " ") {
   546  			err = ""
   547  			s.next()
   548  
   549  			if err != "" && !s.bad {
   550  				t.Errorf("%q: got error but bad not set", test.src)
   551  			}
   552  
   553  			// compute lit where s.lit is not defined
   554  			var lit string
   555  			switch s.tok {
   556  			case _Name, _Literal:
   557  				lit = s.lit
   558  			case _Dot:
   559  				lit = "."
   560  			}
   561  
   562  			if i == 0 {
   563  				if s.tok != _Literal || s.kind != test.kind {
   564  					t.Errorf("%q: got token %s (kind = %d); want literal (kind = %d)", test.src, s.tok, s.kind, test.kind)
   565  				}
   566  				if err != test.err {
   567  					t.Errorf("%q: got error %q; want %q", test.src, err, test.err)
   568  				}
   569  			}
   570  
   571  			if lit != want {
   572  				t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, s.tok, want)
   573  			}
   574  		}
   575  
   576  		// make sure we read all
   577  		s.next()
   578  		if s.tok == _Semi {
   579  			s.next()
   580  		}
   581  		if s.tok != _EOF {
   582  			t.Errorf("%q: got %s; want EOF", test.src, s.tok)
   583  		}
   584  	}
   585  }
   586  
   587  func TestScanErrors(t *testing.T) {
   588  	for _, test := range []struct {
   589  		src, err  string
   590  		line, col uint // 0-based
   591  	}{
   592  		// Note: Positions for lexical errors are the earliest position
   593  		// where the error is apparent, not the beginning of the respective
   594  		// token.
   595  
   596  		// rune-level errors
   597  		{"fo\x00o", "invalid NUL character", 0, 2},
   598  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
   599  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
   600  
   601  		// token-level errors
   602  		{"\u00BD" /* ½ */, "invalid character U+00BD '½' in identifier", 0, 0},
   603  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid character U+00BD '½' in identifier", 0, 13 /* byte offset */},
   604  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
   605  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid character U+00BD '½' in identifier", 0, 8 /* byte offset */},
   606  
   607  		{"x + #y", "invalid character U+0023 '#'", 0, 4},
   608  		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
   609  		{"0123456789", "invalid digit '8' in octal literal", 0, 8},
   610  		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
   611  		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
   612  		{"var a, b = 09, 07\n", "invalid digit '9' in octal literal", 0, 12},
   613  
   614  		{`''`, "empty rune literal or unescaped '", 0, 1},
   615  		{"'\n", "newline in rune literal", 0, 1},
   616  		{`'\`, "rune literal not terminated", 0, 0},
   617  		{`'\'`, "rune literal not terminated", 0, 0},
   618  		{`'\x`, "rune literal not terminated", 0, 0},
   619  		{`'\x'`, "invalid character '\\'' in hexadecimal escape", 0, 3},
   620  		{`'\y'`, "unknown escape", 0, 2},
   621  		{`'\x0'`, "invalid character '\\'' in hexadecimal escape", 0, 4},
   622  		{`'\00'`, "invalid character '\\'' in octal escape", 0, 4},
   623  		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
   624  		{`'\378`, "invalid character '8' in octal escape", 0, 4},
   625  		{`'\400'`, "octal escape value 256 > 255", 0, 5},
   626  		{`'xx`, "rune literal not terminated", 0, 0},
   627  		{`'xx'`, "more than one character in rune literal", 0, 0},
   628  
   629  		{"\n   \"foo\n", "newline in string", 1, 7},
   630  		{`"`, "string not terminated", 0, 0},
   631  		{`"foo`, "string not terminated", 0, 0},
   632  		{"`", "string not terminated", 0, 0},
   633  		{"`foo", "string not terminated", 0, 0},
   634  		{"/*/", "comment not terminated", 0, 0},
   635  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   636  		{`"\`, "string not terminated", 0, 0},
   637  		{`"\"`, "string not terminated", 0, 0},
   638  		{`"\x`, "string not terminated", 0, 0},
   639  		{`"\x"`, "invalid character '\"' in hexadecimal escape", 0, 3},
   640  		{`"\y"`, "unknown escape", 0, 2},
   641  		{`"\x0"`, "invalid character '\"' in hexadecimal escape", 0, 4},
   642  		{`"\00"`, "invalid character '\"' in octal escape", 0, 4},
   643  		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
   644  		{`"\378"`, "invalid character '8' in octal escape", 0, 4},
   645  		{`"\400"`, "octal escape value 256 > 255", 0, 5},
   646  
   647  		{`s := "foo\z"`, "unknown escape", 0, 10},
   648  		{`s := "foo\z00\nbar"`, "unknown escape", 0, 10},
   649  		{`"\x`, "string not terminated", 0, 0},
   650  		{`"\x"`, "invalid character '\"' in hexadecimal escape", 0, 3},
   651  		{`var s string = "\x"`, "invalid character '\"' in hexadecimal escape", 0, 18},
   652  		{`return "\Uffffffff"`, "escape is invalid Unicode code point U+FFFFFFFF", 0, 18},
   653  
   654  		{"0b.0", "invalid radix point in binary literal", 0, 2},
   655  		{"0x.p0\n", "hexadecimal literal has no digits", 0, 3},
   656  
   657  		// former problem cases
   658  		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
   659  	} {
   660  		var s scanner
   661  		var line, col uint
   662  		var err string
   663  		s.init(strings.NewReader(test.src), func(l, c uint, msg string) {
   664  			if err == "" {
   665  				line, col = l-linebase, c-colbase
   666  				err = msg
   667  			}
   668  		}, 0)
   669  
   670  		for {
   671  			s.next()
   672  			if s.tok == _EOF {
   673  				break
   674  			}
   675  		}
   676  
   677  		if err != "" {
   678  			if err != test.err {
   679  				t.Errorf("%q: got err = %q; want %q", test.src, err, test.err)
   680  			}
   681  			if line != test.line {
   682  				t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
   683  			}
   684  			if col != test.col {
   685  				t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
   686  			}
   687  		} else {
   688  			t.Errorf("%q: got no error; want %q", test.src, test.err)
   689  		}
   690  	}
   691  }
   692  
   693  func TestDirectives(t *testing.T) {
   694  	for _, src := range []string{
   695  		"line",
   696  		"// line",
   697  		"//line",
   698  		"//line foo",
   699  		"//line foo%bar",
   700  
   701  		"go",
   702  		"// go:",
   703  		"//go:",
   704  		"//go :foo",
   705  		"//go:foo",
   706  		"//go:foo%bar",
   707  	} {
   708  		got := ""
   709  		var s scanner
   710  		s.init(strings.NewReader(src), func(_, col uint, msg string) {
   711  			if col != colbase {
   712  				t.Errorf("%s: got col = %d; want %d", src, col, colbase)
   713  			}
   714  			if msg == "" {
   715  				t.Errorf("%s: handler called with empty msg", src)
   716  			}
   717  			got = msg
   718  		}, directives)
   719  
   720  		s.next()
   721  		if strings.HasPrefix(src, "//line ") || strings.HasPrefix(src, "//go:") {
   722  			// handler should have been called
   723  			if got != src {
   724  				t.Errorf("got %s; want %s", got, src)
   725  			}
   726  		} else {
   727  			// handler should not have been called
   728  			if got != "" {
   729  				t.Errorf("got %s for %s", got, src)
   730  			}
   731  		}
   732  	}
   733  }
   734  
   735  func TestIssue21938(t *testing.T) {
   736  	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
   737  
   738  	var got scanner
   739  	got.init(strings.NewReader(s), errh, 0)
   740  	got.next()
   741  
   742  	if got.tok != _Literal || got.lit != ".5" {
   743  		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
   744  	}
   745  }
   746  
   747  func TestIssue33961(t *testing.T) {
   748  	literals := `08__ 0b.p 0b_._p 0x.e 0x.p`
   749  	for _, lit := range strings.Split(literals, " ") {
   750  		n := 0
   751  		var got scanner
   752  		got.init(strings.NewReader(lit), func(_, _ uint, msg string) {
   753  			// fmt.Printf("%s: %s\n", lit, msg) // uncomment for debugging
   754  			n++
   755  		}, 0)
   756  		got.next()
   757  
   758  		if n != 1 {
   759  			t.Errorf("%q: got %d errors; want 1", lit, n)
   760  			continue
   761  		}
   762  
   763  		if !got.bad {
   764  			t.Errorf("%q: got error but bad not set", lit)
   765  		}
   766  	}
   767  }
   768
View as plain text