...
Run Format

Source file src/text/scanner/scanner_test.go

Documentation: text/scanner

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package scanner
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  	"testing"
    13  	"unicode/utf8"
    14  )
    15  
    16  // A StringReader delivers its data one string segment at a time via Read.
    17  type StringReader struct {
    18  	data []string
    19  	step int
    20  }
    21  
    22  func (r *StringReader) Read(p []byte) (n int, err error) {
    23  	if r.step < len(r.data) {
    24  		s := r.data[r.step]
    25  		n = copy(p, s)
    26  		r.step++
    27  	} else {
    28  		err = io.EOF
    29  	}
    30  	return
    31  }
    32  
    33  func readRuneSegments(t *testing.T, segments []string) {
    34  	got := ""
    35  	want := strings.Join(segments, "")
    36  	s := new(Scanner).Init(&StringReader{data: segments})
    37  	for {
    38  		ch := s.Next()
    39  		if ch == EOF {
    40  			break
    41  		}
    42  		got += string(ch)
    43  	}
    44  	if got != want {
    45  		t.Errorf("segments=%v got=%s want=%s", segments, got, want)
    46  	}
    47  }
    48  
// segmentList holds the inputs for TestNext. Each entry is a sequence of
// string segments that readRuneSegments passes to a StringReader.
var segmentList = [][]string{
	{},   // no segments at all
	{""}, // a single empty segment
	{"日", "本語"},
	{"\u65e5", "\u672c", "\u8a9e"},
	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"}, // segment boundaries fall between raw bytes
	{"Hello", ", ", "World", "!"},
	{"Hello", ", ", "", "World", "!"}, // an empty segment in the middle
}
    59  
    60  func TestNext(t *testing.T) {
    61  	for _, s := range segmentList {
    62  		readRuneSegments(t, s)
    63  	}
    64  }
    65  
// A token pairs an expected scan result with the source text that should
// produce it.
type token struct {
	tok  rune   // expected token value (a token class or an individual character)
	text string // source text of the token
}
    70  
    71  var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
    72  
// tokenList pairs each expected token with its source text. makeSource
// writes the texts in this order, and testScan and TestPosition walk the
// list in the same order. The Comment entries whose text names a category
// ("// identifiers", "// floats", ...) are themselves tokens in the
// generated source and double as section headings.
var tokenList = []token{
	{Comment, "// line comments"},
	{Comment, "//"},
	{Comment, "////"},
	{Comment, "// comment"},
	{Comment, "// /* comment */"},
	{Comment, "// // comment //"},
	{Comment, "//" + f100},

	{Comment, "// general comments"},
	{Comment, "/**/"},
	{Comment, "/***/"},
	{Comment, "/* comment */"},
	{Comment, "/* // comment */"},
	{Comment, "/* /* comment */"},
	{Comment, "/*\n comment\n*/"},
	{Comment, "/*" + f100 + "*/"},

	{Comment, "// identifiers"},
	{Ident, "a"},
	{Ident, "a0"},
	{Ident, "foobar"},
	{Ident, "abc123"},
	{Ident, "LGTM"},
	{Ident, "_"},
	{Ident, "_abc123"},
	{Ident, "abc123_"},
	{Ident, "_abc_123_"},
	{Ident, "_äöü"},
	{Ident, "_本"},
	{Ident, "äöü"},
	{Ident, "本"},
	{Ident, "a۰۱۸"},
	{Ident, "foo६४"},
	{Ident, "bar9876"},
	{Ident, f100},

	{Comment, "// decimal ints"},
	{Int, "0"},
	{Int, "1"},
	{Int, "9"},
	{Int, "42"},
	{Int, "1234567890"},

	{Comment, "// octal ints"},
	{Int, "00"},
	{Int, "01"},
	{Int, "07"},
	{Int, "042"},
	{Int, "01234567"},

	{Comment, "// hexadecimal ints"},
	{Int, "0x0"},
	{Int, "0x1"},
	{Int, "0xf"},
	{Int, "0x42"},
	{Int, "0x123456789abcDEF"},
	{Int, "0x" + f100},
	{Int, "0X0"},
	{Int, "0X1"},
	{Int, "0XF"},
	{Int, "0X42"},
	{Int, "0X123456789abcDEF"},
	{Int, "0X" + f100},

	{Comment, "// floats"},
	{Float, "0."},
	{Float, "1."},
	{Float, "42."},
	{Float, "01234567890."},
	{Float, ".0"},
	{Float, ".1"},
	{Float, ".42"},
	{Float, ".0123456789"},
	{Float, "0.0"},
	{Float, "1.0"},
	{Float, "42.0"},
	{Float, "01234567890.0"},
	{Float, "0e0"},
	{Float, "1e0"},
	{Float, "42e0"},
	{Float, "01234567890e0"},
	{Float, "0E0"},
	{Float, "1E0"},
	{Float, "42E0"},
	{Float, "01234567890E0"},
	{Float, "0e+10"},
	{Float, "1e-10"},
	{Float, "42e+10"},
	{Float, "01234567890e-10"},
	{Float, "0E+10"},
	{Float, "1E-10"},
	{Float, "42E+10"},
	{Float, "01234567890E-10"},

	{Comment, "// chars"},
	{Char, `' '`},
	{Char, `'a'`},
	{Char, `'本'`},
	{Char, `'\a'`},
	{Char, `'\b'`},
	{Char, `'\f'`},
	{Char, `'\n'`},
	{Char, `'\r'`},
	{Char, `'\t'`},
	{Char, `'\v'`},
	{Char, `'\''`},
	{Char, `'\000'`},
	{Char, `'\777'`},
	{Char, `'\x00'`},
	{Char, `'\xff'`},
	{Char, `'\u0000'`},
	{Char, `'\ufA16'`},
	{Char, `'\U00000000'`},
	{Char, `'\U0000ffAB'`},

	{Comment, "// strings"},
	{String, `" "`},
	{String, `"a"`},
	{String, `"本"`},
	{String, `"\a"`},
	{String, `"\b"`},
	{String, `"\f"`},
	{String, `"\n"`},
	{String, `"\r"`},
	{String, `"\t"`},
	{String, `"\v"`},
	{String, `"\""`},
	{String, `"\000"`},
	{String, `"\777"`},
	{String, `"\x00"`},
	{String, `"\xff"`},
	{String, `"\u0000"`},
	{String, `"\ufA16"`},
	{String, `"\U00000000"`},
	{String, `"\U0000ffAB"`},
	{String, `"` + f100 + `"`},

	{Comment, "// raw strings"},
	{String, "``"},
	{String, "`\\`"},
	{String, "`" + "\n\n/* foobar */\n\n" + "`"},
	{String, "`" + f100 + "`"},

	{Comment, "// individual characters"},
	// NUL character is not allowed
	{'\x01', "\x01"},
	{' ' - 1, string(' ' - 1)},
	{'+', "+"},
	{'/', "/"},
	{'.', "."},
	{'~', "~"},
	{'(', "("},
}
   227  
   228  func makeSource(pattern string) *bytes.Buffer {
   229  	var buf bytes.Buffer
   230  	for _, k := range tokenList {
   231  		fmt.Fprintf(&buf, pattern, k.text)
   232  	}
   233  	return &buf
   234  }
   235  
   236  func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
   237  	if got != want {
   238  		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
   239  	}
   240  	if s.Line != line {
   241  		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
   242  	}
   243  	stext := s.TokenText()
   244  	if stext != text {
   245  		t.Errorf("text = %q, want %q", stext, text)
   246  	} else {
   247  		// check idempotency of TokenText() call
   248  		stext = s.TokenText()
   249  		if stext != text {
   250  			t.Errorf("text = %q, want %q (idempotency check)", stext, text)
   251  		}
   252  	}
   253  }
   254  
   255  func countNewlines(s string) int {
   256  	n := 0
   257  	for _, ch := range s {
   258  		if ch == '\n' {
   259  			n++
   260  		}
   261  	}
   262  	return n
   263  }
   264  
   265  func testScan(t *testing.T, mode uint) {
   266  	s := new(Scanner).Init(makeSource(" \t%s\n"))
   267  	s.Mode = mode
   268  	tok := s.Scan()
   269  	line := 1
   270  	for _, k := range tokenList {
   271  		if mode&SkipComments == 0 || k.tok != Comment {
   272  			checkTok(t, s, line, tok, k.tok, k.text)
   273  			tok = s.Scan()
   274  		}
   275  		line += countNewlines(k.text) + 1 // each token is on a new line
   276  	}
   277  	checkTok(t, s, line, tok, EOF, "")
   278  }
   279  
   280  func TestScan(t *testing.T) {
   281  	testScan(t, GoTokens)
   282  	testScan(t, GoTokens&^SkipComments)
   283  }
   284  
   285  func TestPosition(t *testing.T) {
   286  	src := makeSource("\t\t\t\t%s\n")
   287  	s := new(Scanner).Init(src)
   288  	s.Mode = GoTokens &^ SkipComments
   289  	s.Scan()
   290  	pos := Position{"", 4, 1, 5}
   291  	for _, k := range tokenList {
   292  		if s.Offset != pos.Offset {
   293  			t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
   294  		}
   295  		if s.Line != pos.Line {
   296  			t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
   297  		}
   298  		if s.Column != pos.Column {
   299  			t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
   300  		}
   301  		pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
   302  		pos.Line += countNewlines(k.text) + 1 // each token is on a new line
   303  		s.Scan()
   304  	}
   305  	// make sure there were no token-internal errors reported by scanner
   306  	if s.ErrorCount != 0 {
   307  		t.Errorf("%d errors", s.ErrorCount)
   308  	}
   309  }
   310  
   311  func TestScanZeroMode(t *testing.T) {
   312  	src := makeSource("%s\n")
   313  	str := src.String()
   314  	s := new(Scanner).Init(src)
   315  	s.Mode = 0       // don't recognize any token classes
   316  	s.Whitespace = 0 // don't skip any whitespace
   317  	tok := s.Scan()
   318  	for i, ch := range str {
   319  		if tok != ch {
   320  			t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
   321  		}
   322  		tok = s.Scan()
   323  	}
   324  	if tok != EOF {
   325  		t.Fatalf("tok = %s, want EOF", TokenString(tok))
   326  	}
   327  	if s.ErrorCount != 0 {
   328  		t.Errorf("%d errors", s.ErrorCount)
   329  	}
   330  }
   331  
   332  func testScanSelectedMode(t *testing.T, mode uint, class rune) {
   333  	src := makeSource("%s\n")
   334  	s := new(Scanner).Init(src)
   335  	s.Mode = mode
   336  	tok := s.Scan()
   337  	for tok != EOF {
   338  		if tok < 0 && tok != class {
   339  			t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
   340  		}
   341  		tok = s.Scan()
   342  	}
   343  	if s.ErrorCount != 0 {
   344  		t.Errorf("%d errors", s.ErrorCount)
   345  	}
   346  }
   347  
   348  func TestScanSelectedMask(t *testing.T) {
   349  	testScanSelectedMode(t, 0, 0)
   350  	testScanSelectedMode(t, ScanIdents, Ident)
   351  	// Don't test ScanInts and ScanNumbers since some parts of
   352  	// the floats in the source look like (illegal) octal ints
   353  	// and ScanNumbers may return either Int or Float.
   354  	testScanSelectedMode(t, ScanChars, Char)
   355  	testScanSelectedMode(t, ScanStrings, String)
   356  	testScanSelectedMode(t, SkipComments, 0)
   357  	testScanSelectedMode(t, ScanComments, Comment)
   358  }
   359  
   360  func TestScanCustomIdent(t *testing.T) {
   361  	const src = "faab12345 a12b123 a12 3b"
   362  	s := new(Scanner).Init(strings.NewReader(src))
   363  	// ident = ( 'a' | 'b' ) { digit } .
   364  	// digit = '0' .. '3' .
   365  	// with a maximum length of 4
   366  	s.IsIdentRune = func(ch rune, i int) bool {
   367  		return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
   368  	}
   369  	checkTok(t, s, 1, s.Scan(), 'f', "f")
   370  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   371  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   372  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
   373  	checkTok(t, s, 1, s.Scan(), Int, "45")
   374  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
   375  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
   376  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
   377  	checkTok(t, s, 1, s.Scan(), Int, "3")
   378  	checkTok(t, s, 1, s.Scan(), Ident, "b")
   379  	checkTok(t, s, 1, s.Scan(), EOF, "")
   380  }
   381  
   382  func TestScanNext(t *testing.T) {
   383  	const BOM = '\uFEFF'
   384  	BOMs := string(BOM)
   385  	s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
   386  	checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
   387  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   388  	checkTok(t, s, 1, s.Scan(), '=', "=")
   389  	checkTok(t, s, 0, s.Next(), '=', "")
   390  	checkTok(t, s, 0, s.Next(), ' ', "")
   391  	checkTok(t, s, 0, s.Next(), 'b', "")
   392  	checkTok(t, s, 1, s.Scan(), Ident, "cd")
   393  	checkTok(t, s, 1, s.Scan(), '{', "{")
   394  	checkTok(t, s, 2, s.Scan(), Ident, "a")
   395  	checkTok(t, s, 2, s.Scan(), '+', "+")
   396  	checkTok(t, s, 0, s.Next(), '=', "")
   397  	checkTok(t, s, 2, s.Scan(), Ident, "c")
   398  	checkTok(t, s, 3, s.Scan(), '}', "}")
   399  	checkTok(t, s, 3, s.Scan(), BOM, BOMs)
   400  	checkTok(t, s, 3, s.Scan(), -1, "")
   401  	if s.ErrorCount != 0 {
   402  		t.Errorf("%d errors", s.ErrorCount)
   403  	}
   404  }
   405  
   406  func TestScanWhitespace(t *testing.T) {
   407  	var buf bytes.Buffer
   408  	var ws uint64
   409  	// start at 1, NUL character is not allowed
   410  	for ch := byte(1); ch < ' '; ch++ {
   411  		buf.WriteByte(ch)
   412  		ws |= 1 << ch
   413  	}
   414  	const orig = 'x'
   415  	buf.WriteByte(orig)
   416  
   417  	s := new(Scanner).Init(&buf)
   418  	s.Mode = 0
   419  	s.Whitespace = ws
   420  	tok := s.Scan()
   421  	if tok != orig {
   422  		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
   423  	}
   424  }
   425  
   426  func testError(t *testing.T, src, pos, msg string, tok rune) {
   427  	s := new(Scanner).Init(strings.NewReader(src))
   428  	errorCalled := false
   429  	s.Error = func(s *Scanner, m string) {
   430  		if !errorCalled {
   431  			// only look at first error
   432  			if p := s.Pos().String(); p != pos {
   433  				t.Errorf("pos = %q, want %q for %q", p, pos, src)
   434  			}
   435  			if m != msg {
   436  				t.Errorf("msg = %q, want %q for %q", m, msg, src)
   437  			}
   438  			errorCalled = true
   439  		}
   440  	}
   441  	tk := s.Scan()
   442  	if tk != tok {
   443  		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
   444  	}
   445  	if !errorCalled {
   446  		t.Errorf("error handler not called for %q", src)
   447  	}
   448  	if s.ErrorCount == 0 {
   449  		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
   450  	}
   451  }
   452  
   453  func TestError(t *testing.T) {
   454  	testError(t, "\x00", "<input>:1:1", "illegal character NUL", 0)
   455  	testError(t, "\x80", "<input>:1:1", "illegal UTF-8 encoding", utf8.RuneError)
   456  	testError(t, "\xff", "<input>:1:1", "illegal UTF-8 encoding", utf8.RuneError)
   457  
   458  	testError(t, "a\x00", "<input>:1:2", "illegal character NUL", Ident)
   459  	testError(t, "ab\x80", "<input>:1:3", "illegal UTF-8 encoding", Ident)
   460  	testError(t, "abc\xff", "<input>:1:4", "illegal UTF-8 encoding", Ident)
   461  
   462  	testError(t, `"a`+"\x00", "<input>:1:3", "illegal character NUL", String)
   463  	testError(t, `"ab`+"\x80", "<input>:1:4", "illegal UTF-8 encoding", String)
   464  	testError(t, `"abc`+"\xff", "<input>:1:5", "illegal UTF-8 encoding", String)
   465  
   466  	testError(t, "`a"+"\x00", "<input>:1:3", "illegal character NUL", String)
   467  	testError(t, "`ab"+"\x80", "<input>:1:4", "illegal UTF-8 encoding", String)
   468  	testError(t, "`abc"+"\xff", "<input>:1:5", "illegal UTF-8 encoding", String)
   469  
   470  	testError(t, `'\"'`, "<input>:1:3", "illegal char escape", Char)
   471  	testError(t, `"\'"`, "<input>:1:3", "illegal char escape", String)
   472  
   473  	testError(t, `01238`, "<input>:1:6", "illegal octal number", Int)
   474  	testError(t, `01238123`, "<input>:1:9", "illegal octal number", Int)
   475  	testError(t, `0x`, "<input>:1:3", "illegal hexadecimal number", Int)
   476  	testError(t, `0xg`, "<input>:1:3", "illegal hexadecimal number", Int)
   477  	testError(t, `'aa'`, "<input>:1:4", "illegal char literal", Char)
   478  
   479  	testError(t, `'`, "<input>:1:2", "literal not terminated", Char)
   480  	testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char)
   481  	testError(t, `"abc`, "<input>:1:5", "literal not terminated", String)
   482  	testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String)
   483  	testError(t, "`abc\n", "<input>:2:1", "literal not terminated", String)
   484  	testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF)
   485  }
   486  
   487  // An errReader returns (0, err) where err is not io.EOF.
   488  type errReader struct{}
   489  
   490  func (errReader) Read(b []byte) (int, error) {
   491  	return 0, io.ErrNoProgress // some error that is not io.EOF
   492  }
   493  
   494  func TestIOError(t *testing.T) {
   495  	s := new(Scanner).Init(errReader{})
   496  	errorCalled := false
   497  	s.Error = func(s *Scanner, msg string) {
   498  		if !errorCalled {
   499  			if want := io.ErrNoProgress.Error(); msg != want {
   500  				t.Errorf("msg = %q, want %q", msg, want)
   501  			}
   502  			errorCalled = true
   503  		}
   504  	}
   505  	tok := s.Scan()
   506  	if tok != EOF {
   507  		t.Errorf("tok = %s, want EOF", TokenString(tok))
   508  	}
   509  	if !errorCalled {
   510  		t.Errorf("error handler not called")
   511  	}
   512  }
   513  
   514  func checkPos(t *testing.T, got, want Position) {
   515  	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
   516  		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
   517  			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
   518  	}
   519  }
   520  
   521  func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   522  	if ch := s.Next(); ch != char {
   523  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   524  	}
   525  	want := Position{Offset: offset, Line: line, Column: column}
   526  	checkPos(t, s.Pos(), want)
   527  }
   528  
   529  func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   530  	want := Position{Offset: offset, Line: line, Column: column}
   531  	checkPos(t, s.Pos(), want)
   532  	if ch := s.Scan(); ch != char {
   533  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   534  		if string(ch) != s.TokenText() {
   535  			t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
   536  		}
   537  	}
   538  	checkPos(t, s.Position, want)
   539  }
   540  
   541  func TestPos(t *testing.T) {
   542  	// corner case: empty source
   543  	s := new(Scanner).Init(strings.NewReader(""))
   544  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   545  	s.Peek() // peek doesn't affect the position
   546  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   547  
   548  	// corner case: source with only a newline
   549  	s = new(Scanner).Init(strings.NewReader("\n"))
   550  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   551  	checkNextPos(t, s, 1, 2, 1, '\n')
   552  	// after EOF position doesn't change
   553  	for i := 10; i > 0; i-- {
   554  		checkScanPos(t, s, 1, 2, 1, EOF)
   555  	}
   556  	if s.ErrorCount != 0 {
   557  		t.Errorf("%d errors", s.ErrorCount)
   558  	}
   559  
   560  	// corner case: source with only a single character
   561  	s = new(Scanner).Init(strings.NewReader("本"))
   562  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   563  	checkNextPos(t, s, 3, 1, 2, '本')
   564  	// after EOF position doesn't change
   565  	for i := 10; i > 0; i-- {
   566  		checkScanPos(t, s, 3, 1, 2, EOF)
   567  	}
   568  	if s.ErrorCount != 0 {
   569  		t.Errorf("%d errors", s.ErrorCount)
   570  	}
   571  
   572  	// positions after calling Next
   573  	s = new(Scanner).Init(strings.NewReader("  foo६४  \n\n本語\n"))
   574  	checkNextPos(t, s, 1, 1, 2, ' ')
   575  	s.Peek() // peek doesn't affect the position
   576  	checkNextPos(t, s, 2, 1, 3, ' ')
   577  	checkNextPos(t, s, 3, 1, 4, 'f')
   578  	checkNextPos(t, s, 4, 1, 5, 'o')
   579  	checkNextPos(t, s, 5, 1, 6, 'o')
   580  	checkNextPos(t, s, 8, 1, 7, '६')
   581  	checkNextPos(t, s, 11, 1, 8, '४')
   582  	checkNextPos(t, s, 12, 1, 9, ' ')
   583  	checkNextPos(t, s, 13, 1, 10, ' ')
   584  	checkNextPos(t, s, 14, 2, 1, '\n')
   585  	checkNextPos(t, s, 15, 3, 1, '\n')
   586  	checkNextPos(t, s, 18, 3, 2, '本')
   587  	checkNextPos(t, s, 21, 3, 3, '語')
   588  	checkNextPos(t, s, 22, 4, 1, '\n')
   589  	// after EOF position doesn't change
   590  	for i := 10; i > 0; i-- {
   591  		checkScanPos(t, s, 22, 4, 1, EOF)
   592  	}
   593  	if s.ErrorCount != 0 {
   594  		t.Errorf("%d errors", s.ErrorCount)
   595  	}
   596  
   597  	// positions after calling Scan
   598  	s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
   599  	s.Mode = 0
   600  	s.Whitespace = 0
   601  	checkScanPos(t, s, 0, 1, 1, 'a')
   602  	s.Peek() // peek doesn't affect the position
   603  	checkScanPos(t, s, 1, 1, 2, 'b')
   604  	checkScanPos(t, s, 2, 1, 3, 'c')
   605  	checkScanPos(t, s, 3, 1, 4, '\n')
   606  	checkScanPos(t, s, 4, 2, 1, '本')
   607  	checkScanPos(t, s, 7, 2, 2, '語')
   608  	checkScanPos(t, s, 10, 2, 3, '\n')
   609  	checkScanPos(t, s, 11, 3, 1, '\n')
   610  	checkScanPos(t, s, 12, 4, 1, 'x')
   611  	// after EOF position doesn't change
   612  	for i := 10; i > 0; i-- {
   613  		checkScanPos(t, s, 13, 4, 2, EOF)
   614  	}
   615  	if s.ErrorCount != 0 {
   616  		t.Errorf("%d errors", s.ErrorCount)
   617  	}
   618  }
   619  
   620  type countReader int
   621  
   622  func (r *countReader) Read([]byte) (int, error) {
   623  	*r++
   624  	return 0, io.EOF
   625  }
   626  
   627  func TestNextEOFHandling(t *testing.T) {
   628  	var r countReader
   629  
   630  	// corner case: empty source
   631  	s := new(Scanner).Init(&r)
   632  
   633  	tok := s.Next()
   634  	if tok != EOF {
   635  		t.Error("1) EOF not reported")
   636  	}
   637  
   638  	tok = s.Peek()
   639  	if tok != EOF {
   640  		t.Error("2) EOF not reported")
   641  	}
   642  
   643  	if r != 1 {
   644  		t.Errorf("scanner called Read %d times, not once", r)
   645  	}
   646  }
   647  
   648  func TestScanEOFHandling(t *testing.T) {
   649  	var r countReader
   650  
   651  	// corner case: empty source
   652  	s := new(Scanner).Init(&r)
   653  
   654  	tok := s.Scan()
   655  	if tok != EOF {
   656  		t.Error("1) EOF not reported")
   657  	}
   658  
   659  	tok = s.Peek()
   660  	if tok != EOF {
   661  		t.Error("2) EOF not reported")
   662  	}
   663  
   664  	if r != 1 {
   665  		t.Errorf("scanner called Read %d times, not once", r)
   666  	}
   667  }
   668  

View as plain text