...
Run Format

Source file src/regexp/find_test.go

Documentation: regexp

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  // For each pattern/text pair, what is the expected output of each function?
    14  // We can derive the textual results from the indexed results, the non-submatch
    15  // results from the submatched results, the single results from the 'all' results,
    16  // and the byte results from the string results. Therefore the table includes
    17  // only the FindAllStringSubmatchIndex result.
    18  type FindTest struct {
    19  	pat     string
    20  	text    string
    21  	matches [][]int
    22  }
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
    28  var findTests = []FindTest{
    29  	{``, ``, build(1, 0, 0)},
    30  	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
    31  	{`a+`, "baaab", build(1, 1, 4)},
    32  	{"abcd..", "abcdef", build(1, 0, 6)},
    33  	{`a`, "a", build(1, 0, 1)},
    34  	{`x`, "y", nil},
    35  	{`b`, "abc", build(1, 1, 2)},
    36  	{`.`, "a", build(1, 0, 1)},
    37  	{`.*`, "abcdef", build(1, 0, 6)},
    38  	{`^`, "abcde", build(1, 0, 0)},
    39  	{`$`, "abcde", build(1, 5, 5)},
    40  	{`^abcd$`, "abcd", build(1, 0, 4)},
    41  	{`^bcd'`, "abcdef", nil},
    42  	{`^abcd$`, "abcde", nil},
    43  	{`a+`, "baaab", build(1, 1, 4)},
    44  	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
    45  	{`[a-z]+`, "abcd", build(1, 0, 4)},
    46  	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
    47  	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
    48  	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
    49  	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
    50  	{`日本語+`, "日本語", build(1, 0, 9)},
    51  	{`日本語+`, "日本語語語語", build(1, 0, 18)},
    52  	{`()`, "", build(1, 0, 0, 0, 0)},
    53  	{`(a)`, "a", build(1, 0, 1, 0, 1)},
    54  	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
    55  	{`(.*)`, "", build(1, 0, 0, 0, 0)},
    56  	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
    57  	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
    58  	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
    59  	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
    60  	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
    61  	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
    62  	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
    63  
    64  	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
    65  	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
    66  	{`[.]`, ".", build(1, 0, 1)},
    67  	{`/$`, "/abc/", build(1, 4, 5)},
    68  	{`/$`, "/abc", nil},
    69  
    70  	// multiple matches
    71  	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
    72  	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
    73  	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
    74  	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
    75  	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
    76  
    77  	// fixed bugs
    78  	{`ab$`, "cab", build(1, 1, 3)},
    79  	{`axxb$`, "axxcb", nil},
    80  	{`data`, "daXY data", build(1, 5, 9)},
    81  	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
    82  	{`zx+`, "zzx", build(1, 1, 3)},
    83  	{`ab$`, "abcab", build(1, 3, 5)},
    84  	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
    85  	{`(?:.|(?:.a))`, "", nil},
    86  	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
    87  	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
    88  	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
    89  	{`(?-s)(?:(?:^).)`, "\n", nil},
    90  	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
    91  	{`(?:(?:^).)`, "\n", nil},
    92  	{`\b`, "x", build(2, 0, 0, 1, 1)},
    93  	{`\b`, "xx", build(2, 0, 0, 2, 2)},
    94  	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
    95  	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
    96  	{`\B`, "x", nil},
    97  	{`\B`, "xx", build(1, 1, 1)},
    98  	{`\B`, "x y", nil},
    99  	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
   100  
   101  	// RE2 tests
   102  	{`[^\S\s]`, "abcd", nil},
   103  	{`[^\S[:space:]]`, "abcd", nil},
   104  	{`[^\D\d]`, "abcd", nil},
   105  	{`[^\D[:digit:]]`, "abcd", nil},
   106  	{`(?i)\W`, "x", nil},
   107  	{`(?i)\W`, "k", nil},
   108  	{`(?i)\W`, "s", nil},
   109  
   110  	// can backslash-escape any punctuation
   111  	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
   112  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
   113  	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
   114  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
   115  	{"\\`", "`", build(1, 0, 1)},
   116  	{"[\\`]+", "`", build(1, 0, 1)},
   117  
   118  	// long set of matches (longer than startSize)
   119  	{
   120  		".",
   121  		"qwertyuiopasdfghjklzxcvbnm1234567890",
   122  		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
   123  			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
   124  			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
   125  			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
   126  	},
   127  }
   128  
   129  // build is a helper to construct a [][]int by extracting n sequences from x.
   130  // This represents n matches with len(x)/n submatches each.
   131  func build(n int, x ...int) [][]int {
   132  	ret := make([][]int, n)
   133  	runLength := len(x) / n
   134  	j := 0
   135  	for i := range ret {
   136  		ret[i] = make([]int, runLength)
   137  		copy(ret[i], x[j:])
   138  		j += runLength
   139  		if j > len(x) {
   140  			panic("invalid build entry")
   141  		}
   142  	}
   143  	return ret
   144  }
   145  
   146  // First the simple cases.
   147  
   148  func TestFind(t *testing.T) {
   149  	for _, test := range findTests {
   150  		re := MustCompile(test.pat)
   151  		if re.String() != test.pat {
   152  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   153  		}
   154  		result := re.Find([]byte(test.text))
   155  		switch {
   156  		case len(test.matches) == 0 && len(result) == 0:
   157  			// ok
   158  		case test.matches == nil && result != nil:
   159  			t.Errorf("expected no match; got one: %s", test)
   160  		case test.matches != nil && result == nil:
   161  			t.Errorf("expected match; got none: %s", test)
   162  		case test.matches != nil && result != nil:
   163  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   164  			if expect != string(result) {
   165  				t.Errorf("expected %q got %q: %s", expect, result, test)
   166  			}
   167  		}
   168  	}
   169  }
   170  
   171  func TestFindString(t *testing.T) {
   172  	for _, test := range findTests {
   173  		result := MustCompile(test.pat).FindString(test.text)
   174  		switch {
   175  		case len(test.matches) == 0 && len(result) == 0:
   176  			// ok
   177  		case test.matches == nil && result != "":
   178  			t.Errorf("expected no match; got one: %s", test)
   179  		case test.matches != nil && result == "":
   180  			// Tricky because an empty result has two meanings: no match or empty match.
   181  			if test.matches[0][0] != test.matches[0][1] {
   182  				t.Errorf("expected match; got none: %s", test)
   183  			}
   184  		case test.matches != nil && result != "":
   185  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   186  			if expect != result {
   187  				t.Errorf("expected %q got %q: %s", expect, result, test)
   188  			}
   189  		}
   190  	}
   191  }
   192  
   193  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   194  	switch {
   195  	case len(test.matches) == 0 && len(result) == 0:
   196  		// ok
   197  	case test.matches == nil && result != nil:
   198  		t.Errorf("expected no match; got one: %s", test)
   199  	case test.matches != nil && result == nil:
   200  		t.Errorf("expected match; got none: %s", test)
   201  	case test.matches != nil && result != nil:
   202  		expect := test.matches[0]
   203  		if expect[0] != result[0] || expect[1] != result[1] {
   204  			t.Errorf("expected %v got %v: %s", expect, result, test)
   205  		}
   206  	}
   207  }
   208  
   209  func TestFindIndex(t *testing.T) {
   210  	for _, test := range findTests {
   211  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   212  	}
   213  }
   214  
   215  func TestFindStringIndex(t *testing.T) {
   216  	for _, test := range findTests {
   217  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   218  	}
   219  }
   220  
   221  func TestFindReaderIndex(t *testing.T) {
   222  	for _, test := range findTests {
   223  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   224  	}
   225  }
   226  
   227  // Now come the simple All cases.
   228  
   229  func TestFindAll(t *testing.T) {
   230  	for _, test := range findTests {
   231  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   232  		switch {
   233  		case test.matches == nil && result == nil:
   234  			// ok
   235  		case test.matches == nil && result != nil:
   236  			t.Errorf("expected no match; got one: %s", test)
   237  		case test.matches != nil && result == nil:
   238  			t.Fatalf("expected match; got none: %s", test)
   239  		case test.matches != nil && result != nil:
   240  			if len(test.matches) != len(result) {
   241  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   242  				continue
   243  			}
   244  			for k, e := range test.matches {
   245  				expect := test.text[e[0]:e[1]]
   246  				if expect != string(result[k]) {
   247  					t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
   248  				}
   249  			}
   250  		}
   251  	}
   252  }
   253  
   254  func TestFindAllString(t *testing.T) {
   255  	for _, test := range findTests {
   256  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   257  		switch {
   258  		case test.matches == nil && result == nil:
   259  			// ok
   260  		case test.matches == nil && result != nil:
   261  			t.Errorf("expected no match; got one: %s", test)
   262  		case test.matches != nil && result == nil:
   263  			t.Errorf("expected match; got none: %s", test)
   264  		case test.matches != nil && result != nil:
   265  			if len(test.matches) != len(result) {
   266  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   267  				continue
   268  			}
   269  			for k, e := range test.matches {
   270  				expect := test.text[e[0]:e[1]]
   271  				if expect != result[k] {
   272  					t.Errorf("expected %q got %q: %s", expect, result, test)
   273  				}
   274  			}
   275  		}
   276  	}
   277  }
   278  
   279  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   280  	switch {
   281  	case test.matches == nil && result == nil:
   282  		// ok
   283  	case test.matches == nil && result != nil:
   284  		t.Errorf("expected no match; got one: %s", test)
   285  	case test.matches != nil && result == nil:
   286  		t.Errorf("expected match; got none: %s", test)
   287  	case test.matches != nil && result != nil:
   288  		if len(test.matches) != len(result) {
   289  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   290  			return
   291  		}
   292  		for k, e := range test.matches {
   293  			if e[0] != result[k][0] || e[1] != result[k][1] {
   294  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   295  			}
   296  		}
   297  	}
   298  }
   299  
   300  func TestFindAllIndex(t *testing.T) {
   301  	for _, test := range findTests {
   302  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   303  	}
   304  }
   305  
   306  func TestFindAllStringIndex(t *testing.T) {
   307  	for _, test := range findTests {
   308  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   309  	}
   310  }
   311  
   312  // Now come the Submatch cases.
   313  
   314  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   315  	if len(submatches) != len(result)*2 {
   316  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   317  		return
   318  	}
   319  	for k := 0; k < len(submatches); k += 2 {
   320  		if submatches[k] == -1 {
   321  			if result[k/2] != nil {
   322  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   323  			}
   324  			continue
   325  		}
   326  		expect := test.text[submatches[k]:submatches[k+1]]
   327  		if expect != string(result[k/2]) {
   328  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   329  			return
   330  		}
   331  	}
   332  }
   333  
   334  func TestFindSubmatch(t *testing.T) {
   335  	for _, test := range findTests {
   336  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   337  		switch {
   338  		case test.matches == nil && result == nil:
   339  			// ok
   340  		case test.matches == nil && result != nil:
   341  			t.Errorf("expected no match; got one: %s", test)
   342  		case test.matches != nil && result == nil:
   343  			t.Errorf("expected match; got none: %s", test)
   344  		case test.matches != nil && result != nil:
   345  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   346  		}
   347  	}
   348  }
   349  
   350  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   351  	if len(submatches) != len(result)*2 {
   352  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   353  		return
   354  	}
   355  	for k := 0; k < len(submatches); k += 2 {
   356  		if submatches[k] == -1 {
   357  			if result[k/2] != "" {
   358  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   359  			}
   360  			continue
   361  		}
   362  		expect := test.text[submatches[k]:submatches[k+1]]
   363  		if expect != result[k/2] {
   364  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   365  			return
   366  		}
   367  	}
   368  }
   369  
   370  func TestFindStringSubmatch(t *testing.T) {
   371  	for _, test := range findTests {
   372  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   373  		switch {
   374  		case test.matches == nil && result == nil:
   375  			// ok
   376  		case test.matches == nil && result != nil:
   377  			t.Errorf("expected no match; got one: %s", test)
   378  		case test.matches != nil && result == nil:
   379  			t.Errorf("expected match; got none: %s", test)
   380  		case test.matches != nil && result != nil:
   381  			testSubmatchString(&test, 0, test.matches[0], result, t)
   382  		}
   383  	}
   384  }
   385  
   386  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   387  	if len(expect) != len(result) {
   388  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   389  		return
   390  	}
   391  	for k, e := range expect {
   392  		if e != result[k] {
   393  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   394  		}
   395  	}
   396  }
   397  
   398  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   399  	switch {
   400  	case test.matches == nil && result == nil:
   401  		// ok
   402  	case test.matches == nil && result != nil:
   403  		t.Errorf("expected no match; got one: %s", test)
   404  	case test.matches != nil && result == nil:
   405  		t.Errorf("expected match; got none: %s", test)
   406  	case test.matches != nil && result != nil:
   407  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   408  	}
   409  }
   410  
   411  func TestFindSubmatchIndex(t *testing.T) {
   412  	for _, test := range findTests {
   413  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   414  	}
   415  }
   416  
   417  func TestFindStringSubmatchIndex(t *testing.T) {
   418  	for _, test := range findTests {
   419  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   420  	}
   421  }
   422  
   423  func TestFindReaderSubmatchIndex(t *testing.T) {
   424  	for _, test := range findTests {
   425  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   426  	}
   427  }
   428  
   429  // Now come the monster AllSubmatch cases.
   430  
   431  func TestFindAllSubmatch(t *testing.T) {
   432  	for _, test := range findTests {
   433  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   434  		switch {
   435  		case test.matches == nil && result == nil:
   436  			// ok
   437  		case test.matches == nil && result != nil:
   438  			t.Errorf("expected no match; got one: %s", test)
   439  		case test.matches != nil && result == nil:
   440  			t.Errorf("expected match; got none: %s", test)
   441  		case len(test.matches) != len(result):
   442  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   443  		case test.matches != nil && result != nil:
   444  			for k, match := range test.matches {
   445  				testSubmatchBytes(&test, k, match, result[k], t)
   446  			}
   447  		}
   448  	}
   449  }
   450  
   451  func TestFindAllStringSubmatch(t *testing.T) {
   452  	for _, test := range findTests {
   453  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   454  		switch {
   455  		case test.matches == nil && result == nil:
   456  			// ok
   457  		case test.matches == nil && result != nil:
   458  			t.Errorf("expected no match; got one: %s", test)
   459  		case test.matches != nil && result == nil:
   460  			t.Errorf("expected match; got none: %s", test)
   461  		case len(test.matches) != len(result):
   462  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   463  		case test.matches != nil && result != nil:
   464  			for k, match := range test.matches {
   465  				testSubmatchString(&test, k, match, result[k], t)
   466  			}
   467  		}
   468  	}
   469  }
   470  
   471  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   472  	switch {
   473  	case test.matches == nil && result == nil:
   474  		// ok
   475  	case test.matches == nil && result != nil:
   476  		t.Errorf("expected no match; got one: %s", test)
   477  	case test.matches != nil && result == nil:
   478  		t.Errorf("expected match; got none: %s", test)
   479  	case len(test.matches) != len(result):
   480  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   481  	case test.matches != nil && result != nil:
   482  		for k, match := range test.matches {
   483  			testSubmatchIndices(test, k, match, result[k], t)
   484  		}
   485  	}
   486  }
   487  
   488  func TestFindAllSubmatchIndex(t *testing.T) {
   489  	for _, test := range findTests {
   490  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   491  	}
   492  }
   493  
   494  func TestFindAllStringSubmatchIndex(t *testing.T) {
   495  	for _, test := range findTests {
   496  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   497  	}
   498  }
   499  

View as plain text