Source file src/encoding/csv/reader_test.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
// readTest describes one table-driven case for TestRead: the annotated CSV
// input, the records and errors the Reader is expected to produce, and the
// Reader settings to apply before parsing.
type readTest struct {
	// Name is the subtest name passed to t.Run.
	Name      string
	// Input is the CSV text, optionally annotated with the §, ¶ and ∑
	// position markers that makePositions strips before parsing.
	Input     string
	// Output holds the expected records, in order. It is compared against the
	// result of ReadAll and, record by record, against Read.
	Output    [][]string
	// NOTE(review): no code visible in this file reads Positions; TestRead
	// derives the expected positions from Input via makePositions — confirm
	// whether this field is still needed.
	Positions [][][2]int
	// Errors[i] is the error expected when reading record i; a nil entry (or
	// a missing one) means no error is expected for that record. ReadAll is
	// expected to fail with the first non-nil entry.
	Errors    []error

	// These fields are copied into the Reader
	// (Comma only when it is non-zero; the others unconditionally).
	Comma              rune
	Comment            rune
	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
	FieldsPerRecord    int
	LazyQuotes         bool
	TrimLeadingSpace   bool
	ReuseRecord        bool
}
    33  
    34  // In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
    35  // the start of a field, a record boundary and the position of an error respectively.
    36  // They are removed before parsing and are used to verify the position
    37  // information reported by FieldPos.
    38  
// readTests is the table of cases that TestRead runs, one subtest per entry,
// named by the Name field. Input strings may carry the §, ¶ and ∑ position
// markers; makePositions strips them before the text reaches the Reader and
// turns them into the expected field and error positions.
var readTests = []readTest{{
	Name:   "Simple",
	Input:  "§a,§b,§c\n",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "CRLF",
	Input:  "§a,§b\r\n¶§c,§d\r\n",
	Output: [][]string{{"a", "b"}, {"c", "d"}},
}, {
	Name:   "BareCR",
	Input:  "§a,§b\rc,§d\r\n",
	Output: [][]string{{"a", "b\rc", "d"}},
}, {
	Name: "RFC4180test",
	Input: `§#field1,§field2,§field3
¶§"aaa",§"bb
b",§"ccc"
¶§"a,a",§"b""bb",§"ccc"
¶§zzz,§yyy,§xxx
`,
	Output: [][]string{
		{"#field1", "field2", "field3"},
		{"aaa", "bb\nb", "ccc"},
		{"a,a", `b"bb`, "ccc"},
		{"zzz", "yyy", "xxx"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:   "NoEOLTest",
	Input:  "§a,§b,§c",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "Semicolon",
	Input:  "§a;§b;§c\n",
	Output: [][]string{{"a", "b", "c"}},
	Comma:  ';',
}, {
	Name: "MultiLine",
	Input: `§"two
line",§"one line",§"three
line
field"`,
	Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
}, {
	Name:  "BlankLine",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
}, {
	Name:  "BlankLineFieldCount",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:             "TrimSpace",
	Input:            " §a,  §b,   §c\n",
	Output:           [][]string{{"a", "b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "LeadingSpace",
	Input:  "§ a,§  b,§   c\n",
	Output: [][]string{{" a", "  b", "   c"}},
}, {
	Name:    "Comment",
	Input:   "#1,2,3\n§a,§b,§c\n#comment",
	Output:  [][]string{{"a", "b", "c"}},
	Comment: '#',
}, {
	Name:   "NoComment",
	Input:  "§#1,§2,§3\n¶§a,§b,§c",
	Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
}, {
	Name:       "LazyQuotes",
	Input:      `§a "word",§"1"2",§a",§"b`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	LazyQuotes: true,
}, {
	Name:       "BareQuotes",
	Input:      `§a "word",§"1"2",§a"`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	LazyQuotes: true,
}, {
	Name:       "BareDoubleQuotes",
	Input:      `§a""b,§c`,
	Output:     [][]string{{`a""b`, `c`}},
	LazyQuotes: true,
}, {
	Name:   "BadDoubleQuotes",
	Input:  `§a∑""b,c`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:             "TrimQuote",
	Input:            ` §"a",§" b",§c`,
	Output:           [][]string{{"a", " b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "BadBareQuote",
	Input:  `§a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "BadTrailingQuote",
	Input:  `§"a word",b∑"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "ExtraneousQuote",
	Input:  `§"a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:               "BadFieldCount",
	Input:              "§a,§b,§c\n¶∑§d,§e",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCountMultiple",
	Input:              "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCount1",
	Input:              `§∑a,§b,§c`,
	Errors:             []error{&ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    2,
}, {
	Name:   "FieldCount",
	Input:  "§a,§b,§c\n¶§d,§e",
	Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, {
	Name:   "TrailingCommaEOF",
	Input:  "§a,§b,§c,§",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:   "TrailingCommaEOL",
	Input:  "§a,§b,§c,§\n",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:             "TrailingCommaSpaceEOF",
	Input:            "§a,§b,§c, §",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaSpaceEOL",
	Input:            "§a,§b,§c, §\n",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaLine3",
	Input:            "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
	Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	TrimLeadingSpace: true,
}, {
	Name:   "NotTrailingComma3",
	Input:  "§a,§b,§c,§ \n",
	Output: [][]string{{"a", "b", "c", " "}},
}, {
	Name: "CommaFieldTest",
	Input: `§x,§y,§z,§w
¶§x,§y,§z,§
¶§x,§y,§,§
¶§x,§,§,§
¶§,§,§,§
¶§"x",§"y",§"z",§"w"
¶§"x",§"y",§"z",§""
¶§"x",§"y",§"",§""
¶§"x",§"",§"",§""
¶§"",§"",§"",§""
`,
	Output: [][]string{
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
	},
}, {
	Name:  "TrailingCommaIneffective1",
	Input: "§a,§b,§\n¶§c,§d,§e",
	Output: [][]string{
		{"a", "b", ""},
		{"c", "d", "e"},
	},
	TrimLeadingSpace: true,
}, {
	Name:  "ReadAllReuseRecord",
	Input: "§a,§b\n¶§c,§d",
	Output: [][]string{
		{"a", "b"},
		{"c", "d"},
	},
	ReuseRecord: true,
}, {
	Name:   "StartLine1", // Issue 19019
	Input:  "§a,\"b\nc∑\"d,e",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "StartLine2",
	Input:  "§a,§b\n¶§\"d\n\n,e∑",
	Errors: []error{nil, &ParseError{Err: ErrQuote}},
	Output: [][]string{{"a", "b"}},
}, {
	Name:  "CRLFInQuotedField", // Issue 21201
	Input: "§A,§\"Hello\r\nHi\",§B\r\n",
	Output: [][]string{
		{"A", "Hello\nHi", "B"},
	},
}, {
	Name:   "BinaryBlobField", // Issue 19410
	Input:  "§x09\x41\xb4\x1c,§aktau",
	Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
}, {
	Name:   "TrailingCR",
	Input:  "§field1,§field2\r",
	Output: [][]string{{"field1", "field2"}},
}, {
	Name:   "QuotedTrailingCR",
	Input:  "§\"field\"\r",
	Output: [][]string{{"field"}},
}, {
	Name:   "QuotedTrailingCRCR",
	Input:  "§\"field∑\"\r\r",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "FieldCR",
	Input:  "§field\rfield\r",
	Output: [][]string{{"field\rfield"}},
}, {
	Name:   "FieldCRCR",
	Input:  "§field\r\rfield\r\r",
	Output: [][]string{{"field\r\rfield\r"}},
}, {
	Name:   "FieldCRCRLF",
	Input:  "§field\r\r\n¶§field\r\r\n",
	Output: [][]string{{"field\r"}, {"field\r"}},
}, {
	Name:   "FieldCRCRLFCR",
	Input:  "§field\r\r\n¶§\rfield\r\r\n\r",
	Output: [][]string{{"field\r"}, {"\rfield\r"}},
}, {
	Name:   "FieldCRCRLFCRCR",
	Input:  "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
	Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
}, {
	Name:  "MultiFieldCRCRLFCRCR",
	Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
	Output: [][]string{
		{"field1", "field2\r"},
		{"\r\rfield1", "field2\r"},
		{"\r\r", ""},
	},
}, {
	Name:             "NonASCIICommaAndComment",
	Input:            "§a£§b,c£ \t§d,e\n€ comment\n",
	Output:           [][]string{{"a", "b,c", "d,e"}},
	TrimLeadingSpace: true,
	Comma:            '£',
	Comment:          '€',
}, {
	Name:    "NonASCIICommaAndCommentWithQuotes",
	Input:   "§a€§\"  b,\"€§ c\nλ comment\n",
	Output:  [][]string{{"a", "  b,", " c"}},
	Comma:   '€',
	Comment: 'λ',
}, {
	// λ and θ start with the same byte.
	// This tests that the parser doesn't confuse such characters.
	Name:    "NonASCIICommaConfusion",
	Input:   "§\"abθcd\"λ§efθgh",
	Output:  [][]string{{"abθcd", "efθgh"}},
	Comma:   'λ',
	Comment: '€',
}, {
	Name:    "NonASCIICommentConfusion",
	Input:   "§λ\n¶§λ\nθ\n¶§λ\n",
	Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
	Comment: 'θ',
}, {
	Name:   "QuotedFieldMultipleLF",
	Input:  "§\"\n\n\n\n\"",
	Output: [][]string{{"\n\n\n\n"}},
}, {
	Name:  "MultipleCRLF",
	Input: "\r\n\r\n\r\n\r\n",
}, {
	// The implementation may read each line in several chunks if it doesn't fit entirely
	// in the read buffer, so we should test the code to handle that condition.
	Name:    "HugeLines",
	Input:   strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
	Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
	Comment: '#',
}, {
	Name:   "QuoteWithTrailingCRLF",
	Input:  "§\"foo∑\"bar\"\r\n",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyQuoteWithTrailingCRLF",
	Input:      "§\"foo\"bar\"\r\n",
	Output:     [][]string{{`foo"bar`}},
	LazyQuotes: true,
}, {
	Name:   "DoubleQuoteWithTrailingCRLF",
	Input:  "§\"foo\"\"bar\"\r\n",
	Output: [][]string{{`foo"bar`}},
}, {
	Name:   "EvenQuotes",
	Input:  `§""""""""`,
	Output: [][]string{{`"""`}},
}, {
	Name:   "OddQuotes",
	Input:  `§"""""""∑`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyOddQuotes",
	Input:      `§"""""""`,
	Output:     [][]string{{`"""`}},
	LazyQuotes: true,
}, {
	Name:   "BadComma1",
	Comma:  '\n',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma2",
	Comma:  '\r',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma3",
	Comma:  '"',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma4",
	Comma:  utf8.RuneError,
	Errors: []error{errInvalidDelim},
}, {
	Name:    "BadComment1",
	Comment: '\n',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment2",
	Comment: '\r',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment3",
	Comment: utf8.RuneError,
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadCommaComment",
	Comma:   'X',
	Comment: 'X',
	Errors:  []error{errInvalidDelim},
}}
   405  
   406  func TestRead(t *testing.T) {
   407  	newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int, string) {
   408  		positions, errPositions, input := makePositions(tt.Input)
   409  		r := NewReader(strings.NewReader(input))
   410  
   411  		if tt.Comma != 0 {
   412  			r.Comma = tt.Comma
   413  		}
   414  		r.Comment = tt.Comment
   415  		if tt.UseFieldsPerRecord {
   416  			r.FieldsPerRecord = tt.FieldsPerRecord
   417  		} else {
   418  			r.FieldsPerRecord = -1
   419  		}
   420  		r.LazyQuotes = tt.LazyQuotes
   421  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   422  		r.ReuseRecord = tt.ReuseRecord
   423  		return r, positions, errPositions, input
   424  	}
   425  
   426  	for _, tt := range readTests {
   427  		t.Run(tt.Name, func(t *testing.T) {
   428  			r, positions, errPositions, input := newReader(tt)
   429  			out, err := r.ReadAll()
   430  			if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
   431  				if !reflect.DeepEqual(err, wantErr) {
   432  					t.Fatalf("ReadAll() error mismatch:\ngot  %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
   433  				}
   434  				if out != nil {
   435  					t.Fatalf("ReadAll() output:\ngot  %q\nwant nil", out)
   436  				}
   437  			} else {
   438  				if err != nil {
   439  					t.Fatalf("unexpected Readall() error: %v", err)
   440  				}
   441  				if !reflect.DeepEqual(out, tt.Output) {
   442  					t.Fatalf("ReadAll() output:\ngot  %q\nwant %q", out, tt.Output)
   443  				}
   444  			}
   445  
   446  			// Check input offset after call ReadAll()
   447  			inputByteSize := len(input)
   448  			inputOffset := r.InputOffset()
   449  			if err == nil && int64(inputByteSize) != inputOffset {
   450  				t.Errorf("wrong input offset after call ReadAll():\ngot:  %d\nwant: %d\ninput: %s", inputOffset, inputByteSize, input)
   451  			}
   452  
   453  			// Check field and error positions.
   454  			r, _, _, _ = newReader(tt)
   455  			for recNum := 0; ; recNum++ {
   456  				rec, err := r.Read()
   457  				var wantErr error
   458  				if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
   459  					wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
   460  				} else if recNum >= len(tt.Output) {
   461  					wantErr = io.EOF
   462  				}
   463  				if !reflect.DeepEqual(err, wantErr) {
   464  					t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
   465  				}
   466  				// ErrFieldCount is explicitly non-fatal.
   467  				if err != nil && !errors.Is(err, ErrFieldCount) {
   468  					if recNum < len(tt.Output) {
   469  						t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
   470  					}
   471  					break
   472  				}
   473  				if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) {
   474  					t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
   475  				}
   476  				pos := positions[recNum]
   477  				if len(pos) != len(rec) {
   478  					t.Fatalf("mismatched position length at record %d", recNum)
   479  				}
   480  				for i := range rec {
   481  					line, col := r.FieldPos(i)
   482  					if got, want := [2]int{line, col}, pos[i]; got != want {
   483  						t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
   484  					}
   485  				}
   486  			}
   487  		})
   488  	}
   489  }
   490  
   491  // firstError returns the first non-nil error in errs,
   492  // with the position adjusted according to the error's
   493  // index inside positions.
   494  func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
   495  	for i, err := range errs {
   496  		if err != nil {
   497  			return errorWithPosition(err, i, positions, errPositions)
   498  		}
   499  	}
   500  	return nil
   501  }
   502  
   503  func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
   504  	parseErr, ok := err.(*ParseError)
   505  	if !ok {
   506  		return err
   507  	}
   508  	if recNum >= len(positions) {
   509  		panic(fmt.Errorf("no positions found for error at record %d", recNum))
   510  	}
   511  	errPos, ok := errPositions[recNum]
   512  	if !ok {
   513  		panic(fmt.Errorf("no error position found for error at record %d", recNum))
   514  	}
   515  	parseErr1 := *parseErr
   516  	parseErr1.StartLine = positions[recNum][0][0]
   517  	parseErr1.Line = errPos[0]
   518  	parseErr1.Column = errPos[1]
   519  	return &parseErr1
   520  }
   521  
   522  // makePositions returns the expected field positions of all
   523  // the fields in text, the positions of any errors, and the text with the position markers
   524  // removed.
   525  //
   526  // The start of each field is marked with a § symbol;
   527  // CSV lines are separated by ¶ symbols;
   528  // Error positions are marked with ∑ symbols.
   529  func makePositions(text string) ([][][2]int, map[int][2]int, string) {
   530  	buf := make([]byte, 0, len(text))
   531  	var positions [][][2]int
   532  	errPositions := make(map[int][2]int)
   533  	line, col := 1, 1
   534  	recNum := 0
   535  
   536  	for len(text) > 0 {
   537  		r, size := utf8.DecodeRuneInString(text)
   538  		switch r {
   539  		case '\n':
   540  			line++
   541  			col = 1
   542  			buf = append(buf, '\n')
   543  		case '§':
   544  			if len(positions) == 0 {
   545  				positions = append(positions, [][2]int{})
   546  			}
   547  			positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
   548  		case '¶':
   549  			positions = append(positions, [][2]int{})
   550  			recNum++
   551  		case '∑':
   552  			errPositions[recNum] = [2]int{line, col}
   553  		default:
   554  			buf = append(buf, text[:size]...)
   555  			col += size
   556  		}
   557  		text = text[size:]
   558  	}
   559  	return positions, errPositions, string(buf)
   560  }
   561  
   562  // nTimes is an io.Reader which yields the string s n times.
   563  type nTimes struct {
   564  	s   string
   565  	n   int
   566  	off int
   567  }
   568  
   569  func (r *nTimes) Read(p []byte) (n int, err error) {
   570  	for {
   571  		if r.n <= 0 || r.s == "" {
   572  			return n, io.EOF
   573  		}
   574  		n0 := copy(p, r.s[r.off:])
   575  		p = p[n0:]
   576  		n += n0
   577  		r.off += n0
   578  		if r.off == len(r.s) {
   579  			r.off = 0
   580  			r.n--
   581  		}
   582  		if len(p) == 0 {
   583  			return
   584  		}
   585  	}
   586  }
   587  
   588  // benchmarkRead measures reading the provided CSV rows data.
   589  // initReader, if non-nil, modifies the Reader before it's used.
   590  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   591  	b.ReportAllocs()
   592  	r := NewReader(&nTimes{s: rows, n: b.N})
   593  	if initReader != nil {
   594  		initReader(r)
   595  	}
   596  	for {
   597  		_, err := r.Read()
   598  		if err == io.EOF {
   599  			break
   600  		}
   601  		if err != nil {
   602  			b.Fatal(err)
   603  		}
   604  	}
   605  }
   606  
// benchmarkCSVData is the input shared by the small-record benchmarks: ten
// four-field rows, the first five unquoted and the last five the same values
// quoted, with one more empty field in each successive row of a group.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
   618  
   619  func BenchmarkRead(b *testing.B) {
   620  	benchmarkRead(b, nil, benchmarkCSVData)
   621  }
   622  
   623  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   624  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   625  }
   626  
   627  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   628  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   629  }
   630  
// BenchmarkReadLargeFields measures Read on records with long fields. The
// input is a block of four five-field rows, most fields being long runs of a
// single letter, repeated three times with strings.Repeat. The Reader is used
// as NewReader returns it (no init function is passed).
func BenchmarkReadLargeFields(b *testing.B) {
	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
   638  
   639  func BenchmarkReadReuseRecord(b *testing.B) {
   640  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   641  }
   642  
   643  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   644  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   645  }
   646  
   647  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   648  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   649  }
   650  
// BenchmarkReadReuseRecordLargeFields measures Read on records with long
// fields while ReuseRecord is enabled. The input is a block of four
// five-field rows, most fields being long runs of a single letter, repeated
// three times with strings.Repeat.
func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
   658  

View as plain text