Source file src/encoding/xml/xml_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	tests := []struct {
    37  		name   string
    38  		tokens []Token
    39  		ok     bool
    40  	}{
    41  		{
    42  			name: "OK",
    43  			tokens: []Token{
    44  				start,
    45  				start.End(),
    46  			},
    47  			ok: true,
    48  		},
    49  		{
    50  			name: "Malformed",
    51  			tokens: []Token{
    52  				start,
    53  				StartElement{Name: Name{Local: "bad"}},
    54  				start.End(),
    55  			},
    56  			ok: false,
    57  		},
    58  	}
    59  	for _, tc := range tests {
    60  		for _, eof := range []bool{true, false} {
    61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
    62  			t.Run(name, func(t *testing.T) {
    63  				d := NewTokenDecoder(&toks{
    64  					earlyEOF: eof,
    65  					t:        tc.tokens,
    66  				})
    67  				err := d.Decode(&struct {
    68  					XMLName Name `xml:"test"`
    69  				}{})
    70  				if tc.ok && err != nil {
    71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
    72  				}
    73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
    74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
    75  				}
    76  			})
    77  		}
    78  	}
    79  }
    80  
    81  type toksNil struct {
    82  	returnEOF bool
    83  	t         []Token
    84  }
    85  
    86  func (t *toksNil) Token() (Token, error) {
    87  	if len(t.t) == 0 {
    88  		if !t.returnEOF {
    89  			// Return nil, nil before returning an EOF. It's legal, but
    90  			// discouraged.
    91  			t.returnEOF = true
    92  			return nil, nil
    93  		}
    94  		return nil, io.EOF
    95  	}
    96  	var tok Token
    97  	tok, t.t = t.t[0], t.t[1:]
    98  	return tok, nil
    99  }
   100  
   101  func TestDecodeNilToken(t *testing.T) {
   102  	for _, strict := range []bool{true, false} {
   103  		name := fmt.Sprintf("Strict=%v", strict)
   104  		t.Run(name, func(t *testing.T) {
   105  			start := StartElement{Name: Name{Local: "test"}}
   106  			bad := StartElement{Name: Name{Local: "bad"}}
   107  			d := NewTokenDecoder(&toksNil{
   108  				// Malformed
   109  				t: []Token{start, bad, start.End()},
   110  			})
   111  			d.Strict = strict
   112  			err := d.Decode(&struct {
   113  				XMLName Name `xml:"test"`
   114  			}{})
   115  			if _, ok := err.(*SyntaxError); !ok {
   116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
   117  			}
   118  		})
   119  	}
   120  }
   121  
   122  const testInput = `
   123  <?xml version="1.0" encoding="UTF-8"?>
   124  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   125    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
   126  <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
   127  	"\r\n\t" + `  >
   128    <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
   129    <query>&何; &is-it;</query>
   130    <goodbye />
   131    <outer foo:attr="value" xmlns:tag="ns4">
   132      <inner/>
   133    </outer>
   134    <tag:name>
   135      <![CDATA[Some text here.]]>
   136    </tag:name>
   137  </body><!-- missing final newline -->`
   138  
   139  var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
   140  
   141  var rawTokens = []Token{
   142  	CharData("\n"),
   143  	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
   144  	CharData("\n"),
   145  	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   146    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
   147  	CharData("\n"),
   148  	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
   149  	CharData("\n  "),
   150  	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
   151  	CharData("World <>'\" 白鵬翔"),
   152  	EndElement{Name{"", "hello"}},
   153  	CharData("\n  "),
   154  	StartElement{Name{"", "query"}, []Attr{}},
   155  	CharData("What is it?"),
   156  	EndElement{Name{"", "query"}},
   157  	CharData("\n  "),
   158  	StartElement{Name{"", "goodbye"}, []Attr{}},
   159  	EndElement{Name{"", "goodbye"}},
   160  	CharData("\n  "),
   161  	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
   162  	CharData("\n    "),
   163  	StartElement{Name{"", "inner"}, []Attr{}},
   164  	EndElement{Name{"", "inner"}},
   165  	CharData("\n  "),
   166  	EndElement{Name{"", "outer"}},
   167  	CharData("\n  "),
   168  	StartElement{Name{"tag", "name"}, []Attr{}},
   169  	CharData("\n    "),
   170  	CharData("Some text here."),
   171  	CharData("\n  "),
   172  	EndElement{Name{"tag", "name"}},
   173  	CharData("\n"),
   174  	EndElement{Name{"", "body"}},
   175  	Comment(" missing final newline "),
   176  }
   177  
   178  var cookedTokens = []Token{
   179  	CharData("\n"),
   180  	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
   181  	CharData("\n"),
   182  	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   183    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
   184  	CharData("\n"),
   185  	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
   186  	CharData("\n  "),
   187  	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
   188  	CharData("World <>'\" 白鵬翔"),
   189  	EndElement{Name{"ns2", "hello"}},
   190  	CharData("\n  "),
   191  	StartElement{Name{"ns2", "query"}, []Attr{}},
   192  	CharData("What is it?"),
   193  	EndElement{Name{"ns2", "query"}},
   194  	CharData("\n  "),
   195  	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
   196  	EndElement{Name{"ns2", "goodbye"}},
   197  	CharData("\n  "),
   198  	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
   199  	CharData("\n    "),
   200  	StartElement{Name{"ns2", "inner"}, []Attr{}},
   201  	EndElement{Name{"ns2", "inner"}},
   202  	CharData("\n  "),
   203  	EndElement{Name{"ns2", "outer"}},
   204  	CharData("\n  "),
   205  	StartElement{Name{"ns3", "name"}, []Attr{}},
   206  	CharData("\n    "),
   207  	CharData("Some text here."),
   208  	CharData("\n  "),
   209  	EndElement{Name{"ns3", "name"}},
   210  	CharData("\n"),
   211  	EndElement{Name{"ns2", "body"}},
   212  	Comment(" missing final newline "),
   213  }
   214  
   215  const testInputAltEncoding = `
   216  <?xml version="1.0" encoding="x-testing-uppercase"?>
   217  <TAG>VALUE</TAG>`
   218  
   219  var rawTokensAltEncoding = []Token{
   220  	CharData("\n"),
   221  	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
   222  	CharData("\n"),
   223  	StartElement{Name{"", "tag"}, []Attr{}},
   224  	CharData("value"),
   225  	EndElement{Name{"", "tag"}},
   226  }
   227  
   228  var xmlInput = []string{
   229  	// unexpected EOF cases
   230  	"<",
   231  	"<t",
   232  	"<t ",
   233  	"<t/",
   234  	"<!",
   235  	"<!-",
   236  	"<!--",
   237  	"<!--c-",
   238  	"<!--c--",
   239  	"<!d",
   240  	"<t></",
   241  	"<t></t",
   242  	"<?",
   243  	"<?p",
   244  	"<t a",
   245  	"<t a=",
   246  	"<t a='",
   247  	"<t a=''",
   248  	"<t/><![",
   249  	"<t/><![C",
   250  	"<t/><![CDATA[d",
   251  	"<t/><![CDATA[d]",
   252  	"<t/><![CDATA[d]]",
   253  
   254  	// other Syntax errors
   255  	"<>",
   256  	"<t/a",
   257  	"<0 />",
   258  	"<?0 >",
   259  	//	"<!0 >",	// let the Token() caller handle
   260  	"</0>",
   261  	"<t 0=''>",
   262  	"<t a='&'>",
   263  	"<t a='<'>",
   264  	"<t>&nbspc;</t>",
   265  	"<t a>",
   266  	"<t a=>",
   267  	"<t a=v>",
   268  	//	"<![CDATA[d]]>",	// let the Token() caller handle
   269  	"<t></e>",
   270  	"<t></>",
   271  	"<t></t!",
   272  	"<t>cdata]]></t>",
   273  }
   274  
   275  func TestRawToken(t *testing.T) {
   276  	d := NewDecoder(strings.NewReader(testInput))
   277  	d.Entity = testEntity
   278  	testRawToken(t, d, testInput, rawTokens)
   279  }
   280  
   281  const nonStrictInput = `
   282  <tag>non&entity</tag>
   283  <tag>&unknown;entity</tag>
   284  <tag>&#123</tag>
   285  <tag>&#zzz;</tag>
   286  <tag>&なまえ3;</tag>
   287  <tag>&lt-gt;</tag>
   288  <tag>&;</tag>
   289  <tag>&0a;</tag>
   290  `
   291  
   292  var nonStrictTokens = []Token{
   293  	CharData("\n"),
   294  	StartElement{Name{"", "tag"}, []Attr{}},
   295  	CharData("non&entity"),
   296  	EndElement{Name{"", "tag"}},
   297  	CharData("\n"),
   298  	StartElement{Name{"", "tag"}, []Attr{}},
   299  	CharData("&unknown;entity"),
   300  	EndElement{Name{"", "tag"}},
   301  	CharData("\n"),
   302  	StartElement{Name{"", "tag"}, []Attr{}},
   303  	CharData("&#123"),
   304  	EndElement{Name{"", "tag"}},
   305  	CharData("\n"),
   306  	StartElement{Name{"", "tag"}, []Attr{}},
   307  	CharData("&#zzz;"),
   308  	EndElement{Name{"", "tag"}},
   309  	CharData("\n"),
   310  	StartElement{Name{"", "tag"}, []Attr{}},
   311  	CharData("&なまえ3;"),
   312  	EndElement{Name{"", "tag"}},
   313  	CharData("\n"),
   314  	StartElement{Name{"", "tag"}, []Attr{}},
   315  	CharData("&lt-gt;"),
   316  	EndElement{Name{"", "tag"}},
   317  	CharData("\n"),
   318  	StartElement{Name{"", "tag"}, []Attr{}},
   319  	CharData("&;"),
   320  	EndElement{Name{"", "tag"}},
   321  	CharData("\n"),
   322  	StartElement{Name{"", "tag"}, []Attr{}},
   323  	CharData("&0a;"),
   324  	EndElement{Name{"", "tag"}},
   325  	CharData("\n"),
   326  }
   327  
   328  func TestNonStrictRawToken(t *testing.T) {
   329  	d := NewDecoder(strings.NewReader(nonStrictInput))
   330  	d.Strict = false
   331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   332  }
   333  
   334  type downCaser struct {
   335  	t *testing.T
   336  	r io.ByteReader
   337  }
   338  
   339  func (d *downCaser) ReadByte() (c byte, err error) {
   340  	c, err = d.r.ReadByte()
   341  	if c >= 'A' && c <= 'Z' {
   342  		c += 'a' - 'A'
   343  	}
   344  	return
   345  }
   346  
   347  func (d *downCaser) Read(p []byte) (int, error) {
   348  	d.t.Fatalf("unexpected Read call on downCaser reader")
   349  	panic("unreachable")
   350  }
   351  
   352  func TestRawTokenAltEncoding(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   355  		if charset != "x-testing-uppercase" {
   356  			t.Fatalf("unexpected charset %q", charset)
   357  		}
   358  		return &downCaser{t, input.(io.ByteReader)}, nil
   359  	}
   360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   361  }
   362  
   363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   365  	token, err := d.RawToken()
   366  	if token == nil {
   367  		t.Fatalf("expected a token on first RawToken call")
   368  	}
   369  	if err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	token, err = d.RawToken()
   373  	if token != nil {
   374  		t.Errorf("expected a nil token; got %#v", token)
   375  	}
   376  	if err == nil {
   377  		t.Fatalf("expected an error on second RawToken call")
   378  	}
   379  	const encoding = "x-testing-uppercase"
   380  	if !strings.Contains(err.Error(), encoding) {
   381  		t.Errorf("expected error to contain %q; got error: %v",
   382  			encoding, err)
   383  	}
   384  }
   385  
   386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   387  	lastEnd := int64(0)
   388  	for i, want := range rawTokens {
   389  		start := d.InputOffset()
   390  		have, err := d.RawToken()
   391  		end := d.InputOffset()
   392  		if err != nil {
   393  			t.Fatalf("token %d: unexpected error: %s", i, err)
   394  		}
   395  		if !reflect.DeepEqual(have, want) {
   396  			var shave, swant string
   397  			if _, ok := have.(CharData); ok {
   398  				shave = fmt.Sprintf("CharData(%q)", have)
   399  			} else {
   400  				shave = fmt.Sprintf("%#v", have)
   401  			}
   402  			if _, ok := want.(CharData); ok {
   403  				swant = fmt.Sprintf("CharData(%q)", want)
   404  			} else {
   405  				swant = fmt.Sprintf("%#v", want)
   406  			}
   407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   408  		}
   409  
   410  		// Check that InputOffset returned actual token.
   411  		switch {
   412  		case start < lastEnd:
   413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   414  		case start >= end:
   415  			// Special case: EndElement can be synthesized.
   416  			if start == end && end == lastEnd {
   417  				break
   418  			}
   419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   420  		case end > int64(len(raw)):
   421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   422  		default:
   423  			text := raw[start:end]
   424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   426  			}
   427  		}
   428  		lastEnd = end
   429  	}
   430  }
   431  
   432  // Ensure that directives (specifically !DOCTYPE) include the complete
   433  // text of any nested directives, noting that < and > do not change
   434  // nesting depth if they are in single or double quotes.
   435  
   436  var nestedDirectivesInput = `
   437  <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   438  <!DOCTYPE [<!ENTITY xlt ">">]>
   439  <!DOCTYPE [<!ENTITY xlt "<">]>
   440  <!DOCTYPE [<!ENTITY xlt '>'>]>
   441  <!DOCTYPE [<!ENTITY xlt '<'>]>
   442  <!DOCTYPE [<!ENTITY xlt '">'>]>
   443  <!DOCTYPE [<!ENTITY xlt "'<">]>
   444  `
   445  
   446  var nestedDirectivesTokens = []Token{
   447  	CharData("\n"),
   448  	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   449  	CharData("\n"),
   450  	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
   451  	CharData("\n"),
   452  	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
   453  	CharData("\n"),
   454  	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
   455  	CharData("\n"),
   456  	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
   457  	CharData("\n"),
   458  	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
   459  	CharData("\n"),
   460  	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
   461  	CharData("\n"),
   462  }
   463  
   464  func TestNestedDirectives(t *testing.T) {
   465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   466  
   467  	for i, want := range nestedDirectivesTokens {
   468  		have, err := d.Token()
   469  		if err != nil {
   470  			t.Fatalf("token %d: unexpected error: %s", i, err)
   471  		}
   472  		if !reflect.DeepEqual(have, want) {
   473  			t.Errorf("token %d = %#v want %#v", i, have, want)
   474  		}
   475  	}
   476  }
   477  
   478  func TestToken(t *testing.T) {
   479  	d := NewDecoder(strings.NewReader(testInput))
   480  	d.Entity = testEntity
   481  
   482  	for i, want := range cookedTokens {
   483  		have, err := d.Token()
   484  		if err != nil {
   485  			t.Fatalf("token %d: unexpected error: %s", i, err)
   486  		}
   487  		if !reflect.DeepEqual(have, want) {
   488  			t.Errorf("token %d = %#v want %#v", i, have, want)
   489  		}
   490  	}
   491  }
   492  
   493  func TestSyntax(t *testing.T) {
   494  	for i := range xmlInput {
   495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   496  		var err error
   497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   498  		}
   499  		if _, ok := err.(*SyntaxError); !ok {
   500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   501  		}
   502  	}
   503  }
   504  
   505  func TestInputLinePos(t *testing.T) {
   506  	testInput := `<root>
   507  <?pi
   508   ?>  <elt
   509  att
   510  =
   511  "val">
   512  <![CDATA[
   513  ]]><!--
   514  
   515  --></elt>
   516  </root>`
   517  	linePos := [][]int{
   518  		{1, 7},
   519  		{2, 1},
   520  		{3, 4},
   521  		{3, 6},
   522  		{6, 7},
   523  		{7, 1},
   524  		{8, 4},
   525  		{10, 4},
   526  		{10, 10},
   527  		{11, 1},
   528  		{11, 8},
   529  	}
   530  	dec := NewDecoder(strings.NewReader(testInput))
   531  	for _, want := range linePos {
   532  		if _, err := dec.Token(); err != nil {
   533  			t.Errorf("Unexpected error: %v", err)
   534  			continue
   535  		}
   536  
   537  		gotLine, gotCol := dec.InputPos()
   538  		if gotLine != want[0] || gotCol != want[1] {
   539  			t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
   540  		}
   541  	}
   542  }
   543  
   544  type allScalars struct {
   545  	True1     bool
   546  	True2     bool
   547  	False1    bool
   548  	False2    bool
   549  	Int       int
   550  	Int8      int8
   551  	Int16     int16
   552  	Int32     int32
   553  	Int64     int64
   554  	Uint      int
   555  	Uint8     uint8
   556  	Uint16    uint16
   557  	Uint32    uint32
   558  	Uint64    uint64
   559  	Uintptr   uintptr
   560  	Float32   float32
   561  	Float64   float64
   562  	String    string
   563  	PtrString *string
   564  }
   565  
   566  var all = allScalars{
   567  	True1:     true,
   568  	True2:     true,
   569  	False1:    false,
   570  	False2:    false,
   571  	Int:       1,
   572  	Int8:      -2,
   573  	Int16:     3,
   574  	Int32:     -4,
   575  	Int64:     5,
   576  	Uint:      6,
   577  	Uint8:     7,
   578  	Uint16:    8,
   579  	Uint32:    9,
   580  	Uint64:    10,
   581  	Uintptr:   11,
   582  	Float32:   13.0,
   583  	Float64:   14.0,
   584  	String:    "15",
   585  	PtrString: &sixteen,
   586  }
   587  
   588  var sixteen = "16"
   589  
   590  const testScalarsInput = `<allscalars>
   591  	<True1>true</True1>
   592  	<True2>1</True2>
   593  	<False1>false</False1>
   594  	<False2>0</False2>
   595  	<Int>1</Int>
   596  	<Int8>-2</Int8>
   597  	<Int16>3</Int16>
   598  	<Int32>-4</Int32>
   599  	<Int64>5</Int64>
   600  	<Uint>6</Uint>
   601  	<Uint8>7</Uint8>
   602  	<Uint16>8</Uint16>
   603  	<Uint32>9</Uint32>
   604  	<Uint64>10</Uint64>
   605  	<Uintptr>11</Uintptr>
   606  	<Float>12.0</Float>
   607  	<Float32>13.0</Float32>
   608  	<Float64>14.0</Float64>
   609  	<String>15</String>
   610  	<PtrString>16</PtrString>
   611  </allscalars>`
   612  
   613  func TestAllScalars(t *testing.T) {
   614  	var a allScalars
   615  	err := Unmarshal([]byte(testScalarsInput), &a)
   616  
   617  	if err != nil {
   618  		t.Fatal(err)
   619  	}
   620  	if !reflect.DeepEqual(a, all) {
   621  		t.Errorf("have %+v want %+v", a, all)
   622  	}
   623  }
   624  
   625  type item struct {
   626  	FieldA string
   627  }
   628  
   629  func TestIssue569(t *testing.T) {
   630  	data := `<item><FieldA>abcd</FieldA></item>`
   631  	var i item
   632  	err := Unmarshal([]byte(data), &i)
   633  
   634  	if err != nil || i.FieldA != "abcd" {
   635  		t.Fatal("Expecting abcd")
   636  	}
   637  }
   638  
   639  func TestUnquotedAttrs(t *testing.T) {
   640  	data := "<tag attr=azAZ09:-_\t>"
   641  	d := NewDecoder(strings.NewReader(data))
   642  	d.Strict = false
   643  	token, err := d.Token()
   644  	if _, ok := err.(*SyntaxError); ok {
   645  		t.Errorf("Unexpected error: %v", err)
   646  	}
   647  	if token.(StartElement).Name.Local != "tag" {
   648  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   649  	}
   650  	attr := token.(StartElement).Attr[0]
   651  	if attr.Value != "azAZ09:-_" {
   652  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   653  	}
   654  	if attr.Name.Local != "attr" {
   655  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   656  	}
   657  }
   658  
   659  func TestValuelessAttrs(t *testing.T) {
   660  	tests := [][3]string{
   661  		{"<p nowrap>", "p", "nowrap"},
   662  		{"<p nowrap >", "p", "nowrap"},
   663  		{"<input checked/>", "input", "checked"},
   664  		{"<input checked />", "input", "checked"},
   665  	}
   666  	for _, test := range tests {
   667  		d := NewDecoder(strings.NewReader(test[0]))
   668  		d.Strict = false
   669  		token, err := d.Token()
   670  		if _, ok := err.(*SyntaxError); ok {
   671  			t.Errorf("Unexpected error: %v", err)
   672  		}
   673  		if token.(StartElement).Name.Local != test[1] {
   674  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   675  		}
   676  		attr := token.(StartElement).Attr[0]
   677  		if attr.Value != test[2] {
   678  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   679  		}
   680  		if attr.Name.Local != test[2] {
   681  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   682  		}
   683  	}
   684  }
   685  
   686  func TestCopyTokenCharData(t *testing.T) {
   687  	data := []byte("same data")
   688  	var tok1 Token = CharData(data)
   689  	tok2 := CopyToken(tok1)
   690  	if !reflect.DeepEqual(tok1, tok2) {
   691  		t.Error("CopyToken(CharData) != CharData")
   692  	}
   693  	data[1] = 'o'
   694  	if reflect.DeepEqual(tok1, tok2) {
   695  		t.Error("CopyToken(CharData) uses same buffer.")
   696  	}
   697  }
   698  
   699  func TestCopyTokenStartElement(t *testing.T) {
   700  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   701  	var tok1 Token = elt
   702  	tok2 := CopyToken(tok1)
   703  	if tok1.(StartElement).Attr[0].Value != "en" {
   704  		t.Error("CopyToken overwrote Attr[0]")
   705  	}
   706  	if !reflect.DeepEqual(tok1, tok2) {
   707  		t.Error("CopyToken(StartElement) != StartElement")
   708  	}
   709  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   710  	if reflect.DeepEqual(tok1, tok2) {
   711  		t.Error("CopyToken(CharData) uses same buffer.")
   712  	}
   713  }
   714  
   715  func TestCopyTokenComment(t *testing.T) {
   716  	data := []byte("<!-- some comment -->")
   717  	var tok1 Token = Comment(data)
   718  	tok2 := CopyToken(tok1)
   719  	if !reflect.DeepEqual(tok1, tok2) {
   720  		t.Error("CopyToken(Comment) != Comment")
   721  	}
   722  	data[1] = 'o'
   723  	if reflect.DeepEqual(tok1, tok2) {
   724  		t.Error("CopyToken(Comment) uses same buffer.")
   725  	}
   726  }
   727  
   728  func TestSyntaxErrorLineNum(t *testing.T) {
   729  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   730  	d := NewDecoder(strings.NewReader(testInput))
   731  	var err error
   732  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   733  	}
   734  	synerr, ok := err.(*SyntaxError)
   735  	if !ok {
   736  		t.Error("Expected SyntaxError.")
   737  	}
   738  	if synerr.Line != 3 {
   739  		t.Error("SyntaxError didn't have correct line number.")
   740  	}
   741  }
   742  
   743  func TestTrailingRawToken(t *testing.T) {
   744  	input := `<FOO></FOO>  `
   745  	d := NewDecoder(strings.NewReader(input))
   746  	var err error
   747  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   748  	}
   749  	if err != io.EOF {
   750  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   751  	}
   752  }
   753  
   754  func TestTrailingToken(t *testing.T) {
   755  	input := `<FOO></FOO>  `
   756  	d := NewDecoder(strings.NewReader(input))
   757  	var err error
   758  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   759  	}
   760  	if err != io.EOF {
   761  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   762  	}
   763  }
   764  
   765  func TestEntityInsideCDATA(t *testing.T) {
   766  	input := `<test><![CDATA[ &val=foo ]]></test>`
   767  	d := NewDecoder(strings.NewReader(input))
   768  	var err error
   769  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   770  	}
   771  	if err != io.EOF {
   772  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   773  	}
   774  }
   775  
   776  var characterTests = []struct {
   777  	in  string
   778  	err string
   779  }{
   780  	{"\x12<doc/>", "illegal character code U+0012"},
   781  	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
   782  	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
   783  	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
   784  	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
   785  	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
   786  	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
   787  	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
   788  	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
   789  }
   790  
   791  func TestDisallowedCharacters(t *testing.T) {
   792  
   793  	for i, tt := range characterTests {
   794  		d := NewDecoder(strings.NewReader(tt.in))
   795  		var err error
   796  
   797  		for err == nil {
   798  			_, err = d.Token()
   799  		}
   800  		synerr, ok := err.(*SyntaxError)
   801  		if !ok {
   802  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   803  		}
   804  		if synerr.Msg != tt.err {
   805  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   806  		}
   807  	}
   808  }
   809  
   810  func TestIsInCharacterRange(t *testing.T) {
   811  	invalid := []rune{
   812  		utf8.MaxRune + 1,
   813  		0xD800, // surrogate min
   814  		0xDFFF, // surrogate max
   815  		-1,
   816  	}
   817  	for _, r := range invalid {
   818  		if isInCharacterRange(r) {
   819  			t.Errorf("rune %U considered valid", r)
   820  		}
   821  	}
   822  }
   823  
   824  var procInstTests = []struct {
   825  	input  string
   826  	expect [2]string
   827  }{
   828  	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
   829  	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
   830  	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
   831  	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
   832  	{`encoding="FOO" `, [2]string{"", "FOO"}},
   833  }
   834  
   835  func TestProcInstEncoding(t *testing.T) {
   836  	for _, test := range procInstTests {
   837  		if got := procInst("version", test.input); got != test.expect[0] {
   838  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   839  		}
   840  		if got := procInst("encoding", test.input); got != test.expect[1] {
   841  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   842  		}
   843  	}
   844  }
   845  
   846  // Ensure that directives with comments include the complete
   847  // text of any nested directives.
   848  
   849  var directivesWithCommentsInput = `
   850  <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   851  <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
   852  <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
   853  `
   854  
   855  var directivesWithCommentsTokens = []Token{
   856  	CharData("\n"),
   857  	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   858  	CharData("\n"),
   859  	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
   860  	CharData("\n"),
   861  	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
   862  	CharData("\n"),
   863  }
   864  
   865  func TestDirectivesWithComments(t *testing.T) {
   866  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   867  
   868  	for i, want := range directivesWithCommentsTokens {
   869  		have, err := d.Token()
   870  		if err != nil {
   871  			t.Fatalf("token %d: unexpected error: %s", i, err)
   872  		}
   873  		if !reflect.DeepEqual(have, want) {
   874  			t.Errorf("token %d = %#v want %#v", i, have, want)
   875  		}
   876  	}
   877  }
   878  
   879  // Writer whose Write method always returns an error.
   880  type errWriter struct{}
   881  
   882  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   883  
   884  func TestEscapeTextIOErrors(t *testing.T) {
   885  	expectErr := "unwritable"
   886  	err := EscapeText(errWriter{}, []byte{'A'})
   887  
   888  	if err == nil || err.Error() != expectErr {
   889  		t.Errorf("have %v, want %v", err, expectErr)
   890  	}
   891  }
   892  
   893  func TestEscapeTextInvalidChar(t *testing.T) {
   894  	input := []byte("A \x00 terminated string.")
   895  	expected := "A \uFFFD terminated string."
   896  
   897  	buff := new(strings.Builder)
   898  	if err := EscapeText(buff, input); err != nil {
   899  		t.Fatalf("have %v, want nil", err)
   900  	}
   901  	text := buff.String()
   902  
   903  	if text != expected {
   904  		t.Errorf("have %v, want %v", text, expected)
   905  	}
   906  }
   907  
   908  func TestIssue5880(t *testing.T) {
   909  	type T []byte
   910  	data, err := Marshal(T{192, 168, 0, 1})
   911  	if err != nil {
   912  		t.Errorf("Marshal error: %v", err)
   913  	}
   914  	if !utf8.Valid(data) {
   915  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   916  	}
   917  }
   918  
   919  func TestIssue8535(t *testing.T) {
   920  
   921  	type ExampleConflict struct {
   922  		XMLName  Name   `xml:"example"`
   923  		Link     string `xml:"link"`
   924  		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
   925  	}
   926  	testCase := `<example>
   927  			<title>Example</title>
   928  			<link>http://example.com/default</link> <!-- not assigned -->
   929  			<link>http://example.com/home</link> <!-- not assigned -->
   930  			<ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
   931  		</example>`
   932  
   933  	var dest ExampleConflict
   934  	d := NewDecoder(strings.NewReader(testCase))
   935  	if err := d.Decode(&dest); err != nil {
   936  		t.Fatal(err)
   937  	}
   938  }
   939  
   940  func TestEncodeXMLNS(t *testing.T) {
   941  	testCases := []struct {
   942  		f    func() ([]byte, error)
   943  		want string
   944  		ok   bool
   945  	}{
   946  		{encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   947  		{encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
   948  		{encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   949  		{encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
   950  	}
   951  
   952  	for i, tc := range testCases {
   953  		if b, err := tc.f(); err == nil {
   954  			if got, want := string(b), tc.want; got != want {
   955  				t.Errorf("%d: got %s, want %s \n", i, got, want)
   956  			}
   957  		} else {
   958  			t.Errorf("%d: marshal failed with %s", i, err)
   959  		}
   960  	}
   961  }
   962  
   963  func encodeXMLNS1() ([]byte, error) {
   964  
   965  	type T struct {
   966  		XMLName Name   `xml:"Test"`
   967  		Ns      string `xml:"xmlns,attr"`
   968  		Body    string
   969  	}
   970  
   971  	s := &T{Ns: "http://example.com/ns", Body: "hello world"}
   972  	return Marshal(s)
   973  }
   974  
   975  func encodeXMLNS2() ([]byte, error) {
   976  
   977  	type Test struct {
   978  		Body string `xml:"http://example.com/ns body"`
   979  	}
   980  
   981  	s := &Test{Body: "hello world"}
   982  	return Marshal(s)
   983  }
   984  
   985  func encodeXMLNS3() ([]byte, error) {
   986  
   987  	type Test struct {
   988  		XMLName Name `xml:"http://example.com/ns Test"`
   989  		Body    string
   990  	}
   991  
   992  	//s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
   993  	// as documentation states
   994  	s := &Test{Body: "hello world"}
   995  	return Marshal(s)
   996  }
   997  
   998  func encodeXMLNS4() ([]byte, error) {
   999  
  1000  	type Test struct {
  1001  		Ns   string `xml:"xmlns,attr"`
  1002  		Body string
  1003  	}
  1004  
  1005  	s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
  1006  	return Marshal(s)
  1007  }
  1008  
  1009  func TestIssue11405(t *testing.T) {
  1010  	testCases := []string{
  1011  		"<root>",
  1012  		"<root><foo>",
  1013  		"<root><foo></foo>",
  1014  	}
  1015  	for _, tc := range testCases {
  1016  		d := NewDecoder(strings.NewReader(tc))
  1017  		var err error
  1018  		for {
  1019  			_, err = d.Token()
  1020  			if err != nil {
  1021  				break
  1022  			}
  1023  		}
  1024  		if _, ok := err.(*SyntaxError); !ok {
  1025  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
  1026  		}
  1027  	}
  1028  }
  1029  
  1030  func TestIssue12417(t *testing.T) {
  1031  	testCases := []struct {
  1032  		s  string
  1033  		ok bool
  1034  	}{
  1035  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
  1036  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
  1037  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
  1038  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
  1039  	}
  1040  	for _, tc := range testCases {
  1041  		d := NewDecoder(strings.NewReader(tc.s))
  1042  		var err error
  1043  		for {
  1044  			_, err = d.Token()
  1045  			if err != nil {
  1046  				if err == io.EOF {
  1047  					err = nil
  1048  				}
  1049  				break
  1050  			}
  1051  		}
  1052  		if err != nil && tc.ok {
  1053  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
  1054  			continue
  1055  		}
  1056  		if err == nil && !tc.ok {
  1057  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
  1058  		}
  1059  	}
  1060  }
  1061  
  1062  func TestIssue7113(t *testing.T) {
  1063  	type C struct {
  1064  		XMLName Name `xml:""` // Sets empty namespace
  1065  	}
  1066  
  1067  	type D struct {
  1068  		XMLName Name `xml:"d"`
  1069  	}
  1070  
  1071  	type A struct {
  1072  		XMLName Name `xml:""`
  1073  		C       C    `xml:""`
  1074  		D       D
  1075  	}
  1076  
  1077  	var a A
  1078  	structSpace := "b"
  1079  	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
  1080  	t.Log(xmlTest)
  1081  	err := Unmarshal([]byte(xmlTest), &a)
  1082  	if err != nil {
  1083  		t.Fatal(err)
  1084  	}
  1085  
  1086  	if a.XMLName.Space != structSpace {
  1087  		t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace)
  1088  	}
  1089  	if len(a.C.XMLName.Space) != 0 {
  1090  		t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space)
  1091  	}
  1092  
  1093  	var b []byte
  1094  	b, err = Marshal(&a)
  1095  	if err != nil {
  1096  		t.Fatal(err)
  1097  	}
  1098  	if len(a.C.XMLName.Space) != 0 {
  1099  		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
  1100  	}
  1101  	if string(b) != xmlTest {
  1102  		t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
  1103  	}
  1104  	var c A
  1105  	err = Unmarshal(b, &c)
  1106  	if err != nil {
  1107  		t.Fatalf("second Unmarshal failed: %s", err)
  1108  	}
  1109  	if c.XMLName.Space != "b" {
  1110  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
  1111  	}
  1112  	if len(c.C.XMLName.Space) != 0 {
  1113  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1114  	}
  1115  }
  1116  
  1117  func TestIssue20396(t *testing.T) {
  1118  
  1119  	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
  1120  
  1121  	testCases := []struct {
  1122  		s       string
  1123  		wantErr error
  1124  	}{
  1125  		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
  1126  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1127  		{`<a:te=st xmlns:a="abcd"/>`, attrError},
  1128  		{`<a:te&st xmlns:a="abcd"/>`, attrError},
  1129  		{`<a:test xmlns:a="abcd"/>`, nil},
  1130  		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
  1131  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1132  		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
  1133  		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
  1134  		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
  1135  	}
  1136  
  1137  	var dest string
  1138  	for _, tc := range testCases {
  1139  		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
  1140  			if got == nil {
  1141  				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
  1142  			} else if want == nil {
  1143  				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
  1144  			} else if got.Error() != want.Error() {
  1145  				t.Errorf("%s: got %v, want %v", tc.s, got, want)
  1146  			}
  1147  		}
  1148  	}
  1149  }
  1150  
  1151  func TestIssue20685(t *testing.T) {
  1152  	testCases := []struct {
  1153  		s  string
  1154  		ok bool
  1155  	}{
  1156  		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
  1157  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
  1158  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
  1159  		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
  1160  		{`<x:book xmlns:x="abcd">one</y:book>`, false},
  1161  		{`<x:book>one</y:book>`, false},
  1162  		{`<xbook>one</ybook>`, false},
  1163  	}
  1164  	for _, tc := range testCases {
  1165  		d := NewDecoder(strings.NewReader(tc.s))
  1166  		var err error
  1167  		for {
  1168  			_, err = d.Token()
  1169  			if err != nil {
  1170  				if err == io.EOF {
  1171  					err = nil
  1172  				}
  1173  				break
  1174  			}
  1175  		}
  1176  		if err != nil && tc.ok {
  1177  			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
  1178  			continue
  1179  		}
  1180  		if err == nil && !tc.ok {
  1181  			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
  1182  		}
  1183  	}
  1184  }
  1185  
  1186  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
  1187  	return func(src TokenReader) TokenReader {
  1188  		return mapper{
  1189  			t: src,
  1190  			f: mapping,
  1191  		}
  1192  	}
  1193  }
  1194  
  1195  type mapper struct {
  1196  	t TokenReader
  1197  	f func(Token) Token
  1198  }
  1199  
  1200  func (m mapper) Token() (Token, error) {
  1201  	tok, err := m.t.Token()
  1202  	if err != nil {
  1203  		return nil, err
  1204  	}
  1205  	return m.f(tok), nil
  1206  }
  1207  
  1208  func TestNewTokenDecoderIdempotent(t *testing.T) {
  1209  	d := NewDecoder(strings.NewReader(`<br>`))
  1210  	d2 := NewTokenDecoder(d)
  1211  	if d != d2 {
  1212  		t.Error("NewTokenDecoder did not detect underlying Decoder")
  1213  	}
  1214  }
  1215  
  1216  func TestWrapDecoder(t *testing.T) {
  1217  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
  1218  	m := tokenMap(func(t Token) Token {
  1219  		switch tok := t.(type) {
  1220  		case StartElement:
  1221  			if tok.Name.Local == "quote" {
  1222  				tok.Name.Local = "blocking"
  1223  				return tok
  1224  			}
  1225  		case EndElement:
  1226  			if tok.Name.Local == "quote" {
  1227  				tok.Name.Local = "blocking"
  1228  				return tok
  1229  			}
  1230  		}
  1231  		return t
  1232  	})
  1233  
  1234  	d = NewTokenDecoder(m(d))
  1235  
  1236  	o := struct {
  1237  		XMLName  Name   `xml:"blocking"`
  1238  		Chardata string `xml:",chardata"`
  1239  	}{}
  1240  
  1241  	if err := d.Decode(&o); err != nil {
  1242  		t.Fatal("Got unexpected error while decoding:", err)
  1243  	}
  1244  
  1245  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
  1246  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
  1247  	}
  1248  }
  1249  
  1250  type tokReader struct{}
  1251  
  1252  func (tokReader) Token() (Token, error) {
  1253  	return StartElement{}, nil
  1254  }
  1255  
  1256  type Failure struct{}
  1257  
  1258  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
  1259  	return nil
  1260  }
  1261  
  1262  func TestTokenUnmarshaler(t *testing.T) {
  1263  	defer func() {
  1264  		if r := recover(); r != nil {
  1265  			t.Error("Unexpected panic using custom token unmarshaler")
  1266  		}
  1267  	}()
  1268  
  1269  	d := NewTokenDecoder(tokReader{})
  1270  	d.Decode(&Failure{})
  1271  }
  1272  
  1273  func testRoundTrip(t *testing.T, input string) {
  1274  	d := NewDecoder(strings.NewReader(input))
  1275  	var tokens []Token
  1276  	var buf bytes.Buffer
  1277  	e := NewEncoder(&buf)
  1278  	for {
  1279  		tok, err := d.Token()
  1280  		if err == io.EOF {
  1281  			break
  1282  		}
  1283  		if err != nil {
  1284  			t.Fatalf("invalid input: %v", err)
  1285  		}
  1286  		if err := e.EncodeToken(tok); err != nil {
  1287  			t.Fatalf("failed to re-encode input: %v", err)
  1288  		}
  1289  		tokens = append(tokens, CopyToken(tok))
  1290  	}
  1291  	if err := e.Flush(); err != nil {
  1292  		t.Fatal(err)
  1293  	}
  1294  
  1295  	d = NewDecoder(&buf)
  1296  	for {
  1297  		tok, err := d.Token()
  1298  		if err == io.EOF {
  1299  			break
  1300  		}
  1301  		if err != nil {
  1302  			t.Fatalf("failed to decode output: %v", err)
  1303  		}
  1304  		if len(tokens) == 0 {
  1305  			t.Fatalf("unexpected token: %#v", tok)
  1306  		}
  1307  		a, b := tokens[0], tok
  1308  		if !reflect.DeepEqual(a, b) {
  1309  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
  1310  		}
  1311  		tokens = tokens[1:]
  1312  	}
  1313  	if len(tokens) > 0 {
  1314  		t.Fatalf("lost tokens: %#v", tokens)
  1315  	}
  1316  }
  1317  
  1318  func TestRoundTrip(t *testing.T) {
  1319  	tests := map[string]string{
  1320  		"trailing colon":         `<foo abc:="x"></foo>`,
  1321  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
  1322  	}
  1323  	for name, input := range tests {
  1324  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
  1325  	}
  1326  }
  1327  
  1328  func TestParseErrors(t *testing.T) {
  1329  	withDefaultHeader := func(s string) string {
  1330  		return `<?xml version="1.0" encoding="UTF-8"?>` + s
  1331  	}
  1332  	tests := []struct {
  1333  		src string
  1334  		err string
  1335  	}{
  1336  		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
  1337  		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
  1338  		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
  1339  		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
  1340  		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
  1341  		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
  1342  		{withDefaultHeader("\xf1"), `invalid UTF-8`},
  1343  
  1344  		// Header-related errors.
  1345  		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
  1346  
  1347  		// Cases below are for "no errors".
  1348  		{withDefaultHeader(`<?ok?>`), ``},
  1349  		{withDefaultHeader(`<?ok version="ok"?>`), ``},
  1350  	}
  1351  
  1352  	for _, test := range tests {
  1353  		d := NewDecoder(strings.NewReader(test.src))
  1354  		var err error
  1355  		for {
  1356  			_, err = d.Token()
  1357  			if err != nil {
  1358  				break
  1359  			}
  1360  		}
  1361  		if test.err == "" {
  1362  			if err != io.EOF {
  1363  				t.Errorf("parse %s: have %q error, expected none", test.src, err)
  1364  			}
  1365  			continue
  1366  		}
  1367  		// Inv: err != nil
  1368  		if err == io.EOF {
  1369  			t.Errorf("parse %s: unexpected EOF", test.src)
  1370  			continue
  1371  		}
  1372  		if !strings.Contains(err.Error(), test.err) {
  1373  			t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err)
  1374  			continue
  1375  		}
  1376  	}
  1377  }
  1378  
// testInputHTMLAutoClose is the document read by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose: an XML declaration followed by <br> elements written
// unclosed, self-closed, and explicitly closed, in several letter cases,
// plus one <span> element with an attribute and text content.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`
  1388  
  1389  func BenchmarkHTMLAutoClose(b *testing.B) {
  1390  	b.RunParallel(func(p *testing.PB) {
  1391  		for p.Next() {
  1392  			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1393  			d.Strict = false
  1394  			d.AutoClose = HTMLAutoClose
  1395  			d.Entity = HTMLEntity
  1396  			for {
  1397  				_, err := d.Token()
  1398  				if err != nil {
  1399  					if err == io.EOF {
  1400  						break
  1401  					}
  1402  					b.Fatalf("unexpected error: %v", err)
  1403  				}
  1404  			}
  1405  		}
  1406  	})
  1407  }
  1408  
  1409  func TestHTMLAutoClose(t *testing.T) {
  1410  	wantTokens := []Token{
  1411  		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
  1412  		CharData("\n"),
  1413  		StartElement{Name{"", "br"}, []Attr{}},
  1414  		EndElement{Name{"", "br"}},
  1415  		CharData("\n"),
  1416  		StartElement{Name{"", "br"}, []Attr{}},
  1417  		EndElement{Name{"", "br"}},
  1418  		StartElement{Name{"", "br"}, []Attr{}},
  1419  		EndElement{Name{"", "br"}},
  1420  		CharData("\n"),
  1421  		StartElement{Name{"", "br"}, []Attr{}},
  1422  		EndElement{Name{"", "br"}},
  1423  		StartElement{Name{"", "br"}, []Attr{}},
  1424  		EndElement{Name{"", "br"}},
  1425  		CharData("\n"),
  1426  		StartElement{Name{"", "br"}, []Attr{}},
  1427  		EndElement{Name{"", "br"}},
  1428  		CharData("\n"),
  1429  		StartElement{Name{"", "BR"}, []Attr{}},
  1430  		EndElement{Name{"", "BR"}},
  1431  		CharData("\n"),
  1432  		StartElement{Name{"", "BR"}, []Attr{}},
  1433  		EndElement{Name{"", "BR"}},
  1434  		StartElement{Name{"", "BR"}, []Attr{}},
  1435  		EndElement{Name{"", "BR"}},
  1436  		CharData("\n"),
  1437  		StartElement{Name{"", "Br"}, []Attr{}},
  1438  		EndElement{Name{"", "Br"}},
  1439  		CharData("\n"),
  1440  		StartElement{Name{"", "BR"}, []Attr{}},
  1441  		EndElement{Name{"", "BR"}},
  1442  		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
  1443  		CharData("abc"),
  1444  		EndElement{Name{"", "span"}},
  1445  		StartElement{Name{"", "br"}, []Attr{}},
  1446  		EndElement{Name{"", "br"}},
  1447  		StartElement{Name{"", "br"}, []Attr{}},
  1448  		EndElement{Name{"", "br"}},
  1449  	}
  1450  
  1451  	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1452  	d.Strict = false
  1453  	d.AutoClose = HTMLAutoClose
  1454  	d.Entity = HTMLEntity
  1455  	var haveTokens []Token
  1456  	for {
  1457  		tok, err := d.Token()
  1458  		if err != nil {
  1459  			if err == io.EOF {
  1460  				break
  1461  			}
  1462  			t.Fatalf("unexpected error: %v", err)
  1463  		}
  1464  		haveTokens = append(haveTokens, CopyToken(tok))
  1465  	}
  1466  	if len(haveTokens) != len(wantTokens) {
  1467  		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
  1468  	}
  1469  	for i, want := range wantTokens {
  1470  		if i >= len(haveTokens) {
  1471  			t.Errorf("token[%d] expected %#v, have no token", i, want)
  1472  		} else {
  1473  			have := haveTokens[i]
  1474  			if !reflect.DeepEqual(have, want) {
  1475  				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
  1476  			}
  1477  		}
  1478  	}
  1479  }
  1480  

View as plain text