...
Run Format

Source file src/encoding/xml/xml_test.go

Documentation: encoding/xml

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined and numeric character references, the custom
// entities listed in testEntity, a self-closing element, a CDATA section,
// and a trailing comment that is not followed by a newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
    33  
// testEntity maps the custom entity names used in testInput to their
// replacement text; TestRawToken and TestToken install it as Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Element and attribute names keep the prefixes written in
// the document (compare cookedTokens, where they hold namespace values).
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    72  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. Unlike rawTokens, the Space of element and prefixed attribute
// names holds the namespace value declared in the document (ns1, ns2, ns3).
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   109  
// testInputAltEncoding is a document that declares the made-up encoding
// "x-testing-uppercase"; it is used by the alternate-encoding RawToken tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   113  
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding. The element name and text are lower case because
// that test installs a CharsetReader that lower-cases the input.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   122  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and expects that error to be a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   169  
   170  func TestRawToken(t *testing.T) {
   171  	d := NewDecoder(strings.NewReader(testInput))
   172  	d.Entity = testEntity
   173  	testRawToken(t, d, testInput, rawTokens)
   174  }
   175  
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references; TestNonStrictRawToken decodes it with Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   186  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: each malformed reference appears verbatim in the CharData.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   222  
   223  func TestNonStrictRawToken(t *testing.T) {
   224  	d := NewDecoder(strings.NewReader(nonStrictInput))
   225  	d.Strict = false
   226  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   227  }
   228  
   229  type downCaser struct {
   230  	t *testing.T
   231  	r io.ByteReader
   232  }
   233  
   234  func (d *downCaser) ReadByte() (c byte, err error) {
   235  	c, err = d.r.ReadByte()
   236  	if c >= 'A' && c <= 'Z' {
   237  		c += 'a' - 'A'
   238  	}
   239  	return
   240  }
   241  
   242  func (d *downCaser) Read(p []byte) (int, error) {
   243  	d.t.Fatalf("unexpected Read call on downCaser reader")
   244  	panic("unreachable")
   245  }
   246  
   247  func TestRawTokenAltEncoding(t *testing.T) {
   248  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   249  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   250  		if charset != "x-testing-uppercase" {
   251  			t.Fatalf("unexpected charset %q", charset)
   252  		}
   253  		return &downCaser{t, input.(io.ByteReader)}, nil
   254  	}
   255  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   256  }
   257  
   258  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   259  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   260  	token, err := d.RawToken()
   261  	if token == nil {
   262  		t.Fatalf("expected a token on first RawToken call")
   263  	}
   264  	if err != nil {
   265  		t.Fatal(err)
   266  	}
   267  	token, err = d.RawToken()
   268  	if token != nil {
   269  		t.Errorf("expected a nil token; got %#v", token)
   270  	}
   271  	if err == nil {
   272  		t.Fatalf("expected an error on second RawToken call")
   273  	}
   274  	const encoding = "x-testing-uppercase"
   275  	if !strings.Contains(err.Error(), encoding) {
   276  		t.Errorf("expected error to contain %q; got error: %v",
   277  			encoding, err)
   278  	}
   279  }
   280  
   281  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   282  	lastEnd := int64(0)
   283  	for i, want := range rawTokens {
   284  		start := d.InputOffset()
   285  		have, err := d.RawToken()
   286  		end := d.InputOffset()
   287  		if err != nil {
   288  			t.Fatalf("token %d: unexpected error: %s", i, err)
   289  		}
   290  		if !reflect.DeepEqual(have, want) {
   291  			var shave, swant string
   292  			if _, ok := have.(CharData); ok {
   293  				shave = fmt.Sprintf("CharData(%q)", have)
   294  			} else {
   295  				shave = fmt.Sprintf("%#v", have)
   296  			}
   297  			if _, ok := want.(CharData); ok {
   298  				swant = fmt.Sprintf("CharData(%q)", want)
   299  			} else {
   300  				swant = fmt.Sprintf("%#v", want)
   301  			}
   302  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   303  		}
   304  
   305  		// Check that InputOffset returned actual token.
   306  		switch {
   307  		case start < lastEnd:
   308  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   309  		case start >= end:
   310  			// Special case: EndElement can be synthesized.
   311  			if start == end && end == lastEnd {
   312  				break
   313  			}
   314  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   315  		case end > int64(len(raw)):
   316  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   317  		default:
   318  			text := raw[start:end]
   319  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   320  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   321  			}
   322  		}
   323  		lastEnd = end
   324  	}
   325  }
   326  
   327  // Ensure that directives (specifically !DOCTYPE) include the complete
   328  // text of any nested directives, noting that < and > do not change
   329  // nesting depth if they are in single or double quotes.
   330  
// nestedDirectivesInput is the input for TestNestedDirectives: DOCTYPE
// directives that contain a nested ENTITY directive whose quoted value
// includes '<' or '>' characters.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   340  
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per line, each carrying the full
// nested text, separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   358  
   359  func TestNestedDirectives(t *testing.T) {
   360  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   361  
   362  	for i, want := range nestedDirectivesTokens {
   363  		have, err := d.Token()
   364  		if err != nil {
   365  			t.Fatalf("token %d: unexpected error: %s", i, err)
   366  		}
   367  		if !reflect.DeepEqual(have, want) {
   368  			t.Errorf("token %d = %#v want %#v", i, have, want)
   369  		}
   370  	}
   371  }
   372  
   373  func TestToken(t *testing.T) {
   374  	d := NewDecoder(strings.NewReader(testInput))
   375  	d.Entity = testEntity
   376  
   377  	for i, want := range cookedTokens {
   378  		have, err := d.Token()
   379  		if err != nil {
   380  			t.Fatalf("token %d: unexpected error: %s", i, err)
   381  		}
   382  		if !reflect.DeepEqual(have, want) {
   383  			t.Errorf("token %d = %#v want %#v", i, have, want)
   384  		}
   385  	}
   386  }
   387  
   388  func TestSyntax(t *testing.T) {
   389  	for i := range xmlInput {
   390  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   391  		var err error
   392  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   393  		}
   394  		if _, ok := err.(*SyntaxError); !ok {
   395  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   396  		}
   397  	}
   398  }
   399  
   400  type allScalars struct {
   401  	True1     bool
   402  	True2     bool
   403  	False1    bool
   404  	False2    bool
   405  	Int       int
   406  	Int8      int8
   407  	Int16     int16
   408  	Int32     int32
   409  	Int64     int64
   410  	Uint      int
   411  	Uint8     uint8
   412  	Uint16    uint16
   413  	Uint32    uint32
   414  	Uint64    uint64
   415  	Uintptr   uintptr
   416  	Float32   float32
   417  	Float64   float64
   418  	String    string
   419  	PtrString *string
   420  }
   421  
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
   445  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. The <Float> element has no field of that name in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   468  
   469  func TestAllScalars(t *testing.T) {
   470  	var a allScalars
   471  	err := Unmarshal([]byte(testScalarsInput), &a)
   472  
   473  	if err != nil {
   474  		t.Fatal(err)
   475  	}
   476  	if !reflect.DeepEqual(a, all) {
   477  		t.Errorf("have %+v want %+v", a, all)
   478  	}
   479  }
   480  
// item is the decode target used by TestIssue569.
type item struct {
	FieldA string
}
   484  
   485  func TestIssue569(t *testing.T) {
   486  	data := `<item><FieldA>abcd</FieldA></item>`
   487  	var i item
   488  	err := Unmarshal([]byte(data), &i)
   489  
   490  	if err != nil || i.FieldA != "abcd" {
   491  		t.Fatal("Expecting abcd")
   492  	}
   493  }
   494  
   495  func TestUnquotedAttrs(t *testing.T) {
   496  	data := "<tag attr=azAZ09:-_\t>"
   497  	d := NewDecoder(strings.NewReader(data))
   498  	d.Strict = false
   499  	token, err := d.Token()
   500  	if _, ok := err.(*SyntaxError); ok {
   501  		t.Errorf("Unexpected error: %v", err)
   502  	}
   503  	if token.(StartElement).Name.Local != "tag" {
   504  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   505  	}
   506  	attr := token.(StartElement).Attr[0]
   507  	if attr.Value != "azAZ09:-_" {
   508  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   509  	}
   510  	if attr.Name.Local != "attr" {
   511  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   512  	}
   513  }
   514  
   515  func TestValuelessAttrs(t *testing.T) {
   516  	tests := [][3]string{
   517  		{"<p nowrap>", "p", "nowrap"},
   518  		{"<p nowrap >", "p", "nowrap"},
   519  		{"<input checked/>", "input", "checked"},
   520  		{"<input checked />", "input", "checked"},
   521  	}
   522  	for _, test := range tests {
   523  		d := NewDecoder(strings.NewReader(test[0]))
   524  		d.Strict = false
   525  		token, err := d.Token()
   526  		if _, ok := err.(*SyntaxError); ok {
   527  			t.Errorf("Unexpected error: %v", err)
   528  		}
   529  		if token.(StartElement).Name.Local != test[1] {
   530  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   531  		}
   532  		attr := token.(StartElement).Attr[0]
   533  		if attr.Value != test[2] {
   534  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   535  		}
   536  		if attr.Name.Local != test[2] {
   537  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   538  		}
   539  	}
   540  }
   541  
   542  func TestCopyTokenCharData(t *testing.T) {
   543  	data := []byte("same data")
   544  	var tok1 Token = CharData(data)
   545  	tok2 := CopyToken(tok1)
   546  	if !reflect.DeepEqual(tok1, tok2) {
   547  		t.Error("CopyToken(CharData) != CharData")
   548  	}
   549  	data[1] = 'o'
   550  	if reflect.DeepEqual(tok1, tok2) {
   551  		t.Error("CopyToken(CharData) uses same buffer.")
   552  	}
   553  }
   554  
   555  func TestCopyTokenStartElement(t *testing.T) {
   556  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   557  	var tok1 Token = elt
   558  	tok2 := CopyToken(tok1)
   559  	if tok1.(StartElement).Attr[0].Value != "en" {
   560  		t.Error("CopyToken overwrote Attr[0]")
   561  	}
   562  	if !reflect.DeepEqual(tok1, tok2) {
   563  		t.Error("CopyToken(StartElement) != StartElement")
   564  	}
   565  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   566  	if reflect.DeepEqual(tok1, tok2) {
   567  		t.Error("CopyToken(CharData) uses same buffer.")
   568  	}
   569  }
   570  
   571  func TestSyntaxErrorLineNum(t *testing.T) {
   572  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   573  	d := NewDecoder(strings.NewReader(testInput))
   574  	var err error
   575  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   576  	}
   577  	synerr, ok := err.(*SyntaxError)
   578  	if !ok {
   579  		t.Error("Expected SyntaxError.")
   580  	}
   581  	if synerr.Line != 3 {
   582  		t.Error("SyntaxError didn't have correct line number.")
   583  	}
   584  }
   585  
   586  func TestTrailingRawToken(t *testing.T) {
   587  	input := `<FOO></FOO>  `
   588  	d := NewDecoder(strings.NewReader(input))
   589  	var err error
   590  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   591  	}
   592  	if err != io.EOF {
   593  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   594  	}
   595  }
   596  
   597  func TestTrailingToken(t *testing.T) {
   598  	input := `<FOO></FOO>  `
   599  	d := NewDecoder(strings.NewReader(input))
   600  	var err error
   601  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   602  	}
   603  	if err != io.EOF {
   604  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   605  	}
   606  }
   607  
   608  func TestEntityInsideCDATA(t *testing.T) {
   609  	input := `<test><![CDATA[ &val=foo ]]></test>`
   610  	d := NewDecoder(strings.NewReader(input))
   611  	var err error
   612  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   613  	}
   614  	if err != io.EOF {
   615  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   616  	}
   617  }
   618  
// characterTests pairs inputs containing disallowed characters or invalid
// entities (in) with the SyntaxError message that TestDisallowedCharacters
// expects for them (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   633  
   634  func TestDisallowedCharacters(t *testing.T) {
   635  
   636  	for i, tt := range characterTests {
   637  		d := NewDecoder(strings.NewReader(tt.in))
   638  		var err error
   639  
   640  		for err == nil {
   641  			_, err = d.Token()
   642  		}
   643  		synerr, ok := err.(*SyntaxError)
   644  		if !ok {
   645  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   646  		}
   647  		if synerr.Msg != tt.err {
   648  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   649  		}
   650  	}
   651  }
   652  
   653  func TestIsInCharacterRange(t *testing.T) {
   654  	invalid := []rune{
   655  		utf8.MaxRune + 1,
   656  		0xD800, // surrogate min
   657  		0xDFFF, // surrogate max
   658  		-1,
   659  	}
   660  	for _, r := range invalid {
   661  		if isInCharacterRange(r) {
   662  			t.Errorf("rune %U considered valid", r)
   663  		}
   664  	}
   665  }
   666  
// procInstTests pairs processing-instruction text (input) with the values
// TestProcInstEncoding expects procInst to return for "version" and
// "encoding", in that order (expect).
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   677  
   678  func TestProcInstEncoding(t *testing.T) {
   679  	for _, test := range procInstTests {
   680  		if got := procInst("version", test.input); got != test.expect[0] {
   681  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   682  		}
   683  		if got := procInst("encoding", test.input); got != test.expect[1] {
   684  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   685  		}
   686  	}
   687  }
   688  
// Ensure that directives containing comments yield the complete text of
// any nested directives, with the comments themselves left out.
   691  
// directivesWithCommentsInput is the input for TestDirectivesWithComments:
// DOCTYPE directives with comments placed before, after, and between their
// nested content.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   697  
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput: the
// comments are absent from each Directive while the rest of its text remains.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   707  
   708  func TestDirectivesWithComments(t *testing.T) {
   709  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   710  
   711  	for i, want := range directivesWithCommentsTokens {
   712  		have, err := d.Token()
   713  		if err != nil {
   714  			t.Fatalf("token %d: unexpected error: %s", i, err)
   715  		}
   716  		if !reflect.DeepEqual(have, want) {
   717  			t.Errorf("token %d = %#v want %#v", i, have, want)
   718  		}
   719  	}
   720  }
   721  
   722  // Writer whose Write method always returns an error.
   723  type errWriter struct{}
   724  
   725  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   726  
   727  func TestEscapeTextIOErrors(t *testing.T) {
   728  	expectErr := "unwritable"
   729  	err := EscapeText(errWriter{}, []byte{'A'})
   730  
   731  	if err == nil || err.Error() != expectErr {
   732  		t.Errorf("have %v, want %v", err, expectErr)
   733  	}
   734  }
   735  
   736  func TestEscapeTextInvalidChar(t *testing.T) {
   737  	input := []byte("A \x00 terminated string.")
   738  	expected := "A \uFFFD terminated string."
   739  
   740  	buff := new(bytes.Buffer)
   741  	if err := EscapeText(buff, input); err != nil {
   742  		t.Fatalf("have %v, want nil", err)
   743  	}
   744  	text := buff.String()
   745  
   746  	if text != expected {
   747  		t.Errorf("have %v, want %v", text, expected)
   748  	}
   749  }
   750  
   751  func TestIssue5880(t *testing.T) {
   752  	type T []byte
   753  	data, err := Marshal(T{192, 168, 0, 1})
   754  	if err != nil {
   755  		t.Errorf("Marshal error: %v", err)
   756  	}
   757  	if !utf8.Valid(data) {
   758  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   759  	}
   760  }
   761  
   762  func TestIssue11405(t *testing.T) {
   763  	testCases := []string{
   764  		"<root>",
   765  		"<root><foo>",
   766  		"<root><foo></foo>",
   767  	}
   768  	for _, tc := range testCases {
   769  		d := NewDecoder(strings.NewReader(tc))
   770  		var err error
   771  		for {
   772  			_, err = d.Token()
   773  			if err != nil {
   774  				break
   775  			}
   776  		}
   777  		if _, ok := err.(*SyntaxError); !ok {
   778  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
   779  		}
   780  	}
   781  }
   782  
   783  func TestIssue12417(t *testing.T) {
   784  	testCases := []struct {
   785  		s  string
   786  		ok bool
   787  	}{
   788  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
   789  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
   790  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
   791  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
   792  	}
   793  	for _, tc := range testCases {
   794  		d := NewDecoder(strings.NewReader(tc.s))
   795  		var err error
   796  		for {
   797  			_, err = d.Token()
   798  			if err != nil {
   799  				if err == io.EOF {
   800  					err = nil
   801  				}
   802  				break
   803  			}
   804  		}
   805  		if err != nil && tc.ok {
   806  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
   807  			continue
   808  		}
   809  		if err == nil && !tc.ok {
   810  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
   811  		}
   812  	}
   813  }
   814  
   815  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
   816  	return func(src TokenReader) TokenReader {
   817  		return mapper{
   818  			t: src,
   819  			f: mapping,
   820  		}
   821  	}
   822  }
   823  
   824  type mapper struct {
   825  	t TokenReader
   826  	f func(Token) Token
   827  }
   828  
   829  func (m mapper) Token() (Token, error) {
   830  	tok, err := m.t.Token()
   831  	if err != nil {
   832  		return nil, err
   833  	}
   834  	return m.f(tok), nil
   835  }
   836  
   837  func TestNewTokenDecoderIdempotent(t *testing.T) {
   838  	d := NewDecoder(strings.NewReader(`<br/>`))
   839  	d2 := NewTokenDecoder(d)
   840  	if d != d2 {
   841  		t.Error("NewTokenDecoder did not detect underlying Decoder")
   842  	}
   843  }
   844  
   845  func TestWrapDecoder(t *testing.T) {
   846  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
   847  	m := tokenMap(func(t Token) Token {
   848  		switch tok := t.(type) {
   849  		case StartElement:
   850  			if tok.Name.Local == "quote" {
   851  				tok.Name.Local = "blocking"
   852  				return tok
   853  			}
   854  		case EndElement:
   855  			if tok.Name.Local == "quote" {
   856  				tok.Name.Local = "blocking"
   857  				return tok
   858  			}
   859  		}
   860  		return t
   861  	})
   862  
   863  	d = NewTokenDecoder(m(d))
   864  
   865  	o := struct {
   866  		XMLName  Name   `xml:"blocking"`
   867  		Chardata string `xml:",chardata"`
   868  	}{}
   869  
   870  	if err := d.Decode(&o); err != nil {
   871  		t.Fatal("Got unexpected error while decoding:", err)
   872  	}
   873  
   874  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
   875  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
   876  	}
   877  }
   878  
   879  type tokReader struct{}
   880  
   881  func (tokReader) Token() (Token, error) {
   882  	return StartElement{}, nil
   883  }
   884  
   885  type Failure struct{}
   886  
   887  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
   888  	return nil
   889  }
   890  
   891  func TestTokenUnmarshaler(t *testing.T) {
   892  	defer func() {
   893  		if r := recover(); r != nil {
   894  			t.Error("Unexpected panic using custom token unmarshaler")
   895  		}
   896  	}()
   897  
   898  	d := NewTokenDecoder(tokReader{})
   899  	d.Decode(&Failure{})
   900  }
   901  

View as plain text