...
Run Format

Source file src/encoding/xml/xml_test.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package xml
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"io"
    11		"reflect"
    12		"strings"
    13		"testing"
    14		"unicode/utf8"
    15	)
    16	
// testInput is the document decoded by TestRawToken and TestToken.
// It contains an XML declaration, a DOCTYPE directive, namespace
// declarations (including a redeclaration of the "tag" prefix on <outer>),
// predefined, numeric and custom entity references, a self-closing
// element, a CDATA section, and a trailing comment with no final newline.
// The whitespace inside the <body> start tag is spliced in from an
// interpreted string so that it contains a literal "\r\n\t".
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
    33	
    34	var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35	
// rawTokens is the token sequence TestRawToken expects RawToken to yield
// for testInput. Names keep their literal prefixes ("foo", "tag", "xmlns")
// in Name.Space instead of namespace URLs, and the self-closing elements
// appear as a StartElement immediately followed by an EndElement.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    72	
// cookedTokens is the token sequence TestToken expects Token to yield for
// testInput. It mirrors rawTokens, except that element and prefixed
// attribute names carry the namespace URL bound by the xmlns declarations
// ("ns1", "ns2", "ns3") in Name.Space. The xmlns attributes themselves are
// listed unchanged.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   109	
// testInputAltEncoding is a document whose XML declaration names the
// made-up encoding "x-testing-uppercase". It is decoded by
// TestRawTokenAltEncoding (with a CharsetReader installed) and by
// TestRawTokenAltEncodingNoConverter (without one).
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   113	
   114	var rawTokensAltEncoding = []Token{
   115		CharData("\n"),
   116		ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
   117		CharData("\n"),
   118		StartElement{Name{"", "tag"}, []Attr{}},
   119		CharData("value"),
   120		EndElement{Name{"", "tag"}},
   121	}
   122	
// xmlInput lists malformed documents. TestSyntax reads tokens from each
// one until an error occurs and requires that error to be a *SyntaxError.
// The first group ends in the middle of a construct; the second group is
// complete but syntactically invalid. The commented-out entries are kept
// for reference and are not run.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   169	
   170	func TestRawToken(t *testing.T) {
   171		d := NewDecoder(strings.NewReader(testInput))
   172		d.Entity = testEntity
   173		testRawToken(t, d, testInput, rawTokens)
   174	}
   175	
// nonStrictInput is the document decoded by TestNonStrictRawToken with
// Strict disabled. Each <tag> element holds character data containing an
// '&' sequence that is not a usable entity reference: no terminating
// semicolon, an unknown name, a malformed numeric reference, or an empty
// or otherwise invalid name.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   186	
// nonStrictTokens is the raw token sequence TestNonStrictRawToken expects
// for nonStrictInput: every '&' sequence is expected back verbatim as
// CharData, with no substitution.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   222	
   223	func TestNonStrictRawToken(t *testing.T) {
   224		d := NewDecoder(strings.NewReader(nonStrictInput))
   225		d.Strict = false
   226		testRawToken(t, d, nonStrictInput, nonStrictTokens)
   227	}
   228	
   229	type downCaser struct {
   230		t *testing.T
   231		r io.ByteReader
   232	}
   233	
   234	func (d *downCaser) ReadByte() (c byte, err error) {
   235		c, err = d.r.ReadByte()
   236		if c >= 'A' && c <= 'Z' {
   237			c += 'a' - 'A'
   238		}
   239		return
   240	}
   241	
   242	func (d *downCaser) Read(p []byte) (int, error) {
   243		d.t.Fatalf("unexpected Read call on downCaser reader")
   244		panic("unreachable")
   245	}
   246	
   247	func TestRawTokenAltEncoding(t *testing.T) {
   248		d := NewDecoder(strings.NewReader(testInputAltEncoding))
   249		d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   250			if charset != "x-testing-uppercase" {
   251				t.Fatalf("unexpected charset %q", charset)
   252			}
   253			return &downCaser{t, input.(io.ByteReader)}, nil
   254		}
   255		testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   256	}
   257	
   258	func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   259		d := NewDecoder(strings.NewReader(testInputAltEncoding))
   260		token, err := d.RawToken()
   261		if token == nil {
   262			t.Fatalf("expected a token on first RawToken call")
   263		}
   264		if err != nil {
   265			t.Fatal(err)
   266		}
   267		token, err = d.RawToken()
   268		if token != nil {
   269			t.Errorf("expected a nil token; got %#v", token)
   270		}
   271		if err == nil {
   272			t.Fatalf("expected an error on second RawToken call")
   273		}
   274		const encoding = "x-testing-uppercase"
   275		if !strings.Contains(err.Error(), encoding) {
   276			t.Errorf("expected error to contain %q; got error: %v",
   277				encoding, err)
   278		}
   279	}
   280	
   281	func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   282		lastEnd := int64(0)
   283		for i, want := range rawTokens {
   284			start := d.InputOffset()
   285			have, err := d.RawToken()
   286			end := d.InputOffset()
   287			if err != nil {
   288				t.Fatalf("token %d: unexpected error: %s", i, err)
   289			}
   290			if !reflect.DeepEqual(have, want) {
   291				var shave, swant string
   292				if _, ok := have.(CharData); ok {
   293					shave = fmt.Sprintf("CharData(%q)", have)
   294				} else {
   295					shave = fmt.Sprintf("%#v", have)
   296				}
   297				if _, ok := want.(CharData); ok {
   298					swant = fmt.Sprintf("CharData(%q)", want)
   299				} else {
   300					swant = fmt.Sprintf("%#v", want)
   301				}
   302				t.Errorf("token %d = %s, want %s", i, shave, swant)
   303			}
   304	
   305			// Check that InputOffset returned actual token.
   306			switch {
   307			case start < lastEnd:
   308				t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   309			case start >= end:
   310				// Special case: EndElement can be synthesized.
   311				if start == end && end == lastEnd {
   312					break
   313				}
   314				t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   315			case end > int64(len(raw)):
   316				t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   317			default:
   318				text := raw[start:end]
   319				if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   320					t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   321				}
   322			}
   323			lastEnd = end
   324		}
   325	}
   326	
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput is the document decoded by TestNestedDirectives.
// Each line is a DOCTYPE with one nested ENTITY declaration; all but the
// first put '<' or '>' inside a quoted value, using either quote style and
// with the other quote character mixed in.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   340	
// nestedDirectivesTokens is the token sequence TestNestedDirectives
// expects for nestedDirectivesInput: each Directive holds the full text
// between "<!" and the final ">", nested declaration included, and the
// directives are separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   358	
   359	func TestNestedDirectives(t *testing.T) {
   360		d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   361	
   362		for i, want := range nestedDirectivesTokens {
   363			have, err := d.Token()
   364			if err != nil {
   365				t.Fatalf("token %d: unexpected error: %s", i, err)
   366			}
   367			if !reflect.DeepEqual(have, want) {
   368				t.Errorf("token %d = %#v want %#v", i, have, want)
   369			}
   370		}
   371	}
   372	
   373	func TestToken(t *testing.T) {
   374		d := NewDecoder(strings.NewReader(testInput))
   375		d.Entity = testEntity
   376	
   377		for i, want := range cookedTokens {
   378			have, err := d.Token()
   379			if err != nil {
   380				t.Fatalf("token %d: unexpected error: %s", i, err)
   381			}
   382			if !reflect.DeepEqual(have, want) {
   383				t.Errorf("token %d = %#v want %#v", i, have, want)
   384			}
   385		}
   386	}
   387	
   388	func TestSyntax(t *testing.T) {
   389		for i := range xmlInput {
   390			d := NewDecoder(strings.NewReader(xmlInput[i]))
   391			var err error
   392			for _, err = d.Token(); err == nil; _, err = d.Token() {
   393			}
   394			if _, ok := err.(*SyntaxError); !ok {
   395				t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   396			}
   397		}
   398	}
   399	
   400	type allScalars struct {
   401		True1     bool
   402		True2     bool
   403		False1    bool
   404		False2    bool
   405		Int       int
   406		Int8      int8
   407		Int16     int16
   408		Int32     int32
   409		Int64     int64
   410		Uint      int
   411		Uint8     uint8
   412		Uint16    uint16
   413		Uint32    uint32
   414		Uint64    uint64
   415		Uintptr   uintptr
   416		Float32   float32
   417		Float64   float64
   418		String    string
   419		PtrString *string
   420	}
   421	
   422	var all = allScalars{
   423		True1:     true,
   424		True2:     true,
   425		False1:    false,
   426		False2:    false,
   427		Int:       1,
   428		Int8:      -2,
   429		Int16:     3,
   430		Int32:     -4,
   431		Int64:     5,
   432		Uint:      6,
   433		Uint8:     7,
   434		Uint16:    8,
   435		Uint32:    9,
   436		Uint64:    10,
   437		Uintptr:   11,
   438		Float32:   13.0,
   439		Float64:   14.0,
   440		String:    "15",
   441		PtrString: &sixteen,
   442	}
   443	
   444	var sixteen = "16"
   445	
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value; each child element is named after the field it is
// meant to populate. The <Float> element has no corresponding field in
// allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   468	
   469	func TestAllScalars(t *testing.T) {
   470		var a allScalars
   471		err := Unmarshal([]byte(testScalarsInput), &a)
   472	
   473		if err != nil {
   474			t.Fatal(err)
   475		}
   476		if !reflect.DeepEqual(a, all) {
   477			t.Errorf("have %+v want %+v", a, all)
   478		}
   479	}
   480	
   481	type item struct {
   482		Field_a string
   483	}
   484	
   485	func TestIssue569(t *testing.T) {
   486		data := `<item><Field_a>abcd</Field_a></item>`
   487		var i item
   488		err := Unmarshal([]byte(data), &i)
   489	
   490		if err != nil || i.Field_a != "abcd" {
   491			t.Fatal("Expecting abcd")
   492		}
   493	}
   494	
   495	func TestUnquotedAttrs(t *testing.T) {
   496		data := "<tag attr=azAZ09:-_\t>"
   497		d := NewDecoder(strings.NewReader(data))
   498		d.Strict = false
   499		token, err := d.Token()
   500		if _, ok := err.(*SyntaxError); ok {
   501			t.Errorf("Unexpected error: %v", err)
   502		}
   503		if token.(StartElement).Name.Local != "tag" {
   504			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   505		}
   506		attr := token.(StartElement).Attr[0]
   507		if attr.Value != "azAZ09:-_" {
   508			t.Errorf("Unexpected attribute value: %v", attr.Value)
   509		}
   510		if attr.Name.Local != "attr" {
   511			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   512		}
   513	}
   514	
   515	func TestValuelessAttrs(t *testing.T) {
   516		tests := [][3]string{
   517			{"<p nowrap>", "p", "nowrap"},
   518			{"<p nowrap >", "p", "nowrap"},
   519			{"<input checked/>", "input", "checked"},
   520			{"<input checked />", "input", "checked"},
   521		}
   522		for _, test := range tests {
   523			d := NewDecoder(strings.NewReader(test[0]))
   524			d.Strict = false
   525			token, err := d.Token()
   526			if _, ok := err.(*SyntaxError); ok {
   527				t.Errorf("Unexpected error: %v", err)
   528			}
   529			if token.(StartElement).Name.Local != test[1] {
   530				t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   531			}
   532			attr := token.(StartElement).Attr[0]
   533			if attr.Value != test[2] {
   534				t.Errorf("Unexpected attribute value: %v", attr.Value)
   535			}
   536			if attr.Name.Local != test[2] {
   537				t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   538			}
   539		}
   540	}
   541	
   542	func TestCopyTokenCharData(t *testing.T) {
   543		data := []byte("same data")
   544		var tok1 Token = CharData(data)
   545		tok2 := CopyToken(tok1)
   546		if !reflect.DeepEqual(tok1, tok2) {
   547			t.Error("CopyToken(CharData) != CharData")
   548		}
   549		data[1] = 'o'
   550		if reflect.DeepEqual(tok1, tok2) {
   551			t.Error("CopyToken(CharData) uses same buffer.")
   552		}
   553	}
   554	
   555	func TestCopyTokenStartElement(t *testing.T) {
   556		elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   557		var tok1 Token = elt
   558		tok2 := CopyToken(tok1)
   559		if tok1.(StartElement).Attr[0].Value != "en" {
   560			t.Error("CopyToken overwrote Attr[0]")
   561		}
   562		if !reflect.DeepEqual(tok1, tok2) {
   563			t.Error("CopyToken(StartElement) != StartElement")
   564		}
   565		tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   566		if reflect.DeepEqual(tok1, tok2) {
   567			t.Error("CopyToken(CharData) uses same buffer.")
   568		}
   569	}
   570	
   571	func TestSyntaxErrorLineNum(t *testing.T) {
   572		testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   573		d := NewDecoder(strings.NewReader(testInput))
   574		var err error
   575		for _, err = d.Token(); err == nil; _, err = d.Token() {
   576		}
   577		synerr, ok := err.(*SyntaxError)
   578		if !ok {
   579			t.Error("Expected SyntaxError.")
   580		}
   581		if synerr.Line != 3 {
   582			t.Error("SyntaxError didn't have correct line number.")
   583		}
   584	}
   585	
   586	func TestTrailingRawToken(t *testing.T) {
   587		input := `<FOO></FOO>  `
   588		d := NewDecoder(strings.NewReader(input))
   589		var err error
   590		for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   591		}
   592		if err != io.EOF {
   593			t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   594		}
   595	}
   596	
   597	func TestTrailingToken(t *testing.T) {
   598		input := `<FOO></FOO>  `
   599		d := NewDecoder(strings.NewReader(input))
   600		var err error
   601		for _, err = d.Token(); err == nil; _, err = d.Token() {
   602		}
   603		if err != io.EOF {
   604			t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   605		}
   606	}
   607	
   608	func TestEntityInsideCDATA(t *testing.T) {
   609		input := `<test><![CDATA[ &val=foo ]]></test>`
   610		d := NewDecoder(strings.NewReader(input))
   611		var err error
   612		for _, err = d.Token(); err == nil; _, err = d.Token() {
   613		}
   614		if err != io.EOF {
   615			t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   616		}
   617	}
   618	
// characterTests is the table for TestDisallowedCharacters. Each entry
// pairs an input containing a disallowed character or an invalid entity
// reference (in) with the exact SyntaxError.Msg the decoder is expected to
// report for it (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   633	
   634	func TestDisallowedCharacters(t *testing.T) {
   635	
   636		for i, tt := range characterTests {
   637			d := NewDecoder(strings.NewReader(tt.in))
   638			var err error
   639	
   640			for err == nil {
   641				_, err = d.Token()
   642			}
   643			synerr, ok := err.(*SyntaxError)
   644			if !ok {
   645				t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   646			}
   647			if synerr.Msg != tt.err {
   648				t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   649			}
   650		}
   651	}
   652	
// procInstTests is the table for TestProcInstEncoding. input is the text
// of a processing instruction; expect holds the values procInst is
// expected to return for the "version" parameter (index 0) and the
// "encoding" parameter (index 1). An empty string is expected where the
// parameter is missing or its value is unquoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   663	
   664	func TestProcInstEncoding(t *testing.T) {
   665		for _, test := range procInstTests {
   666			if got := procInst("version", test.input); got != test.expect[0] {
   667				t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   668			}
   669			if got := procInst("encoding", test.input); got != test.expect[1] {
   670				t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   671			}
   672		}
   673	}
   674	
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput is the document decoded by
// TestDirectivesWithComments. Each DOCTYPE mixes comments with a nested
// ENTITY declaration; the last one also contains sequences that resemble
// comment delimiters ("<!->", "<!>") and comments whose body contains '>'.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   683	
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. The
// comments are absent from the expected Directive text, while the nested
// ENTITY declarations and the "<!->" and "<!>" sequences are kept. The run
// of spaces in the last directive is significant.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   693	
   694	func TestDirectivesWithComments(t *testing.T) {
   695		d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   696	
   697		for i, want := range directivesWithCommentsTokens {
   698			have, err := d.Token()
   699			if err != nil {
   700				t.Fatalf("token %d: unexpected error: %s", i, err)
   701			}
   702			if !reflect.DeepEqual(have, want) {
   703				t.Errorf("token %d = %#v want %#v", i, have, want)
   704			}
   705		}
   706	}
   707	
   708	// Writer whose Write method always returns an error.
   709	type errWriter struct{}
   710	
   711	func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   712	
   713	func TestEscapeTextIOErrors(t *testing.T) {
   714		expectErr := "unwritable"
   715		err := EscapeText(errWriter{}, []byte{'A'})
   716	
   717		if err == nil || err.Error() != expectErr {
   718			t.Errorf("have %v, want %v", err, expectErr)
   719		}
   720	}
   721	
   722	func TestEscapeTextInvalidChar(t *testing.T) {
   723		input := []byte("A \x00 terminated string.")
   724		expected := "A \uFFFD terminated string."
   725	
   726		buff := new(bytes.Buffer)
   727		if err := EscapeText(buff, input); err != nil {
   728			t.Fatalf("have %v, want nil", err)
   729		}
   730		text := buff.String()
   731	
   732		if text != expected {
   733			t.Errorf("have %v, want %v", text, expected)
   734		}
   735	}
   736	
   737	func TestIssue5880(t *testing.T) {
   738		type T []byte
   739		data, err := Marshal(T{192, 168, 0, 1})
   740		if err != nil {
   741			t.Errorf("Marshal error: %v", err)
   742		}
   743		if !utf8.Valid(data) {
   744			t.Errorf("Marshal generated invalid UTF-8: %x", data)
   745		}
   746	}
   747	
   748	func TestIssue11405(t *testing.T) {
   749		testCases := []string{
   750			"<root>",
   751			"<root><foo>",
   752			"<root><foo></foo>",
   753		}
   754		for _, tc := range testCases {
   755			d := NewDecoder(strings.NewReader(tc))
   756			var err error
   757			for {
   758				_, err = d.Token()
   759				if err != nil {
   760					break
   761				}
   762			}
   763			if _, ok := err.(*SyntaxError); !ok {
   764				t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
   765			}
   766		}
   767	}
   768	
   769	func TestIssue12417(t *testing.T) {
   770		testCases := []struct {
   771			s  string
   772			ok bool
   773		}{
   774			{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
   775			{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
   776			{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
   777			{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
   778		}
   779		for _, tc := range testCases {
   780			d := NewDecoder(strings.NewReader(tc.s))
   781			var err error
   782			for {
   783				_, err = d.Token()
   784				if err != nil {
   785					if err == io.EOF {
   786						err = nil
   787					}
   788					break
   789				}
   790			}
   791			if err != nil && tc.ok {
   792				t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
   793				continue
   794			}
   795			if err == nil && !tc.ok {
   796				t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
   797			}
   798		}
   799	}
   800	

View as plain text