...
Run Format

Source file src/encoding/xml/xml_test.go

     1	// Copyright 2009 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package xml
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"io"
    11		"reflect"
    12		"strings"
    13		"testing"
    14		"unicode/utf8"
    15	)
    16	
      	// testInput is the XML document decoded by TestRawToken and TestToken.
      	// It contains an XML declaration, a DOCTYPE directive, namespace
      	// declarations, predefined, numeric and custom entity references,
      	// a self-closing element, a CDATA section, and a trailing comment
      	// that is not followed by a newline.
    17	const testInput = `
    18	<?xml version="1.0" encoding="UTF-8"?>
    19	<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    20	  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    21	<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
    22		"\r\n\t" + `  >
    23	  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
    24	  <query>&何; &is-it;</query>
    25	  <goodbye />
    26	  <outer foo:attr="value" xmlns:tag="ns4">
    27	    <inner/>
    28	  </outer>
    29	  <tag:name>
    30	    <![CDATA[Some text here.]]>
    31	  </tag:name>
    32	</body><!-- missing final newline -->`
    33	
    34	var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35	
      	// rawTokens is the token sequence TestRawToken expects RawToken to
      	// produce for testInput. Names keep the prefix exactly as written in
      	// the document (for example Name{"tag", "name"} and Name{"foo", "attr"}),
      	// while entity references and the CDATA section appear as plain CharData.
    36	var rawTokens = []Token{
    37		CharData("\n"),
    38		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
    39		CharData("\n"),
    40		Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    41	  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
    42		CharData("\n"),
    43		StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
    44		CharData("\n  "),
    45		StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
    46		CharData("World <>'\" 白鵬翔"),
    47		EndElement{Name{"", "hello"}},
    48		CharData("\n  "),
    49		StartElement{Name{"", "query"}, []Attr{}},
    50		CharData("What is it?"),
    51		EndElement{Name{"", "query"}},
    52		CharData("\n  "),
    53		StartElement{Name{"", "goodbye"}, []Attr{}},
    54		EndElement{Name{"", "goodbye"}},
    55		CharData("\n  "),
    56		StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
    57		CharData("\n    "),
    58		StartElement{Name{"", "inner"}, []Attr{}},
    59		EndElement{Name{"", "inner"}},
    60		CharData("\n  "),
    61		EndElement{Name{"", "outer"}},
    62		CharData("\n  "),
    63		StartElement{Name{"tag", "name"}, []Attr{}},
    64		CharData("\n    "),
    65		CharData("Some text here."),
    66		CharData("\n  "),
    67		EndElement{Name{"tag", "name"}},
    68		CharData("\n"),
    69		EndElement{Name{"", "body"}},
    70		Comment(" missing final newline "),
    71	}
    72	
      	// cookedTokens is the token sequence TestToken expects Token to produce
      	// for testInput. It mirrors rawTokens, except that element names carry a
      	// namespace value in Name.Space ("ns2" for unprefixed elements, "ns3" for
      	// tag:name) and the foo:attr attribute is expected as Name{"ns1", "attr"}.
      	// The xmlns attributes themselves are expected exactly as written.
    73	var cookedTokens = []Token{
    74		CharData("\n"),
    75		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
    76		CharData("\n"),
    77		Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    78	  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
    79		CharData("\n"),
    80		StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
    81		CharData("\n  "),
    82		StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
    83		CharData("World <>'\" 白鵬翔"),
    84		EndElement{Name{"ns2", "hello"}},
    85		CharData("\n  "),
    86		StartElement{Name{"ns2", "query"}, []Attr{}},
    87		CharData("What is it?"),
    88		EndElement{Name{"ns2", "query"}},
    89		CharData("\n  "),
    90		StartElement{Name{"ns2", "goodbye"}, []Attr{}},
    91		EndElement{Name{"ns2", "goodbye"}},
    92		CharData("\n  "),
    93		StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
    94		CharData("\n    "),
    95		StartElement{Name{"ns2", "inner"}, []Attr{}},
    96		EndElement{Name{"ns2", "inner"}},
    97		CharData("\n  "),
    98		EndElement{Name{"ns2", "outer"}},
    99		CharData("\n  "),
   100		StartElement{Name{"ns3", "name"}, []Attr{}},
   101		CharData("\n    "),
   102		CharData("Some text here."),
   103		CharData("\n  "),
   104		EndElement{Name{"ns3", "name"}},
   105		CharData("\n"),
   106		EndElement{Name{"ns2", "body"}},
   107		Comment(" missing final newline "),
   108	}
   109	
      	// testInputAltEncoding is a small document whose XML declaration names
      	// the made-up encoding "x-testing-uppercase". It is the input for
      	// TestRawTokenAltEncoding and TestRawTokenAltEncodingNoConverter.
   110	const testInputAltEncoding = `
   111	<?xml version="1.0" encoding="x-testing-uppercase"?>
   112	<TAG>VALUE</TAG>`
   113	
      	// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding
      	// expects for testInputAltEncoding. The element name and text are
      	// expected in lower case ("tag", "value") even though the input spells
      	// them TAG and VALUE, because that test installs a CharsetReader that
      	// returns a downCaser.
   114	var rawTokensAltEncoding = []Token{
   115		CharData("\n"),
   116		ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
   117		CharData("\n"),
   118		StartElement{Name{"", "tag"}, []Attr{}},
   119		CharData("value"),
   120		EndElement{Name{"", "tag"}},
   121	}
   122	
      	// xmlInput lists malformed documents consumed by TestSyntax. For every
      	// entry, TestSyntax reads tokens until Token returns an error and
      	// requires that error to be a *SyntaxError.
   123	var xmlInput = []string{
   124		// unexpected EOF cases
   125		"<",
   126		"<t",
   127		"<t ",
   128		"<t/",
   129		"<!",
   130		"<!-",
   131		"<!--",
   132		"<!--c-",
   133		"<!--c--",
   134		"<!d",
   135		"<t></",
   136		"<t></t",
   137		"<?",
   138		"<?p",
   139		"<t a",
   140		"<t a=",
   141		"<t a='",
   142		"<t a=''",
   143		"<t/><![",
   144		"<t/><![C",
   145		"<t/><![CDATA[d",
   146		"<t/><![CDATA[d]",
   147		"<t/><![CDATA[d]]",
   148	
   149		// other Syntax errors
   150		"<>",
   151		"<t/a",
   152		"<0 />",
   153		"<?0 >",
   154		//	"<!0 >",	// let the Token() caller handle
   155		"</0>",
   156		"<t 0=''>",
   157		"<t a='&'>",
   158		"<t a='<'>",
   159		"<t>&nbspc;</t>",
   160		"<t a>",
   161		"<t a=>",
   162		"<t a=v>",
   163		//	"<![CDATA[d]]>",	// let the Token() caller handle
   164		"<t></e>",
   165		"<t></>",
   166		"<t></t!",
   167		"<t>cdata]]></t>",
   168	}
   169	
   170	func TestRawToken(t *testing.T) {
   171		d := NewDecoder(strings.NewReader(testInput))
   172		d.Entity = testEntity
   173		testRawToken(t, d, testInput, rawTokens)
   174	}
   175	
      	// nonStrictInput is the document decoded by TestNonStrictRawToken with
      	// Strict disabled. Each <tag> element holds text containing an '&' that
      	// nonStrictTokens expects to come back unchanged.
   176	const nonStrictInput = `
   177	<tag>non&entity</tag>
   178	<tag>&unknown;entity</tag>
   179	<tag>&#123</tag>
   180	<tag>&#zzz;</tag>
   181	<tag>&なまえ3;</tag>
   182	<tag>&lt-gt;</tag>
   183	<tag>&;</tag>
   184	<tag>&0a;</tag>
   185	`
   186	
   187	var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
   188	
      	// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
      	// nonStrictInput: for every <tag> element the text between the tags is
      	// expected verbatim as CharData, including the '&' sequences.
   189	var nonStrictTokens = []Token{
   190		CharData("\n"),
   191		StartElement{Name{"", "tag"}, []Attr{}},
   192		CharData("non&entity"),
   193		EndElement{Name{"", "tag"}},
   194		CharData("\n"),
   195		StartElement{Name{"", "tag"}, []Attr{}},
   196		CharData("&unknown;entity"),
   197		EndElement{Name{"", "tag"}},
   198		CharData("\n"),
   199		StartElement{Name{"", "tag"}, []Attr{}},
   200		CharData("&#123"),
   201		EndElement{Name{"", "tag"}},
   202		CharData("\n"),
   203		StartElement{Name{"", "tag"}, []Attr{}},
   204		CharData("&#zzz;"),
   205		EndElement{Name{"", "tag"}},
   206		CharData("\n"),
   207		StartElement{Name{"", "tag"}, []Attr{}},
   208		CharData("&なまえ3;"),
   209		EndElement{Name{"", "tag"}},
   210		CharData("\n"),
   211		StartElement{Name{"", "tag"}, []Attr{}},
   212		CharData("&lt-gt;"),
   213		EndElement{Name{"", "tag"}},
   214		CharData("\n"),
   215		StartElement{Name{"", "tag"}, []Attr{}},
   216		CharData("&;"),
   217		EndElement{Name{"", "tag"}},
   218		CharData("\n"),
   219		StartElement{Name{"", "tag"}, []Attr{}},
   220		CharData("&0a;"),
   221		EndElement{Name{"", "tag"}},
   222		CharData("\n"),
   223	}
   224	
   225	func TestNonStrictRawToken(t *testing.T) {
   226		d := NewDecoder(strings.NewReader(nonStrictInput))
   227		d.Strict = false
   228		testRawToken(t, d, nonStrictInput, nonStrictTokens)
   229	}
   230	
   231	type downCaser struct {
   232		t *testing.T
   233		r io.ByteReader
   234	}
   235	
   236	func (d *downCaser) ReadByte() (c byte, err error) {
   237		c, err = d.r.ReadByte()
   238		if c >= 'A' && c <= 'Z' {
   239			c += 'a' - 'A'
   240		}
   241		return
   242	}
   243	
   244	func (d *downCaser) Read(p []byte) (int, error) {
   245		d.t.Fatalf("unexpected Read call on downCaser reader")
   246		panic("unreachable")
   247	}
   248	
   249	func TestRawTokenAltEncoding(t *testing.T) {
   250		d := NewDecoder(strings.NewReader(testInputAltEncoding))
   251		d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   252			if charset != "x-testing-uppercase" {
   253				t.Fatalf("unexpected charset %q", charset)
   254			}
   255			return &downCaser{t, input.(io.ByteReader)}, nil
   256		}
   257		testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   258	}
   259	
   260	func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   261		d := NewDecoder(strings.NewReader(testInputAltEncoding))
   262		token, err := d.RawToken()
   263		if token == nil {
   264			t.Fatalf("expected a token on first RawToken call")
   265		}
   266		if err != nil {
   267			t.Fatal(err)
   268		}
   269		token, err = d.RawToken()
   270		if token != nil {
   271			t.Errorf("expected a nil token; got %#v", token)
   272		}
   273		if err == nil {
   274			t.Fatalf("expected an error on second RawToken call")
   275		}
   276		const encoding = "x-testing-uppercase"
   277		if !strings.Contains(err.Error(), encoding) {
   278			t.Errorf("expected error to contain %q; got error: %v",
   279				encoding, err)
   280		}
   281	}
   282	
   283	func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   284		lastEnd := int64(0)
   285		for i, want := range rawTokens {
   286			start := d.InputOffset()
   287			have, err := d.RawToken()
   288			end := d.InputOffset()
   289			if err != nil {
   290				t.Fatalf("token %d: unexpected error: %s", i, err)
   291			}
   292			if !reflect.DeepEqual(have, want) {
   293				var shave, swant string
   294				if _, ok := have.(CharData); ok {
   295					shave = fmt.Sprintf("CharData(%q)", have)
   296				} else {
   297					shave = fmt.Sprintf("%#v", have)
   298				}
   299				if _, ok := want.(CharData); ok {
   300					swant = fmt.Sprintf("CharData(%q)", want)
   301				} else {
   302					swant = fmt.Sprintf("%#v", want)
   303				}
   304				t.Errorf("token %d = %s, want %s", i, shave, swant)
   305			}
   306	
   307			// Check that InputOffset returned actual token.
   308			switch {
   309			case start < lastEnd:
   310				t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   311			case start >= end:
   312				// Special case: EndElement can be synthesized.
   313				if start == end && end == lastEnd {
   314					break
   315				}
   316				t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   317			case end > int64(len(raw)):
   318				t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   319			default:
   320				text := raw[start:end]
   321				if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   322					t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   323				}
   324			}
   325			lastEnd = end
   326		}
   327	}
   328	
   329	// Ensure that directives (specifically !DOCTYPE) include the complete
   330	// text of any nested directives, noting that < and > do not change
   331	// nesting depth if they are in single or double quotes.
   332	
      	// nestedDirectivesInput is the document decoded by TestNestedDirectives.
      	// Every line is a DOCTYPE directive wrapping one nested ENTITY
      	// declaration; after the first, the quoted entity values contain '<' or
      	// '>' characters in single or double quotes.
   333	var nestedDirectivesInput = `
   334	<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   335	<!DOCTYPE [<!ENTITY xlt ">">]>
   336	<!DOCTYPE [<!ENTITY xlt "<">]>
   337	<!DOCTYPE [<!ENTITY xlt '>'>]>
   338	<!DOCTYPE [<!ENTITY xlt '<'>]>
   339	<!DOCTYPE [<!ENTITY xlt '">'>]>
   340	<!DOCTYPE [<!ENTITY xlt "'<">]>
   341	`
   342	
      	// nestedDirectivesTokens is the token sequence TestNestedDirectives
      	// expects for nestedDirectivesInput: each line becomes one Directive
      	// holding the full text between "<!" and the final ">", with the
      	// newlines between directives returned as CharData.
   343	var nestedDirectivesTokens = []Token{
   344		CharData("\n"),
   345		Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   346		CharData("\n"),
   347		Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
   348		CharData("\n"),
   349		Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
   350		CharData("\n"),
   351		Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
   352		CharData("\n"),
   353		Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
   354		CharData("\n"),
   355		Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
   356		CharData("\n"),
   357		Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
   358		CharData("\n"),
   359	}
   360	
   361	func TestNestedDirectives(t *testing.T) {
   362		d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   363	
   364		for i, want := range nestedDirectivesTokens {
   365			have, err := d.Token()
   366			if err != nil {
   367				t.Fatalf("token %d: unexpected error: %s", i, err)
   368			}
   369			if !reflect.DeepEqual(have, want) {
   370				t.Errorf("token %d = %#v want %#v", i, have, want)
   371			}
   372		}
   373	}
   374	
   375	func TestToken(t *testing.T) {
   376		d := NewDecoder(strings.NewReader(testInput))
   377		d.Entity = testEntity
   378	
   379		for i, want := range cookedTokens {
   380			have, err := d.Token()
   381			if err != nil {
   382				t.Fatalf("token %d: unexpected error: %s", i, err)
   383			}
   384			if !reflect.DeepEqual(have, want) {
   385				t.Errorf("token %d = %#v want %#v", i, have, want)
   386			}
   387		}
   388	}
   389	
   390	func TestSyntax(t *testing.T) {
   391		for i := range xmlInput {
   392			d := NewDecoder(strings.NewReader(xmlInput[i]))
   393			var err error
   394			for _, err = d.Token(); err == nil; _, err = d.Token() {
   395			}
   396			if _, ok := err.(*SyntaxError); !ok {
   397				t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   398			}
   399		}
   400	}
   401	
   402	type allScalars struct {
   403		True1     bool
   404		True2     bool
   405		False1    bool
   406		False2    bool
   407		Int       int
   408		Int8      int8
   409		Int16     int16
   410		Int32     int32
   411		Int64     int64
   412		Uint      int
   413		Uint8     uint8
   414		Uint16    uint16
   415		Uint32    uint32
   416		Uint64    uint64
   417		Uintptr   uintptr
   418		Float32   float32
   419		Float64   float64
   420		String    string
   421		PtrString *string
   422	}
   423	
      	// all is the allScalars value TestAllScalars expects after unmarshalling
      	// testScalarsInput.
   424	var all = allScalars{
   425		True1:     true,
   426		True2:     true,
   427		False1:    false,
   428		False2:    false,
   429		Int:       1,
   430		Int8:      -2,
   431		Int16:     3,
   432		Int32:     -4,
   433		Int64:     5,
   434		Uint:      6,
   435		Uint8:     7,
   436		Uint16:    8,
   437		Uint32:    9,
   438		Uint64:    10,
   439		Uintptr:   11,
   440		Float32:   13.0,
   441		Float64:   14.0,
   442		String:    "15",
   443		PtrString: &sixteen,
   444	}
   445	
      	// sixteen is the string that all.PtrString points to.
   446	var sixteen = "16"
   447	
      	// testScalarsInput is the document TestAllScalars unmarshals into an
      	// allScalars value. Booleans are spelled both as words and as 1/0. The
      	// <Float> element has no field of that name in allScalars.
   448	const testScalarsInput = `<allscalars>
   449		<True1>true</True1>
   450		<True2>1</True2>
   451		<False1>false</False1>
   452		<False2>0</False2>
   453		<Int>1</Int>
   454		<Int8>-2</Int8>
   455		<Int16>3</Int16>
   456		<Int32>-4</Int32>
   457		<Int64>5</Int64>
   458		<Uint>6</Uint>
   459		<Uint8>7</Uint8>
   460		<Uint16>8</Uint16>
   461		<Uint32>9</Uint32>
   462		<Uint64>10</Uint64>
   463		<Uintptr>11</Uintptr>
   464		<Float>12.0</Float>
   465		<Float32>13.0</Float32>
   466		<Float64>14.0</Float64>
   467		<String>15</String>
   468		<PtrString>16</PtrString>
   469	</allscalars>`
   470	
   471	func TestAllScalars(t *testing.T) {
   472		var a allScalars
   473		err := Unmarshal([]byte(testScalarsInput), &a)
   474	
   475		if err != nil {
   476			t.Fatal(err)
   477		}
   478		if !reflect.DeepEqual(a, all) {
   479			t.Errorf("have %+v want %+v", a, all)
   480		}
   481	}
   482	
   483	type item struct {
   484		Field_a string
   485	}
   486	
   487	func TestIssue569(t *testing.T) {
   488		data := `<item><Field_a>abcd</Field_a></item>`
   489		var i item
   490		err := Unmarshal([]byte(data), &i)
   491	
   492		if err != nil || i.Field_a != "abcd" {
   493			t.Fatal("Expecting abcd")
   494		}
   495	}
   496	
   497	func TestUnquotedAttrs(t *testing.T) {
   498		data := "<tag attr=azAZ09:-_\t>"
   499		d := NewDecoder(strings.NewReader(data))
   500		d.Strict = false
   501		token, err := d.Token()
   502		if _, ok := err.(*SyntaxError); ok {
   503			t.Errorf("Unexpected error: %v", err)
   504		}
   505		if token.(StartElement).Name.Local != "tag" {
   506			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   507		}
   508		attr := token.(StartElement).Attr[0]
   509		if attr.Value != "azAZ09:-_" {
   510			t.Errorf("Unexpected attribute value: %v", attr.Value)
   511		}
   512		if attr.Name.Local != "attr" {
   513			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   514		}
   515	}
   516	
   517	func TestValuelessAttrs(t *testing.T) {
   518		tests := [][3]string{
   519			{"<p nowrap>", "p", "nowrap"},
   520			{"<p nowrap >", "p", "nowrap"},
   521			{"<input checked/>", "input", "checked"},
   522			{"<input checked />", "input", "checked"},
   523		}
   524		for _, test := range tests {
   525			d := NewDecoder(strings.NewReader(test[0]))
   526			d.Strict = false
   527			token, err := d.Token()
   528			if _, ok := err.(*SyntaxError); ok {
   529				t.Errorf("Unexpected error: %v", err)
   530			}
   531			if token.(StartElement).Name.Local != test[1] {
   532				t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   533			}
   534			attr := token.(StartElement).Attr[0]
   535			if attr.Value != test[2] {
   536				t.Errorf("Unexpected attribute value: %v", attr.Value)
   537			}
   538			if attr.Name.Local != test[2] {
   539				t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   540			}
   541		}
   542	}
   543	
   544	func TestCopyTokenCharData(t *testing.T) {
   545		data := []byte("same data")
   546		var tok1 Token = CharData(data)
   547		tok2 := CopyToken(tok1)
   548		if !reflect.DeepEqual(tok1, tok2) {
   549			t.Error("CopyToken(CharData) != CharData")
   550		}
   551		data[1] = 'o'
   552		if reflect.DeepEqual(tok1, tok2) {
   553			t.Error("CopyToken(CharData) uses same buffer.")
   554		}
   555	}
   556	
   557	func TestCopyTokenStartElement(t *testing.T) {
   558		elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   559		var tok1 Token = elt
   560		tok2 := CopyToken(tok1)
   561		if tok1.(StartElement).Attr[0].Value != "en" {
   562			t.Error("CopyToken overwrote Attr[0]")
   563		}
   564		if !reflect.DeepEqual(tok1, tok2) {
   565			t.Error("CopyToken(StartElement) != StartElement")
   566		}
   567		tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   568		if reflect.DeepEqual(tok1, tok2) {
   569			t.Error("CopyToken(CharData) uses same buffer.")
   570		}
   571	}
   572	
   573	func TestSyntaxErrorLineNum(t *testing.T) {
   574		testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   575		d := NewDecoder(strings.NewReader(testInput))
   576		var err error
   577		for _, err = d.Token(); err == nil; _, err = d.Token() {
   578		}
   579		synerr, ok := err.(*SyntaxError)
   580		if !ok {
   581			t.Error("Expected SyntaxError.")
   582		}
   583		if synerr.Line != 3 {
   584			t.Error("SyntaxError didn't have correct line number.")
   585		}
   586	}
   587	
   588	func TestTrailingRawToken(t *testing.T) {
   589		input := `<FOO></FOO>  `
   590		d := NewDecoder(strings.NewReader(input))
   591		var err error
   592		for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   593		}
   594		if err != io.EOF {
   595			t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   596		}
   597	}
   598	
   599	func TestTrailingToken(t *testing.T) {
   600		input := `<FOO></FOO>  `
   601		d := NewDecoder(strings.NewReader(input))
   602		var err error
   603		for _, err = d.Token(); err == nil; _, err = d.Token() {
   604		}
   605		if err != io.EOF {
   606			t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   607		}
   608	}
   609	
   610	func TestEntityInsideCDATA(t *testing.T) {
   611		input := `<test><![CDATA[ &val=foo ]]></test>`
   612		d := NewDecoder(strings.NewReader(input))
   613		var err error
   614		for _, err = d.Token(); err == nil; _, err = d.Token() {
   615		}
   616		if err != io.EOF {
   617			t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   618		}
   619	}
   620	
      	// characterTests pairs a document (in) with the exact SyntaxError.Msg
      	// (err) that TestDisallowedCharacters requires decoding to end with.
      	// The inputs place control bytes, U+FFFE or an undefined entity name at
      	// various positions in a document.
   621	var characterTests = []struct {
   622		in  string
   623		err string
   624	}{
   625		{"\x12<doc/>", "illegal character code U+0012"},
   626		{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
   627		{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
   628		{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
   629		{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
   630		{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
   631		{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
   632		{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
   633		{"<doc>&hello;</doc>", "invalid character entity &hello;"},
   634	}
   635	
   636	func TestDisallowedCharacters(t *testing.T) {
   637	
   638		for i, tt := range characterTests {
   639			d := NewDecoder(strings.NewReader(tt.in))
   640			var err error
   641	
   642			for err == nil {
   643				_, err = d.Token()
   644			}
   645			synerr, ok := err.(*SyntaxError)
   646			if !ok {
   647				t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   648			}
   649			if synerr.Msg != tt.err {
   650				t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   651			}
   652		}
   653	}
   654	
      	// procInstEncodingTest holds an expected and an actual encoding string.
      	// NOTE(review): nothing in the visible part of this file uses this type;
      	// procInstTests declares its own anonymous struct — confirm whether it
      	// is still needed.
   655	type procInstEncodingTest struct {
   656		expect, got string
   657	}
   658	
      	// procInstTests is the table for TestProcInstEncoding: input is the text
      	// passed to procInstEncoding and expect is the required return value.
      	// The case with an unquoted encoding value expects the empty string.
   659	var procInstTests = []struct {
   660		input, expect string
   661	}{
   662		{`version="1.0" encoding="utf-8"`, "utf-8"},
   663		{`version="1.0" encoding='utf-8'`, "utf-8"},
   664		{`version="1.0" encoding='utf-8' `, "utf-8"},
   665		{`version="1.0" encoding=utf-8`, ""},
   666		{`encoding="FOO" `, "FOO"},
   667	}
   668	
   669	func TestProcInstEncoding(t *testing.T) {
   670		for _, test := range procInstTests {
   671			got := procInstEncoding(test.input)
   672			if got != test.expect {
   673				t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
   674			}
   675		}
   676	}
   677	
   678	// Ensure that directives with comments include the complete
   679	// text of any nested directives.
   680	
      	// directivesWithCommentsInput is the document decoded by
      	// TestDirectivesWithComments. Each line is a DOCTYPE directive that
      	// contains one or more "<!-- ... -->" comments; the third line also
      	// contains the short sequences "<!->" and "<!>".
   681	var directivesWithCommentsInput = `
   682	<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   683	<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
   684	<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
   685	`
   686	
      	// directivesWithCommentsTokens is the token sequence
      	// TestDirectivesWithComments expects for directivesWithCommentsInput.
      	// The expected Directive text omits the comments found in the input; in
      	// the third directive "<!->" and "<!>" are kept and the surrounding
      	// spaces remain.
   687	var directivesWithCommentsTokens = []Token{
   688		CharData("\n"),
   689		Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   690		CharData("\n"),
   691		Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
   692		CharData("\n"),
   693		Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
   694		CharData("\n"),
   695	}
   696	
   697	func TestDirectivesWithComments(t *testing.T) {
   698		d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   699	
   700		for i, want := range directivesWithCommentsTokens {
   701			have, err := d.Token()
   702			if err != nil {
   703				t.Fatalf("token %d: unexpected error: %s", i, err)
   704			}
   705			if !reflect.DeepEqual(have, want) {
   706				t.Errorf("token %d = %#v want %#v", i, have, want)
   707			}
   708		}
   709	}
   710	
   711	// Writer whose Write method always returns an error.
   712	type errWriter struct{}
   713	
   714	func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   715	
   716	func TestEscapeTextIOErrors(t *testing.T) {
   717		expectErr := "unwritable"
   718		err := EscapeText(errWriter{}, []byte{'A'})
   719	
   720		if err == nil || err.Error() != expectErr {
   721			t.Errorf("have %v, want %v", err, expectErr)
   722		}
   723	}
   724	
   725	func TestEscapeTextInvalidChar(t *testing.T) {
   726		input := []byte("A \x00 terminated string.")
   727		expected := "A \uFFFD terminated string."
   728	
   729		buff := new(bytes.Buffer)
   730		if err := EscapeText(buff, input); err != nil {
   731			t.Fatalf("have %v, want nil", err)
   732		}
   733		text := buff.String()
   734	
   735		if text != expected {
   736			t.Errorf("have %v, want %v", text, expected)
   737		}
   738	}
   739	
   740	func TestIssue5880(t *testing.T) {
   741		type T []byte
   742		data, err := Marshal(T{192, 168, 0, 1})
   743		if err != nil {
   744			t.Errorf("Marshal error: %v", err)
   745		}
   746		if !utf8.Valid(data) {
   747			t.Errorf("Marshal generated invalid UTF-8: %x", data)
   748		}
   749	}
   750	

View as plain text