Source file src/pkg/encoding/xml/xml.go
1
2
3
4
5
6
7 package xml
8
9
10
11
12
13
14
15
16 import (
17 "bufio"
18 "bytes"
19 "fmt"
20 "io"
21 "strconv"
22 "strings"
23 "unicode"
24 "unicode/utf8"
25 )
26
27
// A SyntaxError reports a problem found in the XML input together with
// the line number that was current when the problem was detected.
type SyntaxError struct {
	Msg  string // human-readable description of the problem
	Line int    // input line on which the problem was noticed
}

// Error renders the error as "XML syntax error on line N: Msg".
func (e *SyntaxError) Error() string {
	line := strconv.Itoa(e.Line)
	return "XML syntax error on line " + line + ": " + e.Msg
}
36
37
38
39
40
41
// A Name is an XML name split at its first colon. Space is the part
// before the colon (empty when there is none) and Local is the rest.
// Decoder.Token may later replace Space with the namespace the prefix
// is bound to.
type Name struct {
	Space string
	Local string
}
45
46
// An Attr is a single attribute of a start element: its name and the
// attribute value as a string.
type Attr struct {
	Name  Name
	Value string
}
51
52
53
// A Token is a value produced by the decoder. RawToken returns one of
// StartElement, EndElement, CharData, Comment, ProcInst or Directive.
type Token interface{}
55
56
57 type StartElement struct {
58 Name Name
59 Attr []Attr
60 }
61
62 func (e StartElement) Copy() StartElement {
63 attrs := make([]Attr, len(e.Attr))
64 copy(attrs, e.Attr)
65 e.Attr = attrs
66 return e
67 }
68
69
// An EndElement is the token for a closing tag. RawToken also emits one
// right after a self-closing start tag.
type EndElement struct {
	Name Name
}
73
74
75
76
// CharData is a token holding character data. The decoder builds it from
// its internal buffer, so call Copy to keep the bytes beyond the next
// decoder call.
type CharData []byte
78
// makeCopy returns a newly allocated, non-nil slice with the same
// contents as b.
func makeCopy(b []byte) []byte {
	return append(make([]byte, 0, len(b)), b...)
}
84
85 func (c CharData) Copy() CharData { return CharData(makeCopy(c)) }
86
87
88
89 type Comment []byte
90
91 func (c Comment) Copy() Comment { return Comment(makeCopy(c)) }
92
93
94 type ProcInst struct {
95 Target string
96 Inst []byte
97 }
98
99 func (p ProcInst) Copy() ProcInst {
100 p.Inst = makeCopy(p.Inst)
101 return p
102 }
103
104
105
106 type Directive []byte
107
108 func (d Directive) Copy() Directive { return Directive(makeCopy(d)) }
109
110
111 func CopyToken(t Token) Token {
112 switch v := t.(type) {
113 case CharData:
114 return v.Copy()
115 case Comment:
116 return v.Copy()
117 case Directive:
118 return v.Copy()
119 case ProcInst:
120 return v.Copy()
121 case StartElement:
122 return v.Copy()
123 }
124 return t
125 }
126
127
128
// A Decoder reads XML from an input stream and splits it into tokens.
// Create one with NewDecoder, which sets up the internal state.
type Decoder struct {
	// Strict is set to true by NewDecoder. When it is false the decoder
	// tolerates several kinds of malformed input:
	//   - Token consults AutoClose to close elements automatically;
	//   - an end tag whose local name differs from the open element
	//     closes that element instead of causing an error;
	//   - attributes may omit "=value" (the value becomes the attribute
	//     name) or use an unquoted value;
	//   - unrecognized or malformed character entities are left in the
	//     text as written.
	Strict bool

	// AutoClose lists element names that, in non-strict mode, are closed
	// right after they are opened unless the next token is their own
	// end element.
	AutoClose []string

	// Entity supplies replacement text for named entities other than
	// the built-in lt, gt, amp, apos and quot. It may be nil.
	Entity map[string]string

	// CharsetReader, if non-nil, is called when an <?xml ...?> processing
	// instruction declares an encoding other than "utf-8" or "UTF-8".
	// The reader it returns replaces the decoder's input. If it is nil
	// in that situation, or returns an error, decoding stops with an
	// error; returning a nil reader with a nil error panics.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)

	// DefaultSpace is the namespace given to element names that have no
	// prefix when no default namespace binding (xmlns="...") is in effect.
	DefaultSpace string

	r         io.ByteReader     // input; set by switchToReader
	buf       bytes.Buffer      // scratch buffer reused for names, text and directives
	saved     *bytes.Buffer     // when non-nil, getc appends every byte read from r
	stk       *stack            // open elements and saved namespace bindings
	free      *stack            // recycled stack entries
	needClose bool              // RawToken must return EndElement{toClose} next
	toClose   Name              // name used for the pending end element
	nextToken Token             // token held back by Token after an automatic close
	nextByte  int               // byte pushed back by ungetc, or -1
	ns        map[string]string // namespace prefix -> bound value
	err       error             // sticky error; once set, reads fail
	line      int               // current line number, used in SyntaxError
}
190
191
192 func NewDecoder(r io.Reader) *Decoder {
193 d := &Decoder{
194 ns: make(map[string]string),
195 nextByte: -1,
196 line: 1,
197 Strict: true,
198 }
199 d.switchToReader(r)
200 return d
201 }
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225 func (d *Decoder) Token() (t Token, err error) {
226 if d.nextToken != nil {
227 t = d.nextToken
228 d.nextToken = nil
229 } else if t, err = d.RawToken(); err != nil {
230 return
231 }
232
233 if !d.Strict {
234 if t1, ok := d.autoClose(t); ok {
235 d.nextToken = t
236 t = t1
237 }
238 }
239 switch t1 := t.(type) {
240 case StartElement:
241
242
243
244
245 for _, a := range t1.Attr {
246 if a.Name.Space == "xmlns" {
247 v, ok := d.ns[a.Name.Local]
248 d.pushNs(a.Name.Local, v, ok)
249 d.ns[a.Name.Local] = a.Value
250 }
251 if a.Name.Space == "" && a.Name.Local == "xmlns" {
252
253 v, ok := d.ns[""]
254 d.pushNs("", v, ok)
255 d.ns[""] = a.Value
256 }
257 }
258
259 d.translate(&t1.Name, true)
260 for i := range t1.Attr {
261 d.translate(&t1.Attr[i].Name, false)
262 }
263 d.pushElement(t1.Name)
264 t = t1
265
266 case EndElement:
267 d.translate(&t1.Name, true)
268 if !d.popElement(&t1) {
269 return nil, d.err
270 }
271 t = t1
272 }
273 return
274 }
275
// xmlURL is the namespace that translate substitutes for the "xml" prefix.
const xmlURL = "http://www.w3.org/XML/1998/namespace"
277
278
279
280
281 func (d *Decoder) translate(n *Name, isElementName bool) {
282 switch {
283 case n.Space == "xmlns":
284 return
285 case n.Space == "" && !isElementName:
286 return
287 case n.Space == "xml":
288 n.Space = xmlURL
289 case n.Space == "" && n.Local == "xmlns":
290 return
291 }
292 if v, ok := d.ns[n.Space]; ok {
293 n.Space = v
294 } else if n.Space == "" {
295 n.Space = d.DefaultSpace
296 }
297 }
298
299 func (d *Decoder) switchToReader(r io.Reader) {
300
301
302
303
304 if rb, ok := r.(io.ByteReader); ok {
305 d.r = rb
306 } else {
307 d.r = bufio.NewReader(r)
308 }
309 }
310
311
312
313
314
// stack is one entry of the decoder's singly linked parsing stack. An
// entry either records an open element (kind stkStart, name is the
// element name) or a namespace binding to restore when the enclosing
// element closes (kind stkNs: name.Local is the prefix, name.Space the
// previous value, and ok reports whether a previous value existed).
type stack struct {
	next *stack
	kind int
	name Name
	ok   bool
}
321
// Values for stack.kind.
const (
	stkStart = iota // entry pushed by pushElement for an open element
	stkNs           // entry pushed by pushNs for a saved namespace binding
)
326
327 func (d *Decoder) push(kind int) *stack {
328 s := d.free
329 if s != nil {
330 d.free = s.next
331 } else {
332 s = new(stack)
333 }
334 s.next = d.stk
335 s.kind = kind
336 d.stk = s
337 return s
338 }
339
340 func (d *Decoder) pop() *stack {
341 s := d.stk
342 if s != nil {
343 d.stk = s.next
344 s.next = d.free
345 d.free = s
346 }
347 return s
348 }
349
350
351 func (d *Decoder) pushElement(name Name) {
352 s := d.push(stkStart)
353 s.name = name
354 }
355
356
357
358 func (d *Decoder) pushNs(local string, url string, ok bool) {
359 s := d.push(stkNs)
360 s.name.Local = local
361 s.name.Space = url
362 s.ok = ok
363 }
364
365
366 func (d *Decoder) syntaxError(msg string) error {
367 return &SyntaxError{Msg: msg, Line: d.line}
368 }
369
370
371
372
373
374
375
376 func (d *Decoder) popElement(t *EndElement) bool {
377 s := d.pop()
378 name := t.Name
379 switch {
380 case s == nil || s.kind != stkStart:
381 d.err = d.syntaxError("unexpected end element </" + name.Local + ">")
382 return false
383 case s.name.Local != name.Local:
384 if !d.Strict {
385 d.needClose = true
386 d.toClose = t.Name
387 t.Name = s.name
388 return true
389 }
390 d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
391 return false
392 case s.name.Space != name.Space:
393 d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
394 "closed by </" + name.Local + "> in space " + name.Space)
395 return false
396 }
397
398
399
400 for d.stk != nil && d.stk.kind != stkStart {
401 s := d.pop()
402 if s.ok {
403 d.ns[s.name.Local] = s.name.Space
404 } else {
405 delete(d.ns, s.name.Local)
406 }
407 }
408
409 return true
410 }
411
412
413
414 func (d *Decoder) autoClose(t Token) (Token, bool) {
415 if d.stk == nil || d.stk.kind != stkStart {
416 return nil, false
417 }
418 name := strings.ToLower(d.stk.name.Local)
419 for _, s := range d.AutoClose {
420 if strings.ToLower(s) == name {
421
422 et, ok := t.(EndElement)
423 if !ok || et.Name.Local != name {
424 return EndElement{d.stk.name}, true
425 }
426 break
427 }
428 }
429 return nil, false
430 }
431
432
433
434
// RawToken returns the next token read directly from the input, without
// namespace translation and without checking that start and end elements
// match. Once the decoder has recorded an error, every call returns it.
//
// The token may refer to the decoder's internal buffer (CharData,
// Comment, ProcInst.Inst, Directive), which is reused by later calls.
func (d *Decoder) RawToken() (Token, error) {
	if d.err != nil {
		return nil, d.err
	}
	if d.needClose {
		// The previous call returned the start of a self-closing element
		// (or popElement scheduled a close); deliver the end element now.
		d.needClose = false
		return EndElement{d.toClose}, nil
	}

	b, ok := d.getc()
	if !ok {
		return nil, d.err
	}

	if b != '<' {
		// Plain text: put the byte back and read up to the next '<'.
		d.ungetc(b)
		data := d.text(-1, false)
		if data == nil {
			return nil, d.err
		}
		return CharData(data), nil
	}

	if b, ok = d.mustgetc(); !ok {
		return nil, d.err
	}
	switch b {
	case '/':
		// </name>: end element.
		var name Name
		if name, ok = d.nsname(); !ok {
			if d.err == nil {
				d.err = d.syntaxError("expected element name after </")
			}
			return nil, d.err
		}
		d.space()
		if b, ok = d.mustgetc(); !ok {
			return nil, d.err
		}
		if b != '>' {
			d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
			return nil, d.err
		}
		return EndElement{name}, nil

	case '?':
		// <?target ...?>: processing instruction. Everything after the
		// target and following white space, up to "?>", is the
		// instruction text.
		var target string
		if target, ok = d.name(); !ok {
			if d.err == nil {
				d.err = d.syntaxError("expected target name after <?")
			}
			return nil, d.err
		}
		d.space()
		d.buf.Reset()
		var b0 byte
		for {
			if b, ok = d.mustgetc(); !ok {
				return nil, d.err
			}
			d.buf.WriteByte(b)
			if b0 == '?' && b == '>' {
				break
			}
			b0 = b
		}
		data := d.buf.Bytes()
		data = data[0 : len(data)-2] // chop the trailing ?>

		if target == "xml" {
			// An XML declaration may name an encoding; anything other
			// than utf-8/UTF-8 requires the caller's CharsetReader.
			enc := procInstEncoding(string(data))
			if enc != "" && enc != "utf-8" && enc != "UTF-8" {
				if d.CharsetReader == nil {
					d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc)
					return nil, d.err
				}
				// NOTE(review): this assertion panics if the current
				// byte reader does not also implement io.Reader.
				newr, err := d.CharsetReader(enc, d.r.(io.Reader))
				if err != nil {
					d.err = fmt.Errorf("xml: opening charset %q: %v", enc, err)
					return nil, d.err
				}
				if newr == nil {
					panic("CharsetReader returned a nil Reader for charset " + enc)
				}
				d.switchToReader(newr)
			}
		}
		return ProcInst{target, data}, nil

	case '!':
		// <!: comment, CDATA section, or directive.
		if b, ok = d.mustgetc(); !ok {
			return nil, d.err
		}
		switch b {
		case '-':
			// <!-: must be the start of a comment <!-- ... -->.
			if b, ok = d.mustgetc(); !ok {
				return nil, d.err
			}
			if b != '-' {
				d.err = d.syntaxError("invalid sequence <!- not part of <!--")
				return nil, d.err
			}
			// Collect bytes until the terminating -->.
			d.buf.Reset()
			var b0, b1 byte
			for {
				if b, ok = d.mustgetc(); !ok {
					return nil, d.err
				}
				d.buf.WriteByte(b)
				if b0 == '-' && b1 == '-' && b == '>' {
					break
				}
				b0, b1 = b1, b
			}
			data := d.buf.Bytes()
			data = data[0 : len(data)-3] // chop the trailing -->
			return Comment(data), nil

		case '[':
			// <![: must be <![CDATA[; the section body is returned as
			// character data.
			for i := 0; i < 6; i++ {
				if b, ok = d.mustgetc(); !ok {
					return nil, d.err
				}
				if b != "CDATA["[i] {
					d.err = d.syntaxError("invalid <![ sequence")
					return nil, d.err
				}
			}
			data := d.text(-1, true)
			if data == nil {
				return nil, d.err
			}
			return CharData(data), nil
		}

		// Anything else after <! is a directive. Read up to the matching
		// '>', keeping track of quoted strings and of nested <...> pairs
		// so that a '>' inside either does not end the directive.
		d.buf.Reset()
		d.buf.WriteByte(b)
		inquote := uint8(0)
		depth := 0
		for {
			if b, ok = d.mustgetc(); !ok {
				return nil, d.err
			}
			if inquote == 0 && b == '>' && depth == 0 {
				break
			}
		HandleB:
			d.buf.WriteByte(b)
			switch {
			case b == inquote:
				// Closing quote.
				inquote = 0

			case inquote != 0:
				// Inside a quoted string: nothing is special.

			case b == '\'' || b == '"':
				inquote = b

			case b == '>' && inquote == 0:
				depth--

			case b == '<' && inquote == 0:
				// Check whether this '<' starts a comment (<!--).
				s := "!--"
				for i := 0; i < len(s); i++ {
					if b, ok = d.mustgetc(); !ok {
						return nil, d.err
					}
					if b != s[i] {
						// Not a comment: emit the bytes matched so far,
						// count the nesting, and process b normally.
						for j := 0; j < i; j++ {
							d.buf.WriteByte(s[j])
						}
						depth++
						goto HandleB
					}
				}

				// It is a comment: remove the '<' already written ...
				d.buf.Truncate(d.buf.Len() - 1)

				// ... and discard everything through the closing -->.
				var b0, b1 byte
				for {
					if b, ok = d.mustgetc(); !ok {
						return nil, d.err
					}
					if b0 == '-' && b1 == '-' && b == '>' {
						break
					}
					b0, b1 = b1, b
				}
			}
		}
		return Directive(d.buf.Bytes()), nil
	}

	// Otherwise this is a start element: put the byte back and read the
	// element name followed by its attributes.
	d.ungetc(b)

	var (
		name  Name
		empty bool
		attr  []Attr
	)
	if name, ok = d.nsname(); !ok {
		if d.err == nil {
			d.err = d.syntaxError("expected element name after <")
		}
		return nil, d.err
	}

	attr = make([]Attr, 0, 4)
	for {
		d.space()
		if b, ok = d.mustgetc(); !ok {
			return nil, d.err
		}
		if b == '/' {
			// "/>" ends a self-closing element.
			empty = true
			if b, ok = d.mustgetc(); !ok {
				return nil, d.err
			}
			if b != '>' {
				d.err = d.syntaxError("expected /> in element")
				return nil, d.err
			}
			break
		}
		if b == '>' {
			break
		}
		d.ungetc(b)

		// Make room for one more attribute, doubling the capacity when
		// the slice is full.
		n := len(attr)
		if n >= cap(attr) {
			nattr := make([]Attr, n, 2*cap(attr))
			copy(nattr, attr)
			attr = nattr
		}
		attr = attr[0 : n+1]
		a := &attr[n]
		if a.Name, ok = d.nsname(); !ok {
			if d.err == nil {
				d.err = d.syntaxError("expected attribute name in element")
			}
			return nil, d.err
		}
		d.space()
		if b, ok = d.mustgetc(); !ok {
			return nil, d.err
		}
		if b != '=' {
			if d.Strict {
				d.err = d.syntaxError("attribute name without = in element")
				return nil, d.err
			} else {
				// Non-strict: a bare attribute gets its own name as value.
				d.ungetc(b)
				a.Value = a.Name.Local
			}
		} else {
			d.space()
			data := d.attrval()
			if data == nil {
				return nil, d.err
			}
			a.Value = string(data)
		}
	}
	if empty {
		// Arrange for the next call to return the matching end element.
		d.needClose = true
		d.toClose = name
	}
	return StartElement{name, attr}, nil
}
725
726 func (d *Decoder) attrval() []byte {
727 b, ok := d.mustgetc()
728 if !ok {
729 return nil
730 }
731
732 if b == '"' || b == '\'' {
733 return d.text(int(b), false)
734 }
735
736 if d.Strict {
737 d.err = d.syntaxError("unquoted or missing attribute value in element")
738 return nil
739 }
740
741 d.ungetc(b)
742 d.buf.Reset()
743 for {
744 b, ok = d.mustgetc()
745 if !ok {
746 return nil
747 }
748
749 if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
750 '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
751 d.buf.WriteByte(b)
752 } else {
753 d.ungetc(b)
754 break
755 }
756 }
757 return d.buf.Bytes()
758 }
759
760
761 func (d *Decoder) space() {
762 for {
763 b, ok := d.getc()
764 if !ok {
765 return
766 }
767 switch b {
768 case ' ', '\r', '\n', '\t':
769 default:
770 d.ungetc(b)
771 return
772 }
773 }
774 }
775
776
777
778
779
780 func (d *Decoder) getc() (b byte, ok bool) {
781 if d.err != nil {
782 return 0, false
783 }
784 if d.nextByte >= 0 {
785 b = byte(d.nextByte)
786 d.nextByte = -1
787 } else {
788 b, d.err = d.r.ReadByte()
789 if d.err != nil {
790 return 0, false
791 }
792 if d.saved != nil {
793 d.saved.WriteByte(b)
794 }
795 }
796 if b == '\n' {
797 d.line++
798 }
799 return b, true
800 }
801
802
803
804 func (d *Decoder) savedOffset() int {
805 n := d.saved.Len()
806 if d.nextByte >= 0 {
807 n--
808 }
809 return n
810 }
811
812
813
814
815
816 func (d *Decoder) mustgetc() (b byte, ok bool) {
817 if b, ok = d.getc(); !ok {
818 if d.err == io.EOF {
819 d.err = d.syntaxError("unexpected EOF")
820 }
821 }
822 return
823 }
824
825
826 func (d *Decoder) ungetc(b byte) {
827 if b == '\n' {
828 d.line--
829 }
830 d.nextByte = int(b)
831 }
832
// entity maps the names of the built-in character entities to the
// characters they stand for. It is consulted before Decoder.Entity.
var entity = map[string]int{
	"lt":   '<',
	"gt":   '>',
	"amp":  '&',
	"apos": '\'',
	"quot": '"',
}
840
841
842
843
844
845 func (d *Decoder) text(quote int, cdata bool) []byte {
846 var b0, b1 byte
847 var trunc int
848 d.buf.Reset()
849 Input:
850 for {
851 b, ok := d.getc()
852 if !ok {
853 if cdata {
854 if d.err == io.EOF {
855 d.err = d.syntaxError("unexpected EOF in CDATA section")
856 }
857 return nil
858 }
859 break Input
860 }
861
862
863
864 if b0 == ']' && b1 == ']' && b == '>' {
865 if cdata {
866 trunc = 2
867 break Input
868 }
869 d.err = d.syntaxError("unescaped ]]> not in CDATA section")
870 return nil
871 }
872
873
874 if b == '<' && !cdata {
875 if quote >= 0 {
876 d.err = d.syntaxError("unescaped < inside quoted string")
877 return nil
878 }
879 d.ungetc('<')
880 break Input
881 }
882 if quote >= 0 && b == byte(quote) {
883 break Input
884 }
885 if b == '&' && !cdata {
886
887
888
889
890
891 before := d.buf.Len()
892 d.buf.WriteByte('&')
893 var ok bool
894 var text string
895 var haveText bool
896 if b, ok = d.mustgetc(); !ok {
897 return nil
898 }
899 if b == '#' {
900 d.buf.WriteByte(b)
901 if b, ok = d.mustgetc(); !ok {
902 return nil
903 }
904 base := 10
905 if b == 'x' {
906 base = 16
907 d.buf.WriteByte(b)
908 if b, ok = d.mustgetc(); !ok {
909 return nil
910 }
911 }
912 start := d.buf.Len()
913 for '0' <= b && b <= '9' ||
914 base == 16 && 'a' <= b && b <= 'f' ||
915 base == 16 && 'A' <= b && b <= 'F' {
916 d.buf.WriteByte(b)
917 if b, ok = d.mustgetc(); !ok {
918 return nil
919 }
920 }
921 if b != ';' {
922 d.ungetc(b)
923 } else {
924 s := string(d.buf.Bytes()[start:])
925 d.buf.WriteByte(';')
926 n, err := strconv.ParseUint(s, base, 64)
927 if err == nil && n <= unicode.MaxRune {
928 text = string(n)
929 haveText = true
930 }
931 }
932 } else {
933 d.ungetc(b)
934 if !d.readName() {
935 if d.err != nil {
936 return nil
937 }
938 ok = false
939 }
940 if b, ok = d.mustgetc(); !ok {
941 return nil
942 }
943 if b != ';' {
944 d.ungetc(b)
945 } else {
946 name := d.buf.Bytes()[before+1:]
947 d.buf.WriteByte(';')
948 if isName(name) {
949 s := string(name)
950 if r, ok := entity[s]; ok {
951 text = string(r)
952 haveText = true
953 } else if d.Entity != nil {
954 text, haveText = d.Entity[s]
955 }
956 }
957 }
958 }
959
960 if haveText {
961 d.buf.Truncate(before)
962 d.buf.Write([]byte(text))
963 b0, b1 = 0, 0
964 continue Input
965 }
966 if !d.Strict {
967 b0, b1 = 0, 0
968 continue Input
969 }
970 ent := string(d.buf.Bytes()[before:])
971 if ent[len(ent)-1] != ';' {
972 ent += " (no semicolon)"
973 }
974 d.err = d.syntaxError("invalid character entity " + ent)
975 return nil
976 }
977
978
979 if b == '\r' {
980 d.buf.WriteByte('\n')
981 } else if b1 == '\r' && b == '\n' {
982
983 } else {
984 d.buf.WriteByte(b)
985 }
986
987 b0, b1 = b1, b
988 }
989 data := d.buf.Bytes()
990 data = data[0 : len(data)-trunc]
991
992
993 buf := data
994 for len(buf) > 0 {
995 r, size := utf8.DecodeRune(buf)
996 if r == utf8.RuneError && size == 1 {
997 d.err = d.syntaxError("invalid UTF-8")
998 return nil
999 }
1000 buf = buf[size:]
1001 if !isInCharacterRange(r) {
1002 d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r))
1003 return nil
1004 }
1005 }
1006
1007 return data
1008 }
1009
1010
1011
1012
// isInCharacterRange reports whether r is a character allowed in an XML
// document: tab, line feed, carriage return, U+0020–U+D7FF,
// U+E000–U+FFFD, or U+10000–U+10FFFF. The surrogate range
// U+D800–U+DFFF is excluded.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}
1021
1022
1023
1024 func (d *Decoder) nsname() (name Name, ok bool) {
1025 s, ok := d.name()
1026 if !ok {
1027 return
1028 }
1029 i := strings.Index(s, ":")
1030 if i < 0 {
1031 name.Local = s
1032 } else {
1033 name.Space = s[0:i]
1034 name.Local = s[i+1:]
1035 }
1036 return name, true
1037 }
1038
1039
1040
1041
1042 func (d *Decoder) name() (s string, ok bool) {
1043 d.buf.Reset()
1044 if !d.readName() {
1045 return "", false
1046 }
1047
1048
1049 s = d.buf.String()
1050 if !isName([]byte(s)) {
1051 d.err = d.syntaxError("invalid XML name: " + s)
1052 return "", false
1053 }
1054 return s, true
1055 }
1056
1057
1058
1059
1060 func (d *Decoder) readName() (ok bool) {
1061 var b byte
1062 if b, ok = d.mustgetc(); !ok {
1063 return
1064 }
1065 if b < utf8.RuneSelf && !isNameByte(b) {
1066 d.ungetc(b)
1067 return false
1068 }
1069 d.buf.WriteByte(b)
1070
1071 for {
1072 if b, ok = d.mustgetc(); !ok {
1073 return
1074 }
1075 if b < utf8.RuneSelf && !isNameByte(b) {
1076 d.ungetc(b)
1077 break
1078 }
1079 d.buf.WriteByte(b)
1080 }
1081 return true
1082 }
1083
// isNameByte reports whether c is an ASCII letter, an ASCII digit, or one
// of '_', ':', '.', '-'.
func isNameByte(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '_', ':', '.', '-':
		return true
	}
	return false
}
1090
1091 func isName(s []byte) bool {
1092 if len(s) == 0 {
1093 return false
1094 }
1095 c, n := utf8.DecodeRune(s)
1096 if c == utf8.RuneError && n == 1 {
1097 return false
1098 }
1099 if !unicode.Is(first, c) {
1100 return false
1101 }
1102 for n < len(s) {
1103 s = s[n:]
1104 c, n = utf8.DecodeRune(s)
1105 if c == utf8.RuneError && n == 1 {
1106 return false
1107 }
1108 if !unicode.Is(first, c) && !unicode.Is(second, c) {
1109 return false
1110 }
1111 }
1112 return true
1113 }
1114
1115
1116
1117
1118
1119
// first is the set of characters that isName accepts as the first
// character of a name (and at any later position as well). Each entry is
// a {Lo, Hi, Stride} range as defined by unicode.Range16.
var first = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x003A, 0x003A, 1},
		{0x0041, 0x005A, 1},
		{0x005F, 0x005F, 1},
		{0x0061, 0x007A, 1},
		{0x00C0, 0x00D6, 1},
		{0x00D8, 0x00F6, 1},
		{0x00F8, 0x00FF, 1},
		{0x0100, 0x0131, 1},
		{0x0134, 0x013E, 1},
		{0x0141, 0x0148, 1},
		{0x014A, 0x017E, 1},
		{0x0180, 0x01C3, 1},
		{0x01CD, 0x01F0, 1},
		{0x01F4, 0x01F5, 1},
		{0x01FA, 0x0217, 1},
		{0x0250, 0x02A8, 1},
		{0x02BB, 0x02C1, 1},
		{0x0386, 0x0386, 1},
		{0x0388, 0x038A, 1},
		{0x038C, 0x038C, 1},
		{0x038E, 0x03A1, 1},
		{0x03A3, 0x03CE, 1},
		{0x03D0, 0x03D6, 1},
		{0x03DA, 0x03E0, 2},
		{0x03E2, 0x03F3, 1},
		{0x0401, 0x040C, 1},
		{0x040E, 0x044F, 1},
		{0x0451, 0x045C, 1},
		{0x045E, 0x0481, 1},
		{0x0490, 0x04C4, 1},
		{0x04C7, 0x04C8, 1},
		{0x04CB, 0x04CC, 1},
		{0x04D0, 0x04EB, 1},
		{0x04EE, 0x04F5, 1},
		{0x04F8, 0x04F9, 1},
		{0x0531, 0x0556, 1},
		{0x0559, 0x0559, 1},
		{0x0561, 0x0586, 1},
		{0x05D0, 0x05EA, 1},
		{0x05F0, 0x05F2, 1},
		{0x0621, 0x063A, 1},
		{0x0641, 0x064A, 1},
		{0x0671, 0x06B7, 1},
		{0x06BA, 0x06BE, 1},
		{0x06C0, 0x06CE, 1},
		{0x06D0, 0x06D3, 1},
		{0x06D5, 0x06D5, 1},
		{0x06E5, 0x06E6, 1},
		{0x0905, 0x0939, 1},
		{0x093D, 0x093D, 1},
		{0x0958, 0x0961, 1},
		{0x0985, 0x098C, 1},
		{0x098F, 0x0990, 1},
		{0x0993, 0x09A8, 1},
		{0x09AA, 0x09B0, 1},
		{0x09B2, 0x09B2, 1},
		{0x09B6, 0x09B9, 1},
		{0x09DC, 0x09DD, 1},
		{0x09DF, 0x09E1, 1},
		{0x09F0, 0x09F1, 1},
		{0x0A05, 0x0A0A, 1},
		{0x0A0F, 0x0A10, 1},
		{0x0A13, 0x0A28, 1},
		{0x0A2A, 0x0A30, 1},
		{0x0A32, 0x0A33, 1},
		{0x0A35, 0x0A36, 1},
		{0x0A38, 0x0A39, 1},
		{0x0A59, 0x0A5C, 1},
		{0x0A5E, 0x0A5E, 1},
		{0x0A72, 0x0A74, 1},
		{0x0A85, 0x0A8B, 1},
		{0x0A8D, 0x0A8D, 1},
		{0x0A8F, 0x0A91, 1},
		{0x0A93, 0x0AA8, 1},
		{0x0AAA, 0x0AB0, 1},
		{0x0AB2, 0x0AB3, 1},
		{0x0AB5, 0x0AB9, 1},
		{0x0ABD, 0x0AE0, 0x23},
		{0x0B05, 0x0B0C, 1},
		{0x0B0F, 0x0B10, 1},
		{0x0B13, 0x0B28, 1},
		{0x0B2A, 0x0B30, 1},
		{0x0B32, 0x0B33, 1},
		{0x0B36, 0x0B39, 1},
		{0x0B3D, 0x0B3D, 1},
		{0x0B5C, 0x0B5D, 1},
		{0x0B5F, 0x0B61, 1},
		{0x0B85, 0x0B8A, 1},
		{0x0B8E, 0x0B90, 1},
		{0x0B92, 0x0B95, 1},
		{0x0B99, 0x0B9A, 1},
		{0x0B9C, 0x0B9C, 1},
		{0x0B9E, 0x0B9F, 1},
		{0x0BA3, 0x0BA4, 1},
		{0x0BA8, 0x0BAA, 1},
		{0x0BAE, 0x0BB5, 1},
		{0x0BB7, 0x0BB9, 1},
		{0x0C05, 0x0C0C, 1},
		{0x0C0E, 0x0C10, 1},
		{0x0C12, 0x0C28, 1},
		{0x0C2A, 0x0C33, 1},
		{0x0C35, 0x0C39, 1},
		{0x0C60, 0x0C61, 1},
		{0x0C85, 0x0C8C, 1},
		{0x0C8E, 0x0C90, 1},
		{0x0C92, 0x0CA8, 1},
		{0x0CAA, 0x0CB3, 1},
		{0x0CB5, 0x0CB9, 1},
		{0x0CDE, 0x0CDE, 1},
		{0x0CE0, 0x0CE1, 1},
		{0x0D05, 0x0D0C, 1},
		{0x0D0E, 0x0D10, 1},
		{0x0D12, 0x0D28, 1},
		{0x0D2A, 0x0D39, 1},
		{0x0D60, 0x0D61, 1},
		{0x0E01, 0x0E2E, 1},
		{0x0E30, 0x0E30, 1},
		{0x0E32, 0x0E33, 1},
		{0x0E40, 0x0E45, 1},
		{0x0E81, 0x0E82, 1},
		{0x0E84, 0x0E84, 1},
		{0x0E87, 0x0E88, 1},
		{0x0E8A, 0x0E8D, 3},
		{0x0E94, 0x0E97, 1},
		{0x0E99, 0x0E9F, 1},
		{0x0EA1, 0x0EA3, 1},
		{0x0EA5, 0x0EA7, 2},
		{0x0EAA, 0x0EAB, 1},
		{0x0EAD, 0x0EAE, 1},
		{0x0EB0, 0x0EB0, 1},
		{0x0EB2, 0x0EB3, 1},
		{0x0EBD, 0x0EBD, 1},
		{0x0EC0, 0x0EC4, 1},
		{0x0F40, 0x0F47, 1},
		{0x0F49, 0x0F69, 1},
		{0x10A0, 0x10C5, 1},
		{0x10D0, 0x10F6, 1},
		{0x1100, 0x1100, 1},
		{0x1102, 0x1103, 1},
		{0x1105, 0x1107, 1},
		{0x1109, 0x1109, 1},
		{0x110B, 0x110C, 1},
		{0x110E, 0x1112, 1},
		{0x113C, 0x1140, 2},
		{0x114C, 0x1150, 2},
		{0x1154, 0x1155, 1},
		{0x1159, 0x1159, 1},
		{0x115F, 0x1161, 1},
		{0x1163, 0x1169, 2},
		{0x116D, 0x116E, 1},
		{0x1172, 0x1173, 1},
		{0x1175, 0x119E, 0x119E - 0x1175},
		{0x11A8, 0x11AB, 0x11AB - 0x11A8},
		{0x11AE, 0x11AF, 1},
		{0x11B7, 0x11B8, 1},
		{0x11BA, 0x11BA, 1},
		{0x11BC, 0x11C2, 1},
		{0x11EB, 0x11F0, 0x11F0 - 0x11EB},
		{0x11F9, 0x11F9, 1},
		{0x1E00, 0x1E9B, 1},
		{0x1EA0, 0x1EF9, 1},
		{0x1F00, 0x1F15, 1},
		{0x1F18, 0x1F1D, 1},
		{0x1F20, 0x1F45, 1},
		{0x1F48, 0x1F4D, 1},
		{0x1F50, 0x1F57, 1},
		{0x1F59, 0x1F5B, 0x1F5B - 0x1F59},
		{0x1F5D, 0x1F5D, 1},
		{0x1F5F, 0x1F7D, 1},
		{0x1F80, 0x1FB4, 1},
		{0x1FB6, 0x1FBC, 1},
		{0x1FBE, 0x1FBE, 1},
		{0x1FC2, 0x1FC4, 1},
		{0x1FC6, 0x1FCC, 1},
		{0x1FD0, 0x1FD3, 1},
		{0x1FD6, 0x1FDB, 1},
		{0x1FE0, 0x1FEC, 1},
		{0x1FF2, 0x1FF4, 1},
		{0x1FF6, 0x1FFC, 1},
		{0x2126, 0x2126, 1},
		{0x212A, 0x212B, 1},
		{0x212E, 0x212E, 1},
		{0x2180, 0x2182, 1},
		{0x3007, 0x3007, 1},
		{0x3021, 0x3029, 1},
		{0x3041, 0x3094, 1},
		{0x30A1, 0x30FA, 1},
		{0x3105, 0x312C, 1},
		{0x4E00, 0x9FA5, 1},
		{0xAC00, 0xD7A3, 1},
	},
}
1314
// second is the set of characters that isName accepts, in addition to
// those in first, at every position of a name except the first. Each
// entry is a {Lo, Hi, Stride} range as defined by unicode.Range16.
var second = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x002D, 0x002E, 1},
		{0x0030, 0x0039, 1},
		{0x00B7, 0x00B7, 1},
		{0x02D0, 0x02D1, 1},
		{0x0300, 0x0345, 1},
		{0x0360, 0x0361, 1},
		{0x0387, 0x0387, 1},
		{0x0483, 0x0486, 1},
		{0x0591, 0x05A1, 1},
		{0x05A3, 0x05B9, 1},
		{0x05BB, 0x05BD, 1},
		{0x05BF, 0x05BF, 1},
		{0x05C1, 0x05C2, 1},
		{0x05C4, 0x0640, 0x0640 - 0x05C4},
		{0x064B, 0x0652, 1},
		{0x0660, 0x0669, 1},
		{0x0670, 0x0670, 1},
		{0x06D6, 0x06DC, 1},
		{0x06DD, 0x06DF, 1},
		{0x06E0, 0x06E4, 1},
		{0x06E7, 0x06E8, 1},
		{0x06EA, 0x06ED, 1},
		{0x06F0, 0x06F9, 1},
		{0x0901, 0x0903, 1},
		{0x093C, 0x093C, 1},
		{0x093E, 0x094C, 1},
		{0x094D, 0x094D, 1},
		{0x0951, 0x0954, 1},
		{0x0962, 0x0963, 1},
		{0x0966, 0x096F, 1},
		{0x0981, 0x0983, 1},
		{0x09BC, 0x09BC, 1},
		{0x09BE, 0x09BF, 1},
		{0x09C0, 0x09C4, 1},
		{0x09C7, 0x09C8, 1},
		{0x09CB, 0x09CD, 1},
		{0x09D7, 0x09D7, 1},
		{0x09E2, 0x09E3, 1},
		{0x09E6, 0x09EF, 1},
		{0x0A02, 0x0A3C, 0x3A},
		{0x0A3E, 0x0A3F, 1},
		{0x0A40, 0x0A42, 1},
		{0x0A47, 0x0A48, 1},
		{0x0A4B, 0x0A4D, 1},
		{0x0A66, 0x0A6F, 1},
		{0x0A70, 0x0A71, 1},
		{0x0A81, 0x0A83, 1},
		{0x0ABC, 0x0ABC, 1},
		{0x0ABE, 0x0AC5, 1},
		{0x0AC7, 0x0AC9, 1},
		{0x0ACB, 0x0ACD, 1},
		{0x0AE6, 0x0AEF, 1},
		{0x0B01, 0x0B03, 1},
		{0x0B3C, 0x0B3C, 1},
		{0x0B3E, 0x0B43, 1},
		{0x0B47, 0x0B48, 1},
		{0x0B4B, 0x0B4D, 1},
		{0x0B56, 0x0B57, 1},
		{0x0B66, 0x0B6F, 1},
		{0x0B82, 0x0B83, 1},
		{0x0BBE, 0x0BC2, 1},
		{0x0BC6, 0x0BC8, 1},
		{0x0BCA, 0x0BCD, 1},
		{0x0BD7, 0x0BD7, 1},
		{0x0BE7, 0x0BEF, 1},
		{0x0C01, 0x0C03, 1},
		{0x0C3E, 0x0C44, 1},
		{0x0C46, 0x0C48, 1},
		{0x0C4A, 0x0C4D, 1},
		{0x0C55, 0x0C56, 1},
		{0x0C66, 0x0C6F, 1},
		{0x0C82, 0x0C83, 1},
		{0x0CBE, 0x0CC4, 1},
		{0x0CC6, 0x0CC8, 1},
		{0x0CCA, 0x0CCD, 1},
		{0x0CD5, 0x0CD6, 1},
		{0x0CE6, 0x0CEF, 1},
		{0x0D02, 0x0D03, 1},
		{0x0D3E, 0x0D43, 1},
		{0x0D46, 0x0D48, 1},
		{0x0D4A, 0x0D4D, 1},
		{0x0D57, 0x0D57, 1},
		{0x0D66, 0x0D6F, 1},
		{0x0E31, 0x0E31, 1},
		{0x0E34, 0x0E3A, 1},
		{0x0E46, 0x0E46, 1},
		{0x0E47, 0x0E4E, 1},
		{0x0E50, 0x0E59, 1},
		{0x0EB1, 0x0EB1, 1},
		{0x0EB4, 0x0EB9, 1},
		{0x0EBB, 0x0EBC, 1},
		{0x0EC6, 0x0EC6, 1},
		{0x0EC8, 0x0ECD, 1},
		{0x0ED0, 0x0ED9, 1},
		{0x0F18, 0x0F19, 1},
		{0x0F20, 0x0F29, 1},
		{0x0F35, 0x0F39, 2},
		{0x0F3E, 0x0F3F, 1},
		{0x0F71, 0x0F84, 1},
		{0x0F86, 0x0F8B, 1},
		{0x0F90, 0x0F95, 1},
		{0x0F97, 0x0F97, 1},
		{0x0F99, 0x0FAD, 1},
		{0x0FB1, 0x0FB7, 1},
		{0x0FB9, 0x0FB9, 1},
		{0x20D0, 0x20DC, 1},
		{0x20E1, 0x3005, 0x3005 - 0x20E1},
		{0x302A, 0x302F, 1},
		{0x3031, 0x3035, 1},
		{0x3099, 0x309A, 1},
		{0x309D, 0x309E, 1},
		{0x30FC, 0x30FE, 1},
	},
}
1431
1432
1433
// HTMLEntity is the exported name for the htmlEntity table, which maps
// HTML entity names to their replacement text. It has the same type as
// Decoder.Entity.
var HTMLEntity = htmlEntity
1435
// htmlEntity maps HTML entity names (without the leading '&' and the
// trailing ';') to the text they stand for. Every value is a single
// character written as a \u escape.
var htmlEntity = map[string]string{
	"nbsp": "\u00A0",
	"iexcl": "\u00A1",
	"cent": "\u00A2",
	"pound": "\u00A3",
	"curren": "\u00A4",
	"yen": "\u00A5",
	"brvbar": "\u00A6",
	"sect": "\u00A7",
	"uml": "\u00A8",
	"copy": "\u00A9",
	"ordf": "\u00AA",
	"laquo": "\u00AB",
	"not": "\u00AC",
	"shy": "\u00AD",
	"reg": "\u00AE",
	"macr": "\u00AF",
	"deg": "\u00B0",
	"plusmn": "\u00B1",
	"sup2": "\u00B2",
	"sup3": "\u00B3",
	"acute": "\u00B4",
	"micro": "\u00B5",
	"para": "\u00B6",
	"middot": "\u00B7",
	"cedil": "\u00B8",
	"sup1": "\u00B9",
	"ordm": "\u00BA",
	"raquo": "\u00BB",
	"frac14": "\u00BC",
	"frac12": "\u00BD",
	"frac34": "\u00BE",
	"iquest": "\u00BF",
	"Agrave": "\u00C0",
	"Aacute": "\u00C1",
	"Acirc": "\u00C2",
	"Atilde": "\u00C3",
	"Auml": "\u00C4",
	"Aring": "\u00C5",
	"AElig": "\u00C6",
	"Ccedil": "\u00C7",
	"Egrave": "\u00C8",
	"Eacute": "\u00C9",
	"Ecirc": "\u00CA",
	"Euml": "\u00CB",
	"Igrave": "\u00CC",
	"Iacute": "\u00CD",
	"Icirc": "\u00CE",
	"Iuml": "\u00CF",
	"ETH": "\u00D0",
	"Ntilde": "\u00D1",
	"Ograve": "\u00D2",
	"Oacute": "\u00D3",
	"Ocirc": "\u00D4",
	"Otilde": "\u00D5",
	"Ouml": "\u00D6",
	"times": "\u00D7",
	"Oslash": "\u00D8",
	"Ugrave": "\u00D9",
	"Uacute": "\u00DA",
	"Ucirc": "\u00DB",
	"Uuml": "\u00DC",
	"Yacute": "\u00DD",
	"THORN": "\u00DE",
	"szlig": "\u00DF",
	"agrave": "\u00E0",
	"aacute": "\u00E1",
	"acirc": "\u00E2",
	"atilde": "\u00E3",
	"auml": "\u00E4",
	"aring": "\u00E5",
	"aelig": "\u00E6",
	"ccedil": "\u00E7",
	"egrave": "\u00E8",
	"eacute": "\u00E9",
	"ecirc": "\u00EA",
	"euml": "\u00EB",
	"igrave": "\u00EC",
	"iacute": "\u00ED",
	"icirc": "\u00EE",
	"iuml": "\u00EF",
	"eth": "\u00F0",
	"ntilde": "\u00F1",
	"ograve": "\u00F2",
	"oacute": "\u00F3",
	"ocirc": "\u00F4",
	"otilde": "\u00F5",
	"ouml": "\u00F6",
	"divide": "\u00F7",
	"oslash": "\u00F8",
	"ugrave": "\u00F9",
	"uacute": "\u00FA",
	"ucirc": "\u00FB",
	"uuml": "\u00FC",
	"yacute": "\u00FD",
	"thorn": "\u00FE",
	"yuml": "\u00FF",
	"fnof": "\u0192",
	"Alpha": "\u0391",
	"Beta": "\u0392",
	"Gamma": "\u0393",
	"Delta": "\u0394",
	"Epsilon": "\u0395",
	"Zeta": "\u0396",
	"Eta": "\u0397",
	"Theta": "\u0398",
	"Iota": "\u0399",
	"Kappa": "\u039A",
	"Lambda": "\u039B",
	"Mu": "\u039C",
	"Nu": "\u039D",
	"Xi": "\u039E",
	"Omicron": "\u039F",
	"Pi": "\u03A0",
	"Rho": "\u03A1",
	"Sigma": "\u03A3",
	"Tau": "\u03A4",
	"Upsilon": "\u03A5",
	"Phi": "\u03A6",
	"Chi": "\u03A7",
	"Psi": "\u03A8",
	"Omega": "\u03A9",
	"alpha": "\u03B1",
	"beta": "\u03B2",
	"gamma": "\u03B3",
	"delta": "\u03B4",
	"epsilon": "\u03B5",
	"zeta": "\u03B6",
	"eta": "\u03B7",
	"theta": "\u03B8",
	"iota": "\u03B9",
	"kappa": "\u03BA",
	"lambda": "\u03BB",
	"mu": "\u03BC",
	"nu": "\u03BD",
	"xi": "\u03BE",
	"omicron": "\u03BF",
	"pi": "\u03C0",
	"rho": "\u03C1",
	"sigmaf": "\u03C2",
	"sigma": "\u03C3",
	"tau": "\u03C4",
	"upsilon": "\u03C5",
	"phi": "\u03C6",
	"chi": "\u03C7",
	"psi": "\u03C8",
	"omega": "\u03C9",
	"thetasym": "\u03D1",
	"upsih": "\u03D2",
	"piv": "\u03D6",
	"bull": "\u2022",
	"hellip": "\u2026",
	"prime": "\u2032",
	"Prime": "\u2033",
	"oline": "\u203E",
	"frasl": "\u2044",
	"weierp": "\u2118",
	"image": "\u2111",
	"real": "\u211C",
	"trade": "\u2122",
	"alefsym": "\u2135",
	"larr": "\u2190",
	"uarr": "\u2191",
	"rarr": "\u2192",
	"darr": "\u2193",
	"harr": "\u2194",
	"crarr": "\u21B5",
	"lArr": "\u21D0",
	"uArr": "\u21D1",
	"rArr": "\u21D2",
	"dArr": "\u21D3",
	"hArr": "\u21D4",
	"forall": "\u2200",
	"part": "\u2202",
	"exist": "\u2203",
	"empty": "\u2205",
	"nabla": "\u2207",
	"isin": "\u2208",
	"notin": "\u2209",
	"ni": "\u220B",
	"prod": "\u220F",
	"sum": "\u2211",
	"minus": "\u2212",
	"lowast": "\u2217",
	"radic": "\u221A",
	"prop": "\u221D",
	"infin": "\u221E",
	"ang": "\u2220",
	"and": "\u2227",
	"or": "\u2228",
	"cap": "\u2229",
	"cup": "\u222A",
	"int": "\u222B",
	"there4": "\u2234",
	"sim": "\u223C",
	"cong": "\u2245",
	"asymp": "\u2248",
	"ne": "\u2260",
	"equiv": "\u2261",
	"le": "\u2264",
	"ge": "\u2265",
	"sub": "\u2282",
	"sup": "\u2283",
	"nsub": "\u2284",
	"sube": "\u2286",
	"supe": "\u2287",
	"oplus": "\u2295",
	"otimes": "\u2297",
	"perp": "\u22A5",
	"sdot": "\u22C5",
	"lceil": "\u2308",
	"rceil": "\u2309",
	"lfloor": "\u230A",
	"rfloor": "\u230B",
	"lang": "\u2329",
	"rang": "\u232A",
	"loz": "\u25CA",
	"spades": "\u2660",
	"clubs": "\u2663",
	"hearts": "\u2665",
	"diams": "\u2666",
	"quot": "\u0022",
	"amp": "\u0026",
	"lt": "\u003C",
	"gt": "\u003E",
	"OElig": "\u0152",
	"oelig": "\u0153",
	"Scaron": "\u0160",
	"scaron": "\u0161",
	"Yuml": "\u0178",
	"circ": "\u02C6",
	"tilde": "\u02DC",
	"ensp": "\u2002",
	"emsp": "\u2003",
	"thinsp": "\u2009",
	"zwnj": "\u200C",
	"zwj": "\u200D",
	"lrm": "\u200E",
	"rlm": "\u200F",
	"ndash": "\u2013",
	"mdash": "\u2014",
	"lsquo": "\u2018",
	"rsquo": "\u2019",
	"sbquo": "\u201A",
	"ldquo": "\u201C",
	"rdquo": "\u201D",
	"bdquo": "\u201E",
	"dagger": "\u2020",
	"Dagger": "\u2021",
	"permil": "\u2030",
	"lsaquo": "\u2039",
	"rsaquo": "\u203A",
	"euro": "\u20AC",
}
1698
1699
1700
// HTMLAutoClose is the exported name for the htmlAutoClose list of HTML
// element names. It has the same type as Decoder.AutoClose.
var HTMLAutoClose = htmlAutoClose
1702
// htmlAutoClose lists HTML element names, in lower case, intended for
// use as a Decoder.AutoClose list.
var htmlAutoClose = []string{
	"basefont",
	"br",
	"area",
	"link",
	"img",
	"param",
	"hr",
	"input",
	"col",
	"frame",
	"isindex",
	"base",
	"meta",
}
1722
// Replacement text written by EscapeText. Each value is the XML
// character reference (or predefined entity) for one character that
// EscapeText does not emit literally; esc_fffd is the Unicode
// replacement character used for characters that are not allowed in XML.
var (
	esc_quot = []byte("&#34;") // shorter than "&quot;"
	esc_apos = []byte("&#39;") // shorter than "&apos;"
	esc_amp  = []byte("&amp;")
	esc_lt   = []byte("&lt;")
	esc_gt   = []byte("&gt;")
	esc_tab  = []byte("&#x9;")
	esc_nl   = []byte("&#xA;")
	esc_cr   = []byte("&#xD;")
	esc_fffd = []byte("\uFFFD") // Unicode replacement character
)
1734
1735
1736
1737 func EscapeText(w io.Writer, s []byte) error {
1738 var esc []byte
1739 last := 0
1740 for i := 0; i < len(s); {
1741 r, width := utf8.DecodeRune(s[i:])
1742 i += width
1743 switch r {
1744 case '"':
1745 esc = esc_quot
1746 case '\'':
1747 esc = esc_apos
1748 case '&':
1749 esc = esc_amp
1750 case '<':
1751 esc = esc_lt
1752 case '>':
1753 esc = esc_gt
1754 case '\t':
1755 esc = esc_tab
1756 case '\n':
1757 esc = esc_nl
1758 case '\r':
1759 esc = esc_cr
1760 default:
1761 if !isInCharacterRange(r) {
1762 esc = esc_fffd
1763 break
1764 }
1765 continue
1766 }
1767 if _, err := w.Write(s[last : i-width]); err != nil {
1768 return err
1769 }
1770 if _, err := w.Write(esc); err != nil {
1771 return err
1772 }
1773 last = i
1774 }
1775 if _, err := w.Write(s[last:]); err != nil {
1776 return err
1777 }
1778 return nil
1779 }
1780
1781
1782
1783
1784 func Escape(w io.Writer, s []byte) {
1785 EscapeText(w, s)
1786 }
1787
1788
1789
// procInstEncoding extracts the value of the encoding="..." (or
// encoding='...') pseudo-attribute from s, the text of an XML
// declaration. It returns "" when the text "encoding=" does not occur,
// is not directly followed by a quote, or the quote is never closed.
// Only the first occurrence of "encoding=" is examined.
func procInstEncoding(s string) string {
	const key = "encoding="
	at := strings.Index(s, key)
	if at < 0 {
		return ""
	}
	rest := s[at+len(key):]
	if rest == "" {
		return ""
	}
	quote := rest[0]
	if quote != '\'' && quote != '"' {
		return ""
	}
	value := rest[1:]
	end := strings.IndexRune(value, rune(quote))
	if end < 0 {
		return ""
	}
	return value[:end]
}
View as plain text