// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package html import ( "strings" "testing" ) type unescapeTest struct { // A short description of the test case. desc string // The HTML text. html string // The unescaped text. unescaped string } var unescapeTests = []unescapeTest{ // Handle no entities. { "copy", "A\ttext\nstring", "A\ttext\nstring", }, // Handle simple named entities. { "simple", "& > <", "& > <", }, // Handle hitting the end of the string. { "stringEnd", "& &", "& &", }, // Handle entities with two codepoints. { "multiCodepoint", "text ⋛︀ blah", "text \u22db\ufe00 blah", }, // Handle decimal numeric entities. { "decimalEntity", "Delta = Δ ", "Delta = Δ ", }, // Handle hexadecimal numeric entities. { "hexadecimalEntity", "Lambda = λ = λ ", "Lambda = λ = λ ", }, // Handle numeric early termination. { "numericEnds", "&# &#x €43 © = ©f = ©", "&# &#x €43 © = ©f = ©", }, // Handle numeric ISO-8859-1 entity replacements. { "numericReplacements", "Footnote‡", "Footnote‡", }, // Handle single ampersand. { "copySingleAmpersand", "&", "&", }, // Handle ampersand followed by non-entity. { "copyAmpersandNonEntity", "text &test", "text &test", }, // Handle "&#". { "copyAmpersandHash", "text &#", "text &#", }, } func TestUnescape(t *testing.T) { for _, tt := range unescapeTests { unescaped := UnescapeString(tt.html) if unescaped != tt.unescaped { t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped) } } } func TestUnescapeEscape(t *testing.T) { ss := []string{ ``, `abc def`, `a & b`, `a&b`, `a & b`, `"`, `"`, `"<&>"`, `"<&>"`, `3&5==1 && 0<1, "0<1", a+acute=á`, `The special characters are: <, >, &, ' and "`, } for _, s := range ss { if got := UnescapeString(EscapeString(s)); got != s { t.Errorf("got %q want %q", got, s) } } } var ( benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100) benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100) benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&", 10) benchUnescapeDense = strings.Repeat("&< & <", 100) ) func BenchmarkEscape(b *testing.B) { n := 0 for i := 0; i < b.N; i++ { n += len(EscapeString(benchEscapeData)) } } func BenchmarkEscapeNone(b *testing.B) { n := 0 for i := 0; i < b.N; i++ { n += len(EscapeString(benchEscapeNone)) } } func BenchmarkUnescape(b *testing.B) { s := EscapeString(benchEscapeData) n := 0 for i := 0; i < b.N; i++ { n += len(UnescapeString(s)) } } func BenchmarkUnescapeNone(b *testing.B) { s := EscapeString(benchEscapeNone) n := 0 for i := 0; i < b.N; i++ { n += len(UnescapeString(s)) } } func BenchmarkUnescapeSparse(b *testing.B) { n := 0 for i := 0; i < b.N; i++ { n += len(UnescapeString(benchUnescapeSparse)) } } func BenchmarkUnescapeDense(b *testing.B) { n := 0 for i := 0; i < b.N; i++ { n += len(UnescapeString(benchUnescapeDense)) } }