The Go Programming Language

Source file src/pkg/strconv/quote.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strconv
     6	
     7	import (
     8		"bytes"
     9		"os"
    10		"strings"
    11		"unicode"
    12		"utf8"
    13	)
    14	
// lowerhex is indexed by a 4-bit value to obtain the corresponding
// lowercase hexadecimal digit when writing \x, \u and \U escapes.
const lowerhex = "0123456789abcdef"
    16	
    17	func quoteWith(s string, quote byte, ASCIIonly bool) string {
    18		var buf bytes.Buffer
    19		buf.WriteByte(quote)
    20		for width := 0; len(s) > 0; s = s[width:] {
    21			rune := int(s[0])
    22			width = 1
    23			if rune >= utf8.RuneSelf {
    24				rune, width = utf8.DecodeRuneInString(s)
    25			}
    26			if width == 1 && rune == utf8.RuneError {
    27				buf.WriteString(`\x`)
    28				buf.WriteByte(lowerhex[s[0]>>4])
    29				buf.WriteByte(lowerhex[s[0]&0xF])
    30				continue
    31			}
    32			if rune == int(quote) || rune == '\\' { // always backslashed
    33				buf.WriteByte('\\')
    34				buf.WriteByte(byte(rune))
    35				continue
    36			}
    37			if ASCIIonly {
    38				if rune <= unicode.MaxASCII && unicode.IsPrint(rune) {
    39					buf.WriteRune(rune)
    40					continue
    41				}
    42			} else if unicode.IsPrint(rune) {
    43				buf.WriteRune(rune)
    44				continue
    45			}
    46			switch rune {
    47			case '\a':
    48				buf.WriteString(`\a`)
    49			case '\b':
    50				buf.WriteString(`\b`)
    51			case '\f':
    52				buf.WriteString(`\f`)
    53			case '\n':
    54				buf.WriteString(`\n`)
    55			case '\r':
    56				buf.WriteString(`\r`)
    57			case '\t':
    58				buf.WriteString(`\t`)
    59			case '\v':
    60				buf.WriteString(`\v`)
    61			default:
    62				switch {
    63				case rune < ' ':
    64					buf.WriteString(`\x`)
    65					buf.WriteByte(lowerhex[s[0]>>4])
    66					buf.WriteByte(lowerhex[s[0]&0xF])
    67				case rune > unicode.MaxRune:
    68					rune = 0xFFFD
    69					fallthrough
    70				case rune < 0x10000:
    71					buf.WriteString(`\u`)
    72					for s := 12; s >= 0; s -= 4 {
    73						buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
    74					}
    75				default:
    76					buf.WriteString(`\U`)
    77					for s := 28; s >= 0; s -= 4 {
    78						buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
    79					}
    80				}
    81			}
    82		}
    83		buf.WriteByte(quote)
    84		return buf.String()
    85	
    86	}
    87	
    88	// Quote returns a double-quoted Go string literal representing s.  The
    89	// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
    90	// control characters and non-printable characters as defined by
    91	// unicode.IsPrint.
    92	func Quote(s string) string {
    93		return quoteWith(s, '"', false)
    94	}
    95	
    96	// QuoteToASCII returns a double-quoted Go string literal representing s.
    97	// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
    98	// non-ASCII characters and non-printable characters as defined by
    99	// unicode.IsPrint.
   100	func QuoteToASCII(s string) string {
   101		return quoteWith(s, '"', true)
   102	}
   103	
   104	// QuoteRune returns a single-quoted Go character literal representing the
   105	// rune.  The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
   106	// for control characters and non-printable characters as defined by
   107	// unicode.IsPrint.
   108	func QuoteRune(rune int) string {
   109		// TODO: avoid the allocation here.
   110		return quoteWith(string(rune), '\'', false)
   111	}
   112	
   113	// QuoteRuneToASCII returns a single-quoted Go character literal representing
   114	// the rune.  The returned string uses Go escape sequences (\t, \n, \xFF,
   115	// \u0100) for non-ASCII characters and non-printable characters as defined
   116	// by unicode.IsPrint.
   117	func QuoteRuneToASCII(rune int) string {
   118		// TODO: avoid the allocation here.
   119		return quoteWith(string(rune), '\'', true)
   120	}
   121	
   122	// CanBackquote returns whether the string s would be
   123	// a valid Go string literal if enclosed in backquotes.
   124	func CanBackquote(s string) bool {
   125		for i := 0; i < len(s); i++ {
   126			if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' {
   127				return false
   128			}
   129		}
   130		return true
   131	}
   132	
   133	func unhex(b byte) (v int, ok bool) {
   134		c := int(b)
   135		switch {
   136		case '0' <= c && c <= '9':
   137			return c - '0', true
   138		case 'a' <= c && c <= 'f':
   139			return c - 'a' + 10, true
   140		case 'A' <= c && c <= 'F':
   141			return c - 'A' + 10, true
   142		}
   143		return
   144	}
   145	
   146	// UnquoteChar decodes the first character or byte in the escaped string
   147	// or character literal represented by the string s.
   148	// It returns four values:
   149	//
   150	//	1) value, the decoded Unicode code point or byte value;
   151	//	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   152	//	3) tail, the remainder of the string after the character; and
   153	//	4) an error that will be nil if the character is syntactically valid.
   154	//
   155	// The second argument, quote, specifies the type of literal being parsed
   156	// and therefore which escaped quote character is permitted.
   157	// If set to a single quote, it permits the sequence \' and disallows unescaped '.
   158	// If set to a double quote, it permits \" and disallows unescaped ".
   159	// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   160	func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) {
   161		// easy cases
   162		switch c := s[0]; {
   163		case c == quote && (quote == '\'' || quote == '"'):
   164			err = os.EINVAL
   165			return
   166		case c >= utf8.RuneSelf:
   167			r, size := utf8.DecodeRuneInString(s)
   168			return r, true, s[size:], nil
   169		case c != '\\':
   170			return int(s[0]), false, s[1:], nil
   171		}
   172	
   173		// hard case: c is backslash
   174		if len(s) <= 1 {
   175			err = os.EINVAL
   176			return
   177		}
   178		c := s[1]
   179		s = s[2:]
   180	
   181		switch c {
   182		case 'a':
   183			value = '\a'
   184		case 'b':
   185			value = '\b'
   186		case 'f':
   187			value = '\f'
   188		case 'n':
   189			value = '\n'
   190		case 'r':
   191			value = '\r'
   192		case 't':
   193			value = '\t'
   194		case 'v':
   195			value = '\v'
   196		case 'x', 'u', 'U':
   197			n := 0
   198			switch c {
   199			case 'x':
   200				n = 2
   201			case 'u':
   202				n = 4
   203			case 'U':
   204				n = 8
   205			}
   206			v := 0
   207			if len(s) < n {
   208				err = os.EINVAL
   209				return
   210			}
   211			for j := 0; j < n; j++ {
   212				x, ok := unhex(s[j])
   213				if !ok {
   214					err = os.EINVAL
   215					return
   216				}
   217				v = v<<4 | x
   218			}
   219			s = s[n:]
   220			if c == 'x' {
   221				// single-byte string, possibly not UTF-8
   222				value = v
   223				break
   224			}
   225			if v > unicode.MaxRune {
   226				err = os.EINVAL
   227				return
   228			}
   229			value = v
   230			multibyte = true
   231		case '0', '1', '2', '3', '4', '5', '6', '7':
   232			v := int(c) - '0'
   233			if len(s) < 2 {
   234				err = os.EINVAL
   235				return
   236			}
   237			for j := 0; j < 2; j++ { // one digit already; two more
   238				x := int(s[j]) - '0'
   239				if x < 0 || x > 7 {
   240					return
   241				}
   242				v = (v << 3) | x
   243			}
   244			s = s[2:]
   245			if v > 255 {
   246				err = os.EINVAL
   247				return
   248			}
   249			value = v
   250		case '\\':
   251			value = '\\'
   252		case '\'', '"':
   253			if c != quote {
   254				err = os.EINVAL
   255				return
   256			}
   257			value = int(c)
   258		default:
   259			err = os.EINVAL
   260			return
   261		}
   262		tail = s
   263		return
   264	}
   265	
   266	// Unquote interprets s as a single-quoted, double-quoted,
   267	// or backquoted Go string literal, returning the string value
   268	// that s quotes.  (If s is single-quoted, it would be a Go
   269	// character literal; Unquote returns the corresponding
   270	// one-character string.)
   271	func Unquote(s string) (t string, err os.Error) {
   272		n := len(s)
   273		if n < 2 {
   274			return "", os.EINVAL
   275		}
   276		quote := s[0]
   277		if quote != s[n-1] {
   278			return "", os.EINVAL
   279		}
   280		s = s[1 : n-1]
   281	
   282		if quote == '`' {
   283			if strings.Contains(s, "`") {
   284				return "", os.EINVAL
   285			}
   286			return s, nil
   287		}
   288		if quote != '"' && quote != '\'' {
   289			return "", os.EINVAL
   290		}
   291	
   292		var buf bytes.Buffer
   293		for len(s) > 0 {
   294			c, multibyte, ss, err := UnquoteChar(s, quote)
   295			if err != nil {
   296				return "", err
   297			}
   298			s = ss
   299			if c < utf8.RuneSelf || !multibyte {
   300				buf.WriteByte(byte(c))
   301			} else {
   302				buf.WriteString(string(c))
   303			}
   304			if quote == '\'' && len(s) != 0 {
   305				// single-quoted must be single character
   306				return "", os.EINVAL
   307			}
   308		}
   309		return buf.String(), nil
   310	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.