...
Run Format

Source file src/mime/mediatype.go

Documentation: mime

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"sort"
    11  	"strings"
    12  	"unicode"
    13  )
    14  
    15  // FormatMediaType serializes mediatype t and the parameters
    16  // param as a media type conforming to RFC 2045 and RFC 2616.
    17  // The type and parameter names are written in lower-case.
    18  // When any of the arguments result in a standard violation then
    19  // FormatMediaType returns the empty string.
    20  func FormatMediaType(t string, param map[string]string) string {
    21  	var b strings.Builder
    22  	if slash := strings.Index(t, "/"); slash == -1 {
    23  		if !isToken(t) {
    24  			return ""
    25  		}
    26  		b.WriteString(strings.ToLower(t))
    27  	} else {
    28  		major, sub := t[:slash], t[slash+1:]
    29  		if !isToken(major) || !isToken(sub) {
    30  			return ""
    31  		}
    32  		b.WriteString(strings.ToLower(major))
    33  		b.WriteByte('/')
    34  		b.WriteString(strings.ToLower(sub))
    35  	}
    36  
    37  	attrs := make([]string, 0, len(param))
    38  	for a := range param {
    39  		attrs = append(attrs, a)
    40  	}
    41  	sort.Strings(attrs)
    42  
    43  	for _, attribute := range attrs {
    44  		value := param[attribute]
    45  		b.WriteByte(';')
    46  		b.WriteByte(' ')
    47  		if !isToken(attribute) {
    48  			return ""
    49  		}
    50  		b.WriteString(strings.ToLower(attribute))
    51  		b.WriteByte('=')
    52  		if isToken(value) {
    53  			b.WriteString(value)
    54  			continue
    55  		}
    56  
    57  		b.WriteByte('"')
    58  		offset := 0
    59  		for index, character := range value {
    60  			if character == '"' || character == '\\' {
    61  				b.WriteString(value[offset:index])
    62  				offset = index
    63  				b.WriteByte('\\')
    64  			}
    65  			if character&0x80 != 0 {
    66  				return ""
    67  			}
    68  		}
    69  		b.WriteString(value[offset:])
    70  		b.WriteByte('"')
    71  	}
    72  	return b.String()
    73  }
    74  
    75  func checkMediaTypeDisposition(s string) error {
    76  	typ, rest := consumeToken(s)
    77  	if typ == "" {
    78  		return errors.New("mime: no media type")
    79  	}
    80  	if rest == "" {
    81  		return nil
    82  	}
    83  	if !strings.HasPrefix(rest, "/") {
    84  		return errors.New("mime: expected slash after first token")
    85  	}
    86  	subtype, rest := consumeToken(rest[1:])
    87  	if subtype == "" {
    88  		return errors.New("mime: expected token after slash")
    89  	}
    90  	if rest != "" {
    91  		return errors.New("mime: unexpected content after media subtype")
    92  	}
    93  	return nil
    94  }
    95  
// ErrInvalidMediaParameter is returned by ParseMediaType if
// the media type value was found but there was an error parsing
// the optional parameters.
var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
   100  
   101  // ParseMediaType parses a media type value and any optional
   102  // parameters, per RFC 1521.  Media types are the values in
   103  // Content-Type and Content-Disposition headers (RFC 2183).
   104  // On success, ParseMediaType returns the media type converted
   105  // to lowercase and trimmed of white space and a non-nil map.
   106  // If there is an error parsing the optional parameter,
   107  // the media type will be returned along with the error
   108  // ErrInvalidMediaParameter.
   109  // The returned map, params, maps from the lowercase
   110  // attribute to the attribute value with its case preserved.
   111  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   112  	i := strings.Index(v, ";")
   113  	if i == -1 {
   114  		i = len(v)
   115  	}
   116  	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
   117  
   118  	err = checkMediaTypeDisposition(mediatype)
   119  	if err != nil {
   120  		return "", nil, err
   121  	}
   122  
   123  	params = make(map[string]string)
   124  
   125  	// Map of base parameter name -> parameter name -> value
   126  	// for parameters containing a '*' character.
   127  	// Lazily initialized.
   128  	var continuation map[string]map[string]string
   129  
   130  	v = v[i:]
   131  	for len(v) > 0 {
   132  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   133  		if len(v) == 0 {
   134  			break
   135  		}
   136  		key, value, rest := consumeMediaParam(v)
   137  		if key == "" {
   138  			if strings.TrimSpace(rest) == ";" {
   139  				// Ignore trailing semicolons.
   140  				// Not an error.
   141  				return
   142  			}
   143  			// Parse error.
   144  			return mediatype, nil, ErrInvalidMediaParameter
   145  		}
   146  
   147  		pmap := params
   148  		if idx := strings.Index(key, "*"); idx != -1 {
   149  			baseName := key[:idx]
   150  			if continuation == nil {
   151  				continuation = make(map[string]map[string]string)
   152  			}
   153  			var ok bool
   154  			if pmap, ok = continuation[baseName]; !ok {
   155  				continuation[baseName] = make(map[string]string)
   156  				pmap = continuation[baseName]
   157  			}
   158  		}
   159  		if _, exists := pmap[key]; exists {
   160  			// Duplicate parameter name is bogus.
   161  			return "", nil, errors.New("mime: duplicate parameter name")
   162  		}
   163  		pmap[key] = value
   164  		v = rest
   165  	}
   166  
   167  	// Stitch together any continuations or things with stars
   168  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   169  	var buf strings.Builder
   170  	for key, pieceMap := range continuation {
   171  		singlePartKey := key + "*"
   172  		if v, ok := pieceMap[singlePartKey]; ok {
   173  			if decv, ok := decode2231Enc(v); ok {
   174  				params[key] = decv
   175  			}
   176  			continue
   177  		}
   178  
   179  		buf.Reset()
   180  		valid := false
   181  		for n := 0; ; n++ {
   182  			simplePart := fmt.Sprintf("%s*%d", key, n)
   183  			if v, ok := pieceMap[simplePart]; ok {
   184  				valid = true
   185  				buf.WriteString(v)
   186  				continue
   187  			}
   188  			encodedPart := simplePart + "*"
   189  			v, ok := pieceMap[encodedPart]
   190  			if !ok {
   191  				break
   192  			}
   193  			valid = true
   194  			if n == 0 {
   195  				if decv, ok := decode2231Enc(v); ok {
   196  					buf.WriteString(decv)
   197  				}
   198  			} else {
   199  				decv, _ := percentHexUnescape(v)
   200  				buf.WriteString(decv)
   201  			}
   202  		}
   203  		if valid {
   204  			params[key] = buf.String()
   205  		}
   206  	}
   207  
   208  	return
   209  }
   210  
   211  func decode2231Enc(v string) (string, bool) {
   212  	sv := strings.SplitN(v, "'", 3)
   213  	if len(sv) != 3 {
   214  		return "", false
   215  	}
   216  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   217  	// need to decide how to expose it in the API. But I'm not sure
   218  	// anybody uses it in practice.
   219  	charset := strings.ToLower(sv[0])
   220  	if len(charset) == 0 {
   221  		return "", false
   222  	}
   223  	if charset != "us-ascii" && charset != "utf-8" {
   224  		// TODO: unsupported encoding
   225  		return "", false
   226  	}
   227  	encv, err := percentHexUnescape(sv[2])
   228  	if err != nil {
   229  		return "", false
   230  	}
   231  	return encv, true
   232  }
   233  
// isNotTokenChar reports whether r is not a token character; it is
// the negation of isTokenChar, in a form that can be passed as a
// predicate (consumeToken hands it to strings.IndexFunc).
func isNotTokenChar(r rune) bool {
	return !isTokenChar(r)
}
   237  
   238  // consumeToken consumes a token from the beginning of provided
   239  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   240  // the token consumed and the rest of the string. Returns ("", v) on
   241  // failure to consume at least one character.
   242  func consumeToken(v string) (token, rest string) {
   243  	notPos := strings.IndexFunc(v, isNotTokenChar)
   244  	if notPos == -1 {
   245  		return v, ""
   246  	}
   247  	if notPos == 0 {
   248  		return "", v
   249  	}
   250  	return v[0:notPos], v[notPos:]
   251  }
   252  
   253  // consumeValue consumes a "value" per RFC 2045, where a value is
   254  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   255  // returns the value consumed (and de-quoted/escaped, if a
   256  // quoted-string) and the rest of the string. On failure, returns
   257  // ("", v).
   258  func consumeValue(v string) (value, rest string) {
   259  	if v == "" {
   260  		return
   261  	}
   262  	if v[0] != '"' {
   263  		return consumeToken(v)
   264  	}
   265  
   266  	// parse a quoted-string
   267  	buffer := new(strings.Builder)
   268  	for i := 1; i < len(v); i++ {
   269  		r := v[i]
   270  		if r == '"' {
   271  			return buffer.String(), v[i+1:]
   272  		}
   273  		// When MSIE sends a full file path (in "intranet mode"), it does not
   274  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   275  		//
   276  		// No known MIME generators emit unnecessary backslash escapes
   277  		// for simple token characters like numbers and letters.
   278  		//
   279  		// If we see an unnecessary backslash escape, assume it is from MSIE
   280  		// and intended as a literal backslash. This makes Go servers deal better
   281  		// with MSIE without affecting the way they handle conforming MIME
   282  		// generators.
   283  		if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
   284  			buffer.WriteByte(v[i+1])
   285  			i++
   286  			continue
   287  		}
   288  		if r == '\r' || r == '\n' {
   289  			return "", v
   290  		}
   291  		buffer.WriteByte(v[i])
   292  	}
   293  	// Did not find end quote.
   294  	return "", v
   295  }
   296  
   297  func consumeMediaParam(v string) (param, value, rest string) {
   298  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   299  	if !strings.HasPrefix(rest, ";") {
   300  		return "", "", v
   301  	}
   302  
   303  	rest = rest[1:] // consume semicolon
   304  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   305  	param, rest = consumeToken(rest)
   306  	param = strings.ToLower(param)
   307  	if param == "" {
   308  		return "", "", v
   309  	}
   310  
   311  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   312  	if !strings.HasPrefix(rest, "=") {
   313  		return "", "", v
   314  	}
   315  	rest = rest[1:] // consume equals sign
   316  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   317  	value, rest2 := consumeValue(rest)
   318  	if value == "" && rest2 == rest {
   319  		return "", "", v
   320  	}
   321  	rest = rest2
   322  	return param, value, rest
   323  }
   324  
   325  func percentHexUnescape(s string) (string, error) {
   326  	// Count %, check that they're well-formed.
   327  	percents := 0
   328  	for i := 0; i < len(s); {
   329  		if s[i] != '%' {
   330  			i++
   331  			continue
   332  		}
   333  		percents++
   334  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   335  			s = s[i:]
   336  			if len(s) > 3 {
   337  				s = s[0:3]
   338  			}
   339  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   340  		}
   341  		i += 3
   342  	}
   343  	if percents == 0 {
   344  		return s, nil
   345  	}
   346  
   347  	t := make([]byte, len(s)-2*percents)
   348  	j := 0
   349  	for i := 0; i < len(s); {
   350  		switch s[i] {
   351  		case '%':
   352  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   353  			j++
   354  			i += 3
   355  		default:
   356  			t[j] = s[i]
   357  			j++
   358  			i++
   359  		}
   360  	}
   361  	return string(t), nil
   362  }
   363  
   364  func ishex(c byte) bool {
   365  	switch {
   366  	case '0' <= c && c <= '9':
   367  		return true
   368  	case 'a' <= c && c <= 'f':
   369  		return true
   370  	case 'A' <= c && c <= 'F':
   371  		return true
   372  	}
   373  	return false
   374  }
   375  
   376  func unhex(c byte) byte {
   377  	switch {
   378  	case '0' <= c && c <= '9':
   379  		return c - '0'
   380  	case 'a' <= c && c <= 'f':
   381  		return c - 'a' + 10
   382  	case 'A' <= c && c <= 'F':
   383  		return c - 'A' + 10
   384  	}
   385  	return 0
   386  }
   387  

View as plain text