...
Run Format

Source file src/mime/mediatype.go

Documentation: mime

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"sort"
    12  	"strings"
    13  	"unicode"
    14  )
    15  
    16  // FormatMediaType serializes mediatype t and the parameters
    17  // param as a media type conforming to RFC 2045 and RFC 2616.
    18  // The type and parameter names are written in lower-case.
    19  // When any of the arguments result in a standard violation then
    20  // FormatMediaType returns the empty string.
    21  func FormatMediaType(t string, param map[string]string) string {
    22  	var b bytes.Buffer
    23  	if slash := strings.Index(t, "/"); slash == -1 {
    24  		if !isToken(t) {
    25  			return ""
    26  		}
    27  		b.WriteString(strings.ToLower(t))
    28  	} else {
    29  		major, sub := t[:slash], t[slash+1:]
    30  		if !isToken(major) || !isToken(sub) {
    31  			return ""
    32  		}
    33  		b.WriteString(strings.ToLower(major))
    34  		b.WriteByte('/')
    35  		b.WriteString(strings.ToLower(sub))
    36  	}
    37  
    38  	attrs := make([]string, 0, len(param))
    39  	for a := range param {
    40  		attrs = append(attrs, a)
    41  	}
    42  	sort.Strings(attrs)
    43  
    44  	for _, attribute := range attrs {
    45  		value := param[attribute]
    46  		b.WriteByte(';')
    47  		b.WriteByte(' ')
    48  		if !isToken(attribute) {
    49  			return ""
    50  		}
    51  		b.WriteString(strings.ToLower(attribute))
    52  		b.WriteByte('=')
    53  		if isToken(value) {
    54  			b.WriteString(value)
    55  			continue
    56  		}
    57  
    58  		b.WriteByte('"')
    59  		offset := 0
    60  		for index, character := range value {
    61  			if character == '"' || character == '\\' {
    62  				b.WriteString(value[offset:index])
    63  				offset = index
    64  				b.WriteByte('\\')
    65  			}
    66  			if character&0x80 != 0 {
    67  				return ""
    68  			}
    69  		}
    70  		b.WriteString(value[offset:])
    71  		b.WriteByte('"')
    72  	}
    73  	return b.String()
    74  }
    75  
    76  func checkMediaTypeDisposition(s string) error {
    77  	typ, rest := consumeToken(s)
    78  	if typ == "" {
    79  		return errors.New("mime: no media type")
    80  	}
    81  	if rest == "" {
    82  		return nil
    83  	}
    84  	if !strings.HasPrefix(rest, "/") {
    85  		return errors.New("mime: expected slash after first token")
    86  	}
    87  	subtype, rest := consumeToken(rest[1:])
    88  	if subtype == "" {
    89  		return errors.New("mime: expected token after slash")
    90  	}
    91  	if rest != "" {
    92  		return errors.New("mime: unexpected content after media subtype")
    93  	}
    94  	return nil
    95  }
    96  
// ErrInvalidMediaParameter is returned by ParseMediaType if
// the media type value was found but there was an error parsing
// the optional parameters.
var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
   101  
   102  // ParseMediaType parses a media type value and any optional
   103  // parameters, per RFC 1521.  Media types are the values in
   104  // Content-Type and Content-Disposition headers (RFC 2183).
   105  // On success, ParseMediaType returns the media type converted
   106  // to lowercase and trimmed of white space and a non-nil map.
   107  // If there is an error parsing the optional parameter,
   108  // the media type will be returned along with the error
   109  // ErrInvalidMediaParameter.
   110  // The returned map, params, maps from the lowercase
   111  // attribute to the attribute value with its case preserved.
   112  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   113  	i := strings.Index(v, ";")
   114  	if i == -1 {
   115  		i = len(v)
   116  	}
   117  	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
   118  
   119  	err = checkMediaTypeDisposition(mediatype)
   120  	if err != nil {
   121  		return "", nil, err
   122  	}
   123  
   124  	params = make(map[string]string)
   125  
   126  	// Map of base parameter name -> parameter name -> value
   127  	// for parameters containing a '*' character.
   128  	// Lazily initialized.
   129  	var continuation map[string]map[string]string
   130  
   131  	v = v[i:]
   132  	for len(v) > 0 {
   133  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   134  		if len(v) == 0 {
   135  			break
   136  		}
   137  		key, value, rest := consumeMediaParam(v)
   138  		if key == "" {
   139  			if strings.TrimSpace(rest) == ";" {
   140  				// Ignore trailing semicolons.
   141  				// Not an error.
   142  				return
   143  			}
   144  			// Parse error.
   145  			return mediatype, nil, ErrInvalidMediaParameter
   146  		}
   147  
   148  		pmap := params
   149  		if idx := strings.Index(key, "*"); idx != -1 {
   150  			baseName := key[:idx]
   151  			if continuation == nil {
   152  				continuation = make(map[string]map[string]string)
   153  			}
   154  			var ok bool
   155  			if pmap, ok = continuation[baseName]; !ok {
   156  				continuation[baseName] = make(map[string]string)
   157  				pmap = continuation[baseName]
   158  			}
   159  		}
   160  		if _, exists := pmap[key]; exists {
   161  			// Duplicate parameter name is bogus.
   162  			return "", nil, errors.New("mime: duplicate parameter name")
   163  		}
   164  		pmap[key] = value
   165  		v = rest
   166  	}
   167  
   168  	// Stitch together any continuations or things with stars
   169  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   170  	var buf bytes.Buffer
   171  	for key, pieceMap := range continuation {
   172  		singlePartKey := key + "*"
   173  		if v, ok := pieceMap[singlePartKey]; ok {
   174  			if decv, ok := decode2231Enc(v); ok {
   175  				params[key] = decv
   176  			}
   177  			continue
   178  		}
   179  
   180  		buf.Reset()
   181  		valid := false
   182  		for n := 0; ; n++ {
   183  			simplePart := fmt.Sprintf("%s*%d", key, n)
   184  			if v, ok := pieceMap[simplePart]; ok {
   185  				valid = true
   186  				buf.WriteString(v)
   187  				continue
   188  			}
   189  			encodedPart := simplePart + "*"
   190  			v, ok := pieceMap[encodedPart]
   191  			if !ok {
   192  				break
   193  			}
   194  			valid = true
   195  			if n == 0 {
   196  				if decv, ok := decode2231Enc(v); ok {
   197  					buf.WriteString(decv)
   198  				}
   199  			} else {
   200  				decv, _ := percentHexUnescape(v)
   201  				buf.WriteString(decv)
   202  			}
   203  		}
   204  		if valid {
   205  			params[key] = buf.String()
   206  		}
   207  	}
   208  
   209  	return
   210  }
   211  
   212  func decode2231Enc(v string) (string, bool) {
   213  	sv := strings.SplitN(v, "'", 3)
   214  	if len(sv) != 3 {
   215  		return "", false
   216  	}
   217  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   218  	// need to decide how to expose it in the API. But I'm not sure
   219  	// anybody uses it in practice.
   220  	charset := strings.ToLower(sv[0])
   221  	if len(charset) == 0 {
   222  		return "", false
   223  	}
   224  	if charset != "us-ascii" && charset != "utf-8" {
   225  		// TODO: unsupported encoding
   226  		return "", false
   227  	}
   228  	encv, err := percentHexUnescape(sv[2])
   229  	if err != nil {
   230  		return "", false
   231  	}
   232  	return encv, true
   233  }
   234  
   235  func isNotTokenChar(r rune) bool {
   236  	return !isTokenChar(r)
   237  }
   238  
   239  // consumeToken consumes a token from the beginning of provided
   240  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   241  // the token consumed and the rest of the string. Returns ("", v) on
   242  // failure to consume at least one character.
   243  func consumeToken(v string) (token, rest string) {
   244  	notPos := strings.IndexFunc(v, isNotTokenChar)
   245  	if notPos == -1 {
   246  		return v, ""
   247  	}
   248  	if notPos == 0 {
   249  		return "", v
   250  	}
   251  	return v[0:notPos], v[notPos:]
   252  }
   253  
   254  // consumeValue consumes a "value" per RFC 2045, where a value is
   255  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   256  // returns the value consumed (and de-quoted/escaped, if a
   257  // quoted-string) and the rest of the string. On failure, returns
   258  // ("", v).
   259  func consumeValue(v string) (value, rest string) {
   260  	if v == "" {
   261  		return
   262  	}
   263  	if v[0] != '"' {
   264  		return consumeToken(v)
   265  	}
   266  
   267  	// parse a quoted-string
   268  	buffer := new(bytes.Buffer)
   269  	for i := 1; i < len(v); i++ {
   270  		r := v[i]
   271  		if r == '"' {
   272  			return buffer.String(), v[i+1:]
   273  		}
   274  		// When MSIE sends a full file path (in "intranet mode"), it does not
   275  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   276  		//
   277  		// No known MIME generators emit unnecessary backslash escapes
   278  		// for simple token characters like numbers and letters.
   279  		//
   280  		// If we see an unnecessary backslash escape, assume it is from MSIE
   281  		// and intended as a literal backslash. This makes Go servers deal better
   282  		// with MSIE without affecting the way they handle conforming MIME
   283  		// generators.
   284  		if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
   285  			buffer.WriteByte(v[i+1])
   286  			i++
   287  			continue
   288  		}
   289  		if r == '\r' || r == '\n' {
   290  			return "", v
   291  		}
   292  		buffer.WriteByte(v[i])
   293  	}
   294  	// Did not find end quote.
   295  	return "", v
   296  }
   297  
   298  func consumeMediaParam(v string) (param, value, rest string) {
   299  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   300  	if !strings.HasPrefix(rest, ";") {
   301  		return "", "", v
   302  	}
   303  
   304  	rest = rest[1:] // consume semicolon
   305  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   306  	param, rest = consumeToken(rest)
   307  	param = strings.ToLower(param)
   308  	if param == "" {
   309  		return "", "", v
   310  	}
   311  
   312  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   313  	if !strings.HasPrefix(rest, "=") {
   314  		return "", "", v
   315  	}
   316  	rest = rest[1:] // consume equals sign
   317  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   318  	value, rest2 := consumeValue(rest)
   319  	if value == "" && rest2 == rest {
   320  		return "", "", v
   321  	}
   322  	rest = rest2
   323  	return param, value, rest
   324  }
   325  
   326  func percentHexUnescape(s string) (string, error) {
   327  	// Count %, check that they're well-formed.
   328  	percents := 0
   329  	for i := 0; i < len(s); {
   330  		if s[i] != '%' {
   331  			i++
   332  			continue
   333  		}
   334  		percents++
   335  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   336  			s = s[i:]
   337  			if len(s) > 3 {
   338  				s = s[0:3]
   339  			}
   340  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   341  		}
   342  		i += 3
   343  	}
   344  	if percents == 0 {
   345  		return s, nil
   346  	}
   347  
   348  	t := make([]byte, len(s)-2*percents)
   349  	j := 0
   350  	for i := 0; i < len(s); {
   351  		switch s[i] {
   352  		case '%':
   353  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   354  			j++
   355  			i += 3
   356  		default:
   357  			t[j] = s[i]
   358  			j++
   359  			i++
   360  		}
   361  	}
   362  	return string(t), nil
   363  }
   364  
   365  func ishex(c byte) bool {
   366  	switch {
   367  	case '0' <= c && c <= '9':
   368  		return true
   369  	case 'a' <= c && c <= 'f':
   370  		return true
   371  	case 'A' <= c && c <= 'F':
   372  		return true
   373  	}
   374  	return false
   375  }
   376  
   377  func unhex(c byte) byte {
   378  	switch {
   379  	case '0' <= c && c <= '9':
   380  		return c - '0'
   381  	case 'a' <= c && c <= 'f':
   382  		return c - 'a' + 10
   383  	case 'A' <= c && c <= 'F':
   384  		return c - 'A' + 10
   385  	}
   386  	return 0
   387  }
   388  

View as plain text