...
Run Format

Source file src/net/http/sniff.go

     1	// Copyright 2011 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package http
     6	
     7	import (
     8		"bytes"
     9		"encoding/binary"
    10	)
    11	
// sniffLen is the maximum number of leading bytes of the input that the
// sniffing algorithm examines when making its decision.
const sniffLen = 512
    14	
    15	// DetectContentType implements the algorithm described
    16	// at http://mimesniff.spec.whatwg.org/ to determine the
    17	// Content-Type of the given data.  It considers at most the
    18	// first 512 bytes of data.  DetectContentType always returns
    19	// a valid MIME type: if it cannot determine a more specific one, it
    20	// returns "application/octet-stream".
    21	func DetectContentType(data []byte) string {
    22		if len(data) > sniffLen {
    23			data = data[:sniffLen]
    24		}
    25	
    26		// Index of the first non-whitespace byte in data.
    27		firstNonWS := 0
    28		for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ {
    29		}
    30	
    31		for _, sig := range sniffSignatures {
    32			if ct := sig.match(data, firstNonWS); ct != "" {
    33				return ct
    34			}
    35		}
    36	
    37		return "application/octet-stream" // fallback
    38	}
    39	
    40	func isWS(b byte) bool {
    41		switch b {
    42		case '\t', '\n', '\x0c', '\r', ' ':
    43			return true
    44		}
    45		return false
    46	}
    47	
// sniffSig is a content-type signature that can be tested against the
// leading bytes of a payload.
type sniffSig interface {
	// match returns the MIME type of the data, or "" if unknown.
	// firstNonWS is the index of the first non-whitespace byte in data
	// (len(data) when there is none).
	match(data []byte, firstNonWS int) string
}
    52	
    53	// Data matching the table in section 6.
    54	var sniffSignatures = []sniffSig{
    55		htmlSig("<!DOCTYPE HTML"),
    56		htmlSig("<HTML"),
    57		htmlSig("<HEAD"),
    58		htmlSig("<SCRIPT"),
    59		htmlSig("<IFRAME"),
    60		htmlSig("<H1"),
    61		htmlSig("<DIV"),
    62		htmlSig("<FONT"),
    63		htmlSig("<TABLE"),
    64		htmlSig("<A"),
    65		htmlSig("<STYLE"),
    66		htmlSig("<TITLE"),
    67		htmlSig("<B"),
    68		htmlSig("<BODY"),
    69		htmlSig("<BR"),
    70		htmlSig("<P"),
    71		htmlSig("<!--"),
    72	
    73		&maskedSig{mask: []byte("\xFF\xFF\xFF\xFF\xFF"), pat: []byte("<?xml"), skipWS: true, ct: "text/xml; charset=utf-8"},
    74	
    75		&exactSig{[]byte("%PDF-"), "application/pdf"},
    76		&exactSig{[]byte("%!PS-Adobe-"), "application/postscript"},
    77	
    78		// UTF BOMs.
    79		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFE\xFF\x00\x00"), ct: "text/plain; charset=utf-16be"},
    80		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFF\xFE\x00\x00"), ct: "text/plain; charset=utf-16le"},
    81		&maskedSig{mask: []byte("\xFF\xFF\xFF\x00"), pat: []byte("\xEF\xBB\xBF\x00"), ct: "text/plain; charset=utf-8"},
    82	
    83		&exactSig{[]byte("GIF87a"), "image/gif"},
    84		&exactSig{[]byte("GIF89a"), "image/gif"},
    85		&exactSig{[]byte("\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"), "image/png"},
    86		&exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"},
    87		&exactSig{[]byte("BM"), "image/bmp"},
    88		&maskedSig{
    89			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"),
    90			pat:  []byte("RIFF\x00\x00\x00\x00WEBPVP"),
    91			ct:   "image/webp",
    92		},
    93		&exactSig{[]byte("\x00\x00\x01\x00"), "image/vnd.microsoft.icon"},
    94		&exactSig{[]byte("\x4F\x67\x67\x53\x00"), "application/ogg"},
    95		&maskedSig{
    96			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
    97			pat:  []byte("RIFF\x00\x00\x00\x00WAVE"),
    98			ct:   "audio/wave",
    99		},
   100		&exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"},
   101		&exactSig{[]byte("\x52\x61\x72\x20\x1A\x07\x00"), "application/x-rar-compressed"},
   102		&exactSig{[]byte("\x50\x4B\x03\x04"), "application/zip"},
   103		&exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"},
   104	
   105		// TODO(dsymonds): Re-enable this when the spec is sorted w.r.t. MP4.
   106		//mp4Sig(0),
   107	
   108		textSig(0), // should be last
   109	}
   110	
   111	type exactSig struct {
   112		sig []byte
   113		ct  string
   114	}
   115	
   116	func (e *exactSig) match(data []byte, firstNonWS int) string {
   117		if bytes.HasPrefix(data, e.sig) {
   118			return e.ct
   119		}
   120		return ""
   121	}
   122	
   123	type maskedSig struct {
   124		mask, pat []byte
   125		skipWS    bool
   126		ct        string
   127	}
   128	
   129	func (m *maskedSig) match(data []byte, firstNonWS int) string {
   130		if m.skipWS {
   131			data = data[firstNonWS:]
   132		}
   133		if len(data) < len(m.mask) {
   134			return ""
   135		}
   136		for i, mask := range m.mask {
   137			db := data[i] & mask
   138			if db != m.pat[i] {
   139				return ""
   140			}
   141		}
   142		return m.ct
   143	}
   144	
   145	type htmlSig []byte
   146	
   147	func (h htmlSig) match(data []byte, firstNonWS int) string {
   148		data = data[firstNonWS:]
   149		if len(data) < len(h)+1 {
   150			return ""
   151		}
   152		for i, b := range h {
   153			db := data[i]
   154			if 'A' <= b && b <= 'Z' {
   155				db &= 0xDF
   156			}
   157			if b != db {
   158				return ""
   159			}
   160		}
   161		// Next byte must be space or right angle bracket.
   162		if db := data[len(h)]; db != ' ' && db != '>' {
   163			return ""
   164		}
   165		return "text/html; charset=utf-8"
   166	}
   167	
   168	var mp4ftype = []byte("ftyp")
   169	
   170	type mp4Sig int
   171	
   172	func (mp4Sig) match(data []byte, firstNonWS int) string {
   173		// c.f. section 6.1.
   174		if len(data) < 8 {
   175			return ""
   176		}
   177		boxSize := int(binary.BigEndian.Uint32(data[:4]))
   178		if boxSize%4 != 0 || len(data) < boxSize {
   179			return ""
   180		}
   181		if !bytes.Equal(data[4:8], mp4ftype) {
   182			return ""
   183		}
   184		for st := 8; st < boxSize; st += 4 {
   185			if st == 12 {
   186				// minor version number
   187				continue
   188			}
   189			seg := string(data[st : st+3])
   190			switch seg {
   191			case "mp4", "iso", "M4V", "M4P", "M4B":
   192				return "video/mp4"
   193				/* The remainder are not in the spec.
   194				case "M4A":
   195					return "audio/mp4"
   196				case "3gp":
   197					return "video/3gpp"
   198				case "jp2":
   199					return "image/jp2" // JPEG 2000
   200				*/
   201			}
   202		}
   203		return ""
   204	}
   205	
   206	type textSig int
   207	
   208	func (textSig) match(data []byte, firstNonWS int) string {
   209		// c.f. section 5, step 4.
   210		for _, b := range data[firstNonWS:] {
   211			switch {
   212			case 0x00 <= b && b <= 0x08,
   213				b == 0x0B,
   214				0x0E <= b && b <= 0x1A,
   215				0x1C <= b && b <= 0x1F:
   216				return ""
   217			}
   218		}
   219		return "text/plain; charset=utf-8"
   220	}
   221	

View as plain text