...
Run Format

Source file src/net/http/sniff.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package http
     6	
     7	import (
     8		"bytes"
     9		"encoding/binary"
    10	)
    11	
// sniffLen is the maximum number of leading bytes of the input that the
// sniffing algorithm uses to make its decision; DetectContentType
// truncates longer input to this length before matching.
const sniffLen = 512
    14	
    15	// DetectContentType implements the algorithm described
    16	// at http://mimesniff.spec.whatwg.org/ to determine the
    17	// Content-Type of the given data. It considers at most the
    18	// first 512 bytes of data. DetectContentType always returns
    19	// a valid MIME type: if it cannot determine a more specific one, it
    20	// returns "application/octet-stream".
    21	func DetectContentType(data []byte) string {
    22		if len(data) > sniffLen {
    23			data = data[:sniffLen]
    24		}
    25	
    26		// Index of the first non-whitespace byte in data.
    27		firstNonWS := 0
    28		for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ {
    29		}
    30	
    31		for _, sig := range sniffSignatures {
    32			if ct := sig.match(data, firstNonWS); ct != "" {
    33				return ct
    34			}
    35		}
    36	
    37		return "application/octet-stream" // fallback
    38	}
    39	
    40	func isWS(b byte) bool {
    41		switch b {
    42		case '\t', '\n', '\x0c', '\r', ' ':
    43			return true
    44		}
    45		return false
    46	}
    47	
// sniffSig is a single content-type signature. DetectContentType tries
// each signature in sniffSignatures in turn and uses the first non-empty
// result.
type sniffSig interface {
	// match returns the MIME type of the data, or "" if unknown.
	// firstNonWS is the index of the first non-whitespace byte in data,
	// for signatures that ignore leading whitespace.
	match(data []byte, firstNonWS int) string
}
    52	
    53	// Data matching the table in section 6.
    54	var sniffSignatures = []sniffSig{
    55		htmlSig("<!DOCTYPE HTML"),
    56		htmlSig("<HTML"),
    57		htmlSig("<HEAD"),
    58		htmlSig("<SCRIPT"),
    59		htmlSig("<IFRAME"),
    60		htmlSig("<H1"),
    61		htmlSig("<DIV"),
    62		htmlSig("<FONT"),
    63		htmlSig("<TABLE"),
    64		htmlSig("<A"),
    65		htmlSig("<STYLE"),
    66		htmlSig("<TITLE"),
    67		htmlSig("<B"),
    68		htmlSig("<BODY"),
    69		htmlSig("<BR"),
    70		htmlSig("<P"),
    71		htmlSig("<!--"),
    72	
    73		&maskedSig{mask: []byte("\xFF\xFF\xFF\xFF\xFF"), pat: []byte("<?xml"), skipWS: true, ct: "text/xml; charset=utf-8"},
    74	
    75		&exactSig{[]byte("%PDF-"), "application/pdf"},
    76		&exactSig{[]byte("%!PS-Adobe-"), "application/postscript"},
    77	
    78		// UTF BOMs.
    79		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFE\xFF\x00\x00"), ct: "text/plain; charset=utf-16be"},
    80		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFF\xFE\x00\x00"), ct: "text/plain; charset=utf-16le"},
    81		&maskedSig{mask: []byte("\xFF\xFF\xFF\x00"), pat: []byte("\xEF\xBB\xBF\x00"), ct: "text/plain; charset=utf-8"},
    82	
    83		&exactSig{[]byte("GIF87a"), "image/gif"},
    84		&exactSig{[]byte("GIF89a"), "image/gif"},
    85		&exactSig{[]byte("\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"), "image/png"},
    86		&exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"},
    87		&exactSig{[]byte("BM"), "image/bmp"},
    88		&maskedSig{
    89			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"),
    90			pat:  []byte("RIFF\x00\x00\x00\x00WEBPVP"),
    91			ct:   "image/webp",
    92		},
    93		&exactSig{[]byte("\x00\x00\x01\x00"), "image/vnd.microsoft.icon"},
    94		&maskedSig{
    95			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
    96			pat:  []byte("RIFF\x00\x00\x00\x00WAVE"),
    97			ct:   "audio/wave",
    98		},
    99		&maskedSig{
   100			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
   101			pat:  []byte("FORM\x00\x00\x00\x00AIFF"),
   102			ct:   "audio/aiff",
   103		},
   104		&maskedSig{
   105			mask: []byte("\xFF\xFF\xFF\xFF"),
   106			pat:  []byte(".snd"),
   107			ct:   "audio/basic",
   108		},
   109		&maskedSig{
   110			mask: []byte("OggS\x00"),
   111			pat:  []byte("\x4F\x67\x67\x53\x00"),
   112			ct:   "application/ogg",
   113		},
   114		&maskedSig{
   115			mask: []byte("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"),
   116			pat:  []byte("MThd\x00\x00\x00\x06"),
   117			ct:   "audio/midi",
   118		},
   119		&maskedSig{
   120			mask: []byte("\xFF\xFF\xFF"),
   121			pat:  []byte("ID3"),
   122			ct:   "audio/mpeg",
   123		},
   124		&maskedSig{
   125			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
   126			pat:  []byte("RIFF\x00\x00\x00\x00AVI "),
   127			ct:   "video/avi",
   128		},
   129		&exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"},
   130		&exactSig{[]byte("\x52\x61\x72\x20\x1A\x07\x00"), "application/x-rar-compressed"},
   131		&exactSig{[]byte("\x50\x4B\x03\x04"), "application/zip"},
   132		&exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"},
   133	
   134		mp4Sig{},
   135	
   136		textSig{}, // should be last
   137	}
   138	
   139	type exactSig struct {
   140		sig []byte
   141		ct  string
   142	}
   143	
   144	func (e *exactSig) match(data []byte, firstNonWS int) string {
   145		if bytes.HasPrefix(data, e.sig) {
   146			return e.ct
   147		}
   148		return ""
   149	}
   150	
   151	type maskedSig struct {
   152		mask, pat []byte
   153		skipWS    bool
   154		ct        string
   155	}
   156	
   157	func (m *maskedSig) match(data []byte, firstNonWS int) string {
   158		// pattern matching algorithm section 6
   159		// https://mimesniff.spec.whatwg.org/#pattern-matching-algorithm
   160	
   161		if m.skipWS {
   162			data = data[firstNonWS:]
   163		}
   164		if len(m.pat) != len(m.mask) {
   165			return ""
   166		}
   167		if len(data) < len(m.mask) {
   168			return ""
   169		}
   170		for i, mask := range m.mask {
   171			db := data[i] & mask
   172			if db != m.pat[i] {
   173				return ""
   174			}
   175		}
   176		return m.ct
   177	}
   178	
   179	type htmlSig []byte
   180	
   181	func (h htmlSig) match(data []byte, firstNonWS int) string {
   182		data = data[firstNonWS:]
   183		if len(data) < len(h)+1 {
   184			return ""
   185		}
   186		for i, b := range h {
   187			db := data[i]
   188			if 'A' <= b && b <= 'Z' {
   189				db &= 0xDF
   190			}
   191			if b != db {
   192				return ""
   193			}
   194		}
   195		// Next byte must be space or right angle bracket.
   196		if db := data[len(h)]; db != ' ' && db != '>' {
   197			return ""
   198		}
   199		return "text/html; charset=utf-8"
   200	}
   201	
   202	var mp4ftype = []byte("ftyp")
   203	var mp4 = []byte("mp4")
   204	
   205	type mp4Sig struct{}
   206	
   207	func (mp4Sig) match(data []byte, firstNonWS int) string {
   208		// https://mimesniff.spec.whatwg.org/#signature-for-mp4
   209		// c.f. section 6.2.1
   210		if len(data) < 12 {
   211			return ""
   212		}
   213		boxSize := int(binary.BigEndian.Uint32(data[:4]))
   214		if boxSize%4 != 0 || len(data) < boxSize {
   215			return ""
   216		}
   217		if !bytes.Equal(data[4:8], mp4ftype) {
   218			return ""
   219		}
   220		for st := 8; st < boxSize; st += 4 {
   221			if st == 12 {
   222				// minor version number
   223				continue
   224			}
   225			if bytes.Equal(data[st:st+3], mp4) {
   226				return "video/mp4"
   227			}
   228		}
   229		return ""
   230	}
   231	
   232	type textSig struct{}
   233	
   234	func (textSig) match(data []byte, firstNonWS int) string {
   235		// c.f. section 5, step 4.
   236		for _, b := range data[firstNonWS:] {
   237			switch {
   238			case b <= 0x08,
   239				b == 0x0B,
   240				0x0E <= b && b <= 0x1A,
   241				0x1C <= b && b <= 0x1F:
   242				return ""
   243			}
   244		}
   245		return "text/plain; charset=utf-8"
   246	}
   247	

View as plain text