...
Run Format

Source file src/encoding/json/scanner.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package json
     6	
     7	// JSON value parser state machine.
     8	// Just about at the limit of what is reasonable to write by hand.
     9	// Some parts are a bit tedious, but overall it nicely factors out the
    10	// otherwise common code from the multiple scanning functions
    11	// in this package (Compact, Indent, checkValid, nextValue, etc).
    12	//
    13	// This file starts with two simple examples using the scanner
    14	// before diving into the scanner itself.
    15	
    16	import "strconv"
    17	
    18	// checkValid verifies that data is valid JSON-encoded data.
    19	// scan is passed in for use by checkValid to avoid an allocation.
    20	func checkValid(data []byte, scan *scanner) error {
    21		scan.reset()
    22		for _, c := range data {
    23			scan.bytes++
    24			if scan.step(scan, c) == scanError {
    25				return scan.err
    26			}
    27		}
    28		if scan.eof() == scanError {
    29			return scan.err
    30		}
    31		return nil
    32	}
    33	
    34	// nextValue splits data after the next whole JSON value,
    35	// returning that value and the bytes that follow it as separate slices.
    36	// scan is passed in for use by nextValue to avoid an allocation.
    37	func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
    38		scan.reset()
    39		for i, c := range data {
    40			v := scan.step(scan, c)
    41			if v >= scanEndObject {
    42				switch v {
    43				// probe the scanner with a space to determine whether we will
    44				// get scanEnd on the next character. Otherwise, if the next character
    45				// is not a space, scanEndTop allocates a needless error.
    46				case scanEndObject, scanEndArray:
    47					if scan.step(scan, ' ') == scanEnd {
    48						return data[:i+1], data[i+1:], nil
    49					}
    50				case scanError:
    51					return nil, nil, scan.err
    52				case scanEnd:
    53					return data[:i], data[i:], nil
    54				}
    55			}
    56		}
    57		if scan.eof() == scanError {
    58			return nil, nil, scan.err
    59		}
    60		return data, nil, nil
    61	}
    62	
    63	// A SyntaxError is a description of a JSON syntax error.
    64	type SyntaxError struct {
    65		msg    string // description of error
    66		Offset int64  // error occurred after reading Offset bytes
    67	}
    68	
    69	func (e *SyntaxError) Error() string { return e.msg }
    70	
    71	// A scanner is a JSON scanning state machine.
    72	// Callers call scan.reset() and then pass bytes in one at a time
    73	// by calling scan.step(&scan, c) for each byte.
    74	// The return value, referred to as an opcode, tells the
    75	// caller about significant parsing events like beginning
    76	// and ending literals, objects, and arrays, so that the
    77	// caller can follow along if it wishes.
    78	// The return value scanEnd indicates that a single top-level
    79	// JSON value has been completed, *before* the byte that
    80	// just got passed in.  (The indication must be delayed in order
    81	// to recognize the end of numbers: is 123 a whole value or
    82	// the beginning of 12345e+6?).
    83	type scanner struct {
    84		// The step is a func to be called to execute the next transition.
    85		// Also tried using an integer constant and a single func
    86		// with a switch, but using the func directly was 10% faster
    87		// on a 64-bit Mac Mini, and it's nicer to read.
    88		step func(*scanner, byte) int
    89	
    90		// Reached end of top-level value.
    91		endTop bool
    92	
    93		// Stack of what we're in the middle of - array values, object keys, object values.
    94		parseState []int
    95	
    96		// Error that happened, if any.
    97		err error
    98	
    99		// 1-byte redo (see undo method)
   100		redo      bool
   101		redoCode  int
   102		redoState func(*scanner, byte) int
   103	
   104		// total bytes consumed, updated by decoder.Decode
   105		bytes int64
   106	}
   107	
   108	// These values are returned by the state transition functions
   109	// assigned to scanner.state and the method scanner.eof.
   110	// They give details about the current state of the scan that
   111	// callers might be interested to know about.
   112	// It is okay to ignore the return value of any particular
   113	// call to scanner.state: if one call returns scanError,
   114	// every subsequent call will return scanError too.
   115	const (
   116		// Continue.
   117		scanContinue     = iota // uninteresting byte
   118		scanBeginLiteral        // end implied by next result != scanContinue
   119		scanBeginObject         // begin object
   120		scanObjectKey           // just finished object key (string)
   121		scanObjectValue         // just finished non-last object value
   122		scanEndObject           // end object (implies scanObjectValue if possible)
   123		scanBeginArray          // begin array
   124		scanArrayValue          // just finished array value
   125		scanEndArray            // end array (implies scanArrayValue if possible)
   126		scanSkipSpace           // space byte; can skip; known to be last "continue" result
   127	
   128		// Stop.
   129		scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
   130		scanError // hit an error, scanner.err.
   131	)
   132	
   133	// These values are stored in the parseState stack.
   134	// They give the current state of a composite value
   135	// being scanned. If the parser is inside a nested value
   136	// the parseState describes the nested state, outermost at entry 0.
   137	const (
   138		parseObjectKey   = iota // parsing object key (before colon)
   139		parseObjectValue        // parsing object value (after colon)
   140		parseArrayValue         // parsing array value
   141	)
   142	
   143	// reset prepares the scanner for use.
   144	// It must be called before calling s.step.
   145	func (s *scanner) reset() {
   146		s.step = stateBeginValue
   147		s.parseState = s.parseState[0:0]
   148		s.err = nil
   149		s.redo = false
   150		s.endTop = false
   151	}
   152	
   153	// eof tells the scanner that the end of input has been reached.
   154	// It returns a scan status just as s.step does.
   155	func (s *scanner) eof() int {
   156		if s.err != nil {
   157			return scanError
   158		}
   159		if s.endTop {
   160			return scanEnd
   161		}
   162		s.step(s, ' ')
   163		if s.endTop {
   164			return scanEnd
   165		}
   166		if s.err == nil {
   167			s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
   168		}
   169		return scanError
   170	}
   171	
   172	// pushParseState pushes a new parse state p onto the parse stack.
   173	func (s *scanner) pushParseState(p int) {
   174		s.parseState = append(s.parseState, p)
   175	}
   176	
   177	// popParseState pops a parse state (already obtained) off the stack
   178	// and updates s.step accordingly.
   179	func (s *scanner) popParseState() {
   180		n := len(s.parseState) - 1
   181		s.parseState = s.parseState[0:n]
   182		s.redo = false
   183		if n == 0 {
   184			s.step = stateEndTop
   185			s.endTop = true
   186		} else {
   187			s.step = stateEndValue
   188		}
   189	}
   190	
   191	func isSpace(c byte) bool {
   192		return c == ' ' || c == '\t' || c == '\r' || c == '\n'
   193	}
   194	
   195	// stateBeginValueOrEmpty is the state after reading `[`.
   196	func stateBeginValueOrEmpty(s *scanner, c byte) int {
   197		if c <= ' ' && isSpace(c) {
   198			return scanSkipSpace
   199		}
   200		if c == ']' {
   201			return stateEndValue(s, c)
   202		}
   203		return stateBeginValue(s, c)
   204	}
   205	
   206	// stateBeginValue is the state at the beginning of the input.
   207	func stateBeginValue(s *scanner, c byte) int {
   208		if c <= ' ' && isSpace(c) {
   209			return scanSkipSpace
   210		}
   211		switch c {
   212		case '{':
   213			s.step = stateBeginStringOrEmpty
   214			s.pushParseState(parseObjectKey)
   215			return scanBeginObject
   216		case '[':
   217			s.step = stateBeginValueOrEmpty
   218			s.pushParseState(parseArrayValue)
   219			return scanBeginArray
   220		case '"':
   221			s.step = stateInString
   222			return scanBeginLiteral
   223		case '-':
   224			s.step = stateNeg
   225			return scanBeginLiteral
   226		case '0': // beginning of 0.123
   227			s.step = state0
   228			return scanBeginLiteral
   229		case 't': // beginning of true
   230			s.step = stateT
   231			return scanBeginLiteral
   232		case 'f': // beginning of false
   233			s.step = stateF
   234			return scanBeginLiteral
   235		case 'n': // beginning of null
   236			s.step = stateN
   237			return scanBeginLiteral
   238		}
   239		if '1' <= c && c <= '9' { // beginning of 1234.5
   240			s.step = state1
   241			return scanBeginLiteral
   242		}
   243		return s.error(c, "looking for beginning of value")
   244	}
   245	
   246	// stateBeginStringOrEmpty is the state after reading `{`.
   247	func stateBeginStringOrEmpty(s *scanner, c byte) int {
   248		if c <= ' ' && isSpace(c) {
   249			return scanSkipSpace
   250		}
   251		if c == '}' {
   252			n := len(s.parseState)
   253			s.parseState[n-1] = parseObjectValue
   254			return stateEndValue(s, c)
   255		}
   256		return stateBeginString(s, c)
   257	}
   258	
   259	// stateBeginString is the state after reading `{"key": value,`.
   260	func stateBeginString(s *scanner, c byte) int {
   261		if c <= ' ' && isSpace(c) {
   262			return scanSkipSpace
   263		}
   264		if c == '"' {
   265			s.step = stateInString
   266			return scanBeginLiteral
   267		}
   268		return s.error(c, "looking for beginning of object key string")
   269	}
   270	
   271	// stateEndValue is the state after completing a value,
   272	// such as after reading `{}` or `true` or `["x"`.
   273	func stateEndValue(s *scanner, c byte) int {
   274		n := len(s.parseState)
   275		if n == 0 {
   276			// Completed top-level before the current byte.
   277			s.step = stateEndTop
   278			s.endTop = true
   279			return stateEndTop(s, c)
   280		}
   281		if c <= ' ' && isSpace(c) {
   282			s.step = stateEndValue
   283			return scanSkipSpace
   284		}
   285		ps := s.parseState[n-1]
   286		switch ps {
   287		case parseObjectKey:
   288			if c == ':' {
   289				s.parseState[n-1] = parseObjectValue
   290				s.step = stateBeginValue
   291				return scanObjectKey
   292			}
   293			return s.error(c, "after object key")
   294		case parseObjectValue:
   295			if c == ',' {
   296				s.parseState[n-1] = parseObjectKey
   297				s.step = stateBeginString
   298				return scanObjectValue
   299			}
   300			if c == '}' {
   301				s.popParseState()
   302				return scanEndObject
   303			}
   304			return s.error(c, "after object key:value pair")
   305		case parseArrayValue:
   306			if c == ',' {
   307				s.step = stateBeginValue
   308				return scanArrayValue
   309			}
   310			if c == ']' {
   311				s.popParseState()
   312				return scanEndArray
   313			}
   314			return s.error(c, "after array element")
   315		}
   316		return s.error(c, "")
   317	}
   318	
   319	// stateEndTop is the state after finishing the top-level value,
   320	// such as after reading `{}` or `[1,2,3]`.
   321	// Only space characters should be seen now.
   322	func stateEndTop(s *scanner, c byte) int {
   323		if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
   324			// Complain about non-space byte on next call.
   325			s.error(c, "after top-level value")
   326		}
   327		return scanEnd
   328	}
   329	
   330	// stateInString is the state after reading `"`.
   331	func stateInString(s *scanner, c byte) int {
   332		if c == '"' {
   333			s.step = stateEndValue
   334			return scanContinue
   335		}
   336		if c == '\\' {
   337			s.step = stateInStringEsc
   338			return scanContinue
   339		}
   340		if c < 0x20 {
   341			return s.error(c, "in string literal")
   342		}
   343		return scanContinue
   344	}
   345	
   346	// stateInStringEsc is the state after reading `"\` during a quoted string.
   347	func stateInStringEsc(s *scanner, c byte) int {
   348		switch c {
   349		case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
   350			s.step = stateInString
   351			return scanContinue
   352		case 'u':
   353			s.step = stateInStringEscU
   354			return scanContinue
   355		}
   356		return s.error(c, "in string escape code")
   357	}
   358	
   359	// stateInStringEscU is the state after reading `"\u` during a quoted string.
   360	func stateInStringEscU(s *scanner, c byte) int {
   361		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   362			s.step = stateInStringEscU1
   363			return scanContinue
   364		}
   365		// numbers
   366		return s.error(c, "in \\u hexadecimal character escape")
   367	}
   368	
   369	// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
   370	func stateInStringEscU1(s *scanner, c byte) int {
   371		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   372			s.step = stateInStringEscU12
   373			return scanContinue
   374		}
   375		// numbers
   376		return s.error(c, "in \\u hexadecimal character escape")
   377	}
   378	
   379	// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
   380	func stateInStringEscU12(s *scanner, c byte) int {
   381		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   382			s.step = stateInStringEscU123
   383			return scanContinue
   384		}
   385		// numbers
   386		return s.error(c, "in \\u hexadecimal character escape")
   387	}
   388	
   389	// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
   390	func stateInStringEscU123(s *scanner, c byte) int {
   391		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   392			s.step = stateInString
   393			return scanContinue
   394		}
   395		// numbers
   396		return s.error(c, "in \\u hexadecimal character escape")
   397	}
   398	
   399	// stateNeg is the state after reading `-` during a number.
   400	func stateNeg(s *scanner, c byte) int {
   401		if c == '0' {
   402			s.step = state0
   403			return scanContinue
   404		}
   405		if '1' <= c && c <= '9' {
   406			s.step = state1
   407			return scanContinue
   408		}
   409		return s.error(c, "in numeric literal")
   410	}
   411	
   412	// state1 is the state after reading a non-zero integer during a number,
   413	// such as after reading `1` or `100` but not `0`.
   414	func state1(s *scanner, c byte) int {
   415		if '0' <= c && c <= '9' {
   416			s.step = state1
   417			return scanContinue
   418		}
   419		return state0(s, c)
   420	}
   421	
   422	// state0 is the state after reading `0` during a number.
   423	func state0(s *scanner, c byte) int {
   424		if c == '.' {
   425			s.step = stateDot
   426			return scanContinue
   427		}
   428		if c == 'e' || c == 'E' {
   429			s.step = stateE
   430			return scanContinue
   431		}
   432		return stateEndValue(s, c)
   433	}
   434	
   435	// stateDot is the state after reading the integer and decimal point in a number,
   436	// such as after reading `1.`.
   437	func stateDot(s *scanner, c byte) int {
   438		if '0' <= c && c <= '9' {
   439			s.step = stateDot0
   440			return scanContinue
   441		}
   442		return s.error(c, "after decimal point in numeric literal")
   443	}
   444	
   445	// stateDot0 is the state after reading the integer, decimal point, and subsequent
   446	// digits of a number, such as after reading `3.14`.
   447	func stateDot0(s *scanner, c byte) int {
   448		if '0' <= c && c <= '9' {
   449			return scanContinue
   450		}
   451		if c == 'e' || c == 'E' {
   452			s.step = stateE
   453			return scanContinue
   454		}
   455		return stateEndValue(s, c)
   456	}
   457	
   458	// stateE is the state after reading the mantissa and e in a number,
   459	// such as after reading `314e` or `0.314e`.
   460	func stateE(s *scanner, c byte) int {
   461		if c == '+' || c == '-' {
   462			s.step = stateESign
   463			return scanContinue
   464		}
   465		return stateESign(s, c)
   466	}
   467	
   468	// stateESign is the state after reading the mantissa, e, and sign in a number,
   469	// such as after reading `314e-` or `0.314e+`.
   470	func stateESign(s *scanner, c byte) int {
   471		if '0' <= c && c <= '9' {
   472			s.step = stateE0
   473			return scanContinue
   474		}
   475		return s.error(c, "in exponent of numeric literal")
   476	}
   477	
   478	// stateE0 is the state after reading the mantissa, e, optional sign,
   479	// and at least one digit of the exponent in a number,
   480	// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
   481	func stateE0(s *scanner, c byte) int {
   482		if '0' <= c && c <= '9' {
   483			return scanContinue
   484		}
   485		return stateEndValue(s, c)
   486	}
   487	
   488	// stateT is the state after reading `t`.
   489	func stateT(s *scanner, c byte) int {
   490		if c == 'r' {
   491			s.step = stateTr
   492			return scanContinue
   493		}
   494		return s.error(c, "in literal true (expecting 'r')")
   495	}
   496	
   497	// stateTr is the state after reading `tr`.
   498	func stateTr(s *scanner, c byte) int {
   499		if c == 'u' {
   500			s.step = stateTru
   501			return scanContinue
   502		}
   503		return s.error(c, "in literal true (expecting 'u')")
   504	}
   505	
   506	// stateTru is the state after reading `tru`.
   507	func stateTru(s *scanner, c byte) int {
   508		if c == 'e' {
   509			s.step = stateEndValue
   510			return scanContinue
   511		}
   512		return s.error(c, "in literal true (expecting 'e')")
   513	}
   514	
   515	// stateF is the state after reading `f`.
   516	func stateF(s *scanner, c byte) int {
   517		if c == 'a' {
   518			s.step = stateFa
   519			return scanContinue
   520		}
   521		return s.error(c, "in literal false (expecting 'a')")
   522	}
   523	
   524	// stateFa is the state after reading `fa`.
   525	func stateFa(s *scanner, c byte) int {
   526		if c == 'l' {
   527			s.step = stateFal
   528			return scanContinue
   529		}
   530		return s.error(c, "in literal false (expecting 'l')")
   531	}
   532	
   533	// stateFal is the state after reading `fal`.
   534	func stateFal(s *scanner, c byte) int {
   535		if c == 's' {
   536			s.step = stateFals
   537			return scanContinue
   538		}
   539		return s.error(c, "in literal false (expecting 's')")
   540	}
   541	
   542	// stateFals is the state after reading `fals`.
   543	func stateFals(s *scanner, c byte) int {
   544		if c == 'e' {
   545			s.step = stateEndValue
   546			return scanContinue
   547		}
   548		return s.error(c, "in literal false (expecting 'e')")
   549	}
   550	
   551	// stateN is the state after reading `n`.
   552	func stateN(s *scanner, c byte) int {
   553		if c == 'u' {
   554			s.step = stateNu
   555			return scanContinue
   556		}
   557		return s.error(c, "in literal null (expecting 'u')")
   558	}
   559	
   560	// stateNu is the state after reading `nu`.
   561	func stateNu(s *scanner, c byte) int {
   562		if c == 'l' {
   563			s.step = stateNul
   564			return scanContinue
   565		}
   566		return s.error(c, "in literal null (expecting 'l')")
   567	}
   568	
   569	// stateNul is the state after reading `nul`.
   570	func stateNul(s *scanner, c byte) int {
   571		if c == 'l' {
   572			s.step = stateEndValue
   573			return scanContinue
   574		}
   575		return s.error(c, "in literal null (expecting 'l')")
   576	}
   577	
   578	// stateError is the state after reaching a syntax error,
   579	// such as after reading `[1}` or `5.1.2`.
   580	func stateError(s *scanner, c byte) int {
   581		return scanError
   582	}
   583	
   584	// error records an error and switches to the error state.
   585	func (s *scanner) error(c byte, context string) int {
   586		s.step = stateError
   587		s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
   588		return scanError
   589	}
   590	
   591	// quoteChar formats c as a quoted character literal
   592	func quoteChar(c byte) string {
   593		// special cases - different from quoted strings
   594		if c == '\'' {
   595			return `'\''`
   596		}
   597		if c == '"' {
   598			return `'"'`
   599		}
   600	
   601		// use quoted string with different quotation marks
   602		s := strconv.Quote(string(c))
   603		return "'" + s[1:len(s)-1] + "'"
   604	}
   605	
   606	// undo causes the scanner to return scanCode from the next state transition.
   607	// This gives callers a simple 1-byte undo mechanism.
   608	func (s *scanner) undo(scanCode int) {
   609		if s.redo {
   610			panic("json: invalid use of scanner")
   611		}
   612		s.redoCode = scanCode
   613		s.redoState = s.step
   614		s.step = stateRedo
   615		s.redo = true
   616	}
   617	
   618	// stateRedo helps implement the scanner's 1-byte undo.
   619	func stateRedo(s *scanner, c byte) int {
   620		s.redo = false
   621		s.step = s.redoState
   622		return s.redoCode
   623	}
   624	

View as plain text