...
Run Format

Source file src/encoding/json/stream.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package json
     6	
     7	import (
     8		"bytes"
     9		"errors"
    10		"io"
    11	)
    12	
// A Decoder reads and decodes JSON values from an input stream.
//
// It keeps its own buffer of bytes read from the underlying reader and,
// separately, a small state machine used by the Token API to track the
// position inside nested arrays and objects.
type Decoder struct {
	// r is the underlying input; refill reads from it into buf.
	r     io.Reader
	// buf holds bytes read from r; bytes before scanp are already consumed.
	buf   []byte
	// d is the decode state that Decode re-initializes for each value.
	d     decodeState
	scanp int // start of unread data in buf
	// scan is the scanner readValue uses to find the end of the next value.
	scan  scanner
	// err is a sticky error: once set by readValue, Decode keeps returning it.
	err   error

	// tokenState is the current token* state; tokenStack holds the saved
	// states of the enclosing arrays and objects opened through Token.
	tokenState int
	tokenStack []int
}
    25	
    26	// NewDecoder returns a new decoder that reads from r.
    27	//
    28	// The decoder introduces its own buffering and may
    29	// read data from r beyond the JSON values requested.
    30	func NewDecoder(r io.Reader) *Decoder {
    31		return &Decoder{r: r}
    32	}
    33	
    34	// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    35	// Number instead of as a float64.
    36	func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    37	
    38	// Decode reads the next JSON-encoded value from its
    39	// input and stores it in the value pointed to by v.
    40	//
    41	// See the documentation for Unmarshal for details about
    42	// the conversion of JSON into a Go value.
    43	func (dec *Decoder) Decode(v interface{}) error {
    44		if dec.err != nil {
    45			return dec.err
    46		}
    47	
    48		if err := dec.tokenPrepareForDecode(); err != nil {
    49			return err
    50		}
    51	
    52		if !dec.tokenValueAllowed() {
    53			return &SyntaxError{msg: "not at beginning of value"}
    54		}
    55	
    56		// Read whole value into buffer.
    57		n, err := dec.readValue()
    58		if err != nil {
    59			return err
    60		}
    61		dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    62		dec.scanp += n
    63	
    64		// Don't save err from unmarshal into dec.err:
    65		// the connection is still usable since we read a complete JSON
    66		// object from it before the error happened.
    67		err = dec.d.unmarshal(v)
    68	
    69		// fixup token streaming state
    70		dec.tokenValueEnd()
    71	
    72		return err
    73	}
    74	
    75	// Buffered returns a reader of the data remaining in the Decoder's
    76	// buffer. The reader is valid until the next call to Decode.
    77	func (dec *Decoder) Buffered() io.Reader {
    78		return bytes.NewReader(dec.buf[dec.scanp:])
    79	}
    80	
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]; dec.scanp itself is not advanced
// here (the caller does that). More input is pulled in with refill until the
// scanner reports the end of a value. A scan error, or a read error that
// arrives before a complete value, is stored in dec.err and returned with
// a length of 0.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is the local scan position in dec.buf; everything in
	// dec.buf[dec.scanp:scanp] has already been fed to the scanner.
	scanp := dec.scanp
	// err carries the result of the previous refill so that the bytes it
	// delivered are scanned before the error is acted on.
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				// c is not added to the value: only i bytes are counted.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// Include the closing bracket or brace (c) in the value.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				// Remember the error so later Decode calls report it too.
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Every buffered byte has now been scanned.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed an invented space: if that completes the value,
				// accept it even though the input ended.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Any non-space byte left in the buffer means the input
				// ended inside a value; otherwise plain io.EOF is kept.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer and reset dec.scanp, so carry the
		// scan position across the call as an offset from dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   133	
   134	func (dec *Decoder) refill() error {
   135		// Make room to read more into the buffer.
   136		// First slide down data already consumed.
   137		if dec.scanp > 0 {
   138			n := copy(dec.buf, dec.buf[dec.scanp:])
   139			dec.buf = dec.buf[:n]
   140			dec.scanp = 0
   141		}
   142	
   143		// Grow buffer if not large enough.
   144		const minRead = 512
   145		if cap(dec.buf)-len(dec.buf) < minRead {
   146			newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   147			copy(newBuf, dec.buf)
   148			dec.buf = newBuf
   149		}
   150	
   151		// Read. Delay error for next iteration (after scan).
   152		n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   153		dec.buf = dec.buf[0 : len(dec.buf)+n]
   154	
   155		return err
   156	}
   157	
   158	func nonSpace(b []byte) bool {
   159		for _, c := range b {
   160			if !isSpace(c) {
   161				return true
   162			}
   163		}
   164		return false
   165	}
   166	
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	// w is the destination that Encode writes each encoded value to.
	w          io.Writer
	// err is a sticky error: once a write to w fails, Encode keeps returning it.
	err        error
	// escapeHTML is passed to the marshaler as encOpts.escapeHTML;
	// NewEncoder sets it to true and SetEscapeHTML changes it.
	escapeHTML bool

	// indentBuf is a scratch buffer, allocated on first use, that Encode
	// reuses for the output of Indent.
	indentBuf    *bytes.Buffer
	// indentPrefix and indentValue are the arguments given to SetIndent;
	// indentation is applied only when at least one of them is non-empty.
	indentPrefix string
	indentValue  string
}
   177	
   178	// NewEncoder returns a new encoder that writes to w.
   179	func NewEncoder(w io.Writer) *Encoder {
   180		return &Encoder{w: w, escapeHTML: true}
   181	}
   182	
   183	// Encode writes the JSON encoding of v to the stream,
   184	// followed by a newline character.
   185	//
   186	// See the documentation for Marshal for details about the
   187	// conversion of Go values to JSON.
   188	func (enc *Encoder) Encode(v interface{}) error {
   189		if enc.err != nil {
   190			return enc.err
   191		}
   192		e := newEncodeState()
   193		err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   194		if err != nil {
   195			return err
   196		}
   197	
   198		// Terminate each value with a newline.
   199		// This makes the output look a little nicer
   200		// when debugging, and some kind of space
   201		// is required if the encoded value was a number,
   202		// so that the reader knows there aren't more
   203		// digits coming.
   204		e.WriteByte('\n')
   205	
   206		b := e.Bytes()
   207		if enc.indentPrefix != "" || enc.indentValue != "" {
   208			if enc.indentBuf == nil {
   209				enc.indentBuf = new(bytes.Buffer)
   210			}
   211			enc.indentBuf.Reset()
   212			err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   213			if err != nil {
   214				return err
   215			}
   216			b = enc.indentBuf.Bytes()
   217		}
   218		if _, err = enc.w.Write(b); err != nil {
   219			enc.err = err
   220		}
   221		encodeStatePool.Put(e)
   222		return err
   223	}
   224	
   225	// SetIndent instructs the encoder to format each subsequent encoded
   226	// value as if indented by the package-level function Indent(dst, src, prefix, indent).
   227	// Calling SetIndent("", "") disables indentation.
   228	func (enc *Encoder) SetIndent(prefix, indent string) {
   229		enc.indentPrefix = prefix
   230		enc.indentValue = indent
   231	}
   232	
   233	// SetEscapeHTML specifies whether problematic HTML characters
   234	// should be escaped inside JSON quoted strings.
   235	// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   236	// to avoid certain safety problems that can arise when embedding JSON in HTML.
   237	//
   238	// In non-HTML settings where the escaping interferes with the readability
   239	// of the output, SetEscapeHTML(false) disables this behavior.
   240	func (enc *Encoder) SetEscapeHTML(on bool) {
   241		enc.escapeHTML = on
   242	}
   243	
   244	// RawMessage is a raw encoded JSON value.
   245	// It implements Marshaler and Unmarshaler and can
   246	// be used to delay JSON decoding or precompute a JSON encoding.
   247	type RawMessage []byte
   248	
   249	// MarshalJSON returns *m as the JSON encoding of m.
   250	func (m *RawMessage) MarshalJSON() ([]byte, error) {
   251		return *m, nil
   252	}
   253	
   254	// UnmarshalJSON sets *m to a copy of data.
   255	func (m *RawMessage) UnmarshalJSON(data []byte) error {
   256		if m == nil {
   257			return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   258		}
   259		*m = append((*m)[0:0], data...)
   260		return nil
   261	}
   262	
   263	var _ Marshaler = (*RawMessage)(nil)
   264	var _ Unmarshaler = (*RawMessage)(nil)
   265	
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (instead of float64 after UseNumber is called)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
   276	
// Token-streaming states stored in Decoder.tokenState. Each one names the
// position of the decoder relative to the enclosing array or object.
const (
	// tokenTopValue is the zero value and therefore the initial state:
	// not inside any array or object opened through Token.
	tokenTopValue = iota
	// tokenArrayStart: just after '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: just after ',' in an array; a value may follow.
	tokenArrayValue
	// tokenArrayComma: just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: just after ',' in an object; a key string may follow.
	tokenObjectKey
	// tokenObjectColon: just after an object key; ':' must follow.
	tokenObjectColon
	// tokenObjectValue: just after ':'; a value may follow.
	tokenObjectValue
	// tokenObjectComma: just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
   288	
   289	// advance tokenstate from a separator state to a value state
   290	func (dec *Decoder) tokenPrepareForDecode() error {
   291		// Note: Not calling peek before switch, to avoid
   292		// putting peek into the standard Decode path.
   293		// peek is only called when using the Token API.
   294		switch dec.tokenState {
   295		case tokenArrayComma:
   296			c, err := dec.peek()
   297			if err != nil {
   298				return err
   299			}
   300			if c != ',' {
   301				return &SyntaxError{"expected comma after array element", 0}
   302			}
   303			dec.scanp++
   304			dec.tokenState = tokenArrayValue
   305		case tokenObjectColon:
   306			c, err := dec.peek()
   307			if err != nil {
   308				return err
   309			}
   310			if c != ':' {
   311				return &SyntaxError{"expected colon after object key", 0}
   312			}
   313			dec.scanp++
   314			dec.tokenState = tokenObjectValue
   315		}
   316		return nil
   317	}
   318	
   319	func (dec *Decoder) tokenValueAllowed() bool {
   320		switch dec.tokenState {
   321		case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   322			return true
   323		}
   324		return false
   325	}
   326	
   327	func (dec *Decoder) tokenValueEnd() {
   328		switch dec.tokenState {
   329		case tokenArrayStart, tokenArrayValue:
   330			dec.tokenState = tokenArrayComma
   331		case tokenObjectValue:
   332			dec.tokenState = tokenObjectComma
   333		}
   334	}
   335	
   336	// A Delim is a JSON array or object delimiter, one of [ ] { or }.
   337	type Delim rune
   338	
   339	func (d Delim) String() string {
   340		return string(d)
   341	}
   342	
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the token that follows them is returned.
	for {
		// peek skips white space and leaves dec.scanp at the next byte.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Only valid right after '[' or right after an element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop back to the enclosing state; the array as a whole then
			// counts as one completed value in that context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Only valid right after '{' or right after a key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop back to the enclosing state; the object as a whole then
			// counts as one completed value in that context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and go around again for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Consume the comma and go around again for the next
			// element (array) or key (object).
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key: decode it as a plain string. Decode only
				// accepts a value in a value state, so switch to
				// tokenTopValue for the call and restore the state after.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Any other byte starts a basic value; Decode reads it and
			// also advances the token state past the value.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
   449	
   450	func clearOffset(err error) {
   451		if s, ok := err.(*SyntaxError); ok {
   452			s.Offset = 0
   453		}
   454	}
   455	
   456	func (dec *Decoder) tokenError(c byte) (Token, error) {
   457		var context string
   458		switch dec.tokenState {
   459		case tokenTopValue:
   460			context = " looking for beginning of value"
   461		case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   462			context = " looking for beginning of value"
   463		case tokenArrayComma:
   464			context = " after array element"
   465		case tokenObjectKey:
   466			context = " looking for beginning of object key string"
   467		case tokenObjectColon:
   468			context = " after object key"
   469		case tokenObjectComma:
   470			context = " after object key:value pair"
   471		}
   472		return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   473	}
   474	
   475	// More reports whether there is another element in the
   476	// current array or object being parsed.
   477	func (dec *Decoder) More() bool {
   478		c, err := dec.peek()
   479		return err == nil && c != ']' && c != '}'
   480	}
   481	
   482	func (dec *Decoder) peek() (byte, error) {
   483		var err error
   484		for {
   485			for i := dec.scanp; i < len(dec.buf); i++ {
   486				c := dec.buf[i]
   487				if isSpace(c) {
   488					continue
   489				}
   490				dec.scanp = i
   491				return c, nil
   492			}
   493			// buffer has been scanned, now report any error
   494			if err != nil {
   495				return 0, err
   496			}
   497			err = dec.refill()
   498		}
   499	}
   500	
   501	/*
   502	TODO
   503	
   504	// EncodeToken writes the given JSON token to the stream.
   505	// It returns an error if the delimiters [ ] { } are not properly used.
   506	//
   507	// EncodeToken does not call Flush, because usually it is part of
   508	// a larger operation such as Encode, and those will call Flush when finished.
   509	// Callers that create an Encoder and then invoke EncodeToken directly,
   510	// without using Encode, need to call Flush when finished to ensure that
   511	// the JSON is written to the underlying writer.
   512	func (e *Encoder) EncodeToken(t Token) error  {
   513		...
   514	}
   515	
   516	*/
   517	

View as plain text