Go Home Page
The Go Programming Language

Source file src/pkg/json/decode.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.

package json

import (
    "container/vector"
    "os"
    "reflect"
    "runtime"
    "strconv"
    "strings"
    "unicode"
    "utf16"
    "utf8"
)

// Unmarshal parses the JSON-encoded data and stores the result
// in the value pointed to by v.
//
// Unmarshal traverses the value v recursively.
// If an encountered value implements the Unmarshaler interface,
// Unmarshal calls its UnmarshalJSON method with a well-formed
// JSON encoding.
//
// Otherwise, Unmarshal uses the inverse of the encodings that
// Marshal uses, allocating maps, slices, and pointers as necessary,
// with the following additional rules:
//
// To unmarshal a JSON value into a nil interface value, the
// type stored in the interface value is one of:
//
//	bool, for JSON booleans
//	float64, for JSON numbers
//	string, for JSON strings
//	[]interface{}, for JSON arrays
//	map[string]interface{}, for JSON objects
//	nil for JSON null
//
// If a JSON value is not appropriate for a given target type,
// or if a JSON number overflows the target type, Unmarshal
// skips that field and completes the unmarshalling as best it can.
// If no more serious errors are encountered, Unmarshal returns
// an UnmarshalTypeError describing the earliest such error.
//
func Unmarshal(data []byte, v interface{}) os.Error {
    d := new(decodeState).init(data)

    // Quick check for well-formedness.
    // Avoids filling out half a data structure
    // before discovering a JSON syntax error.
    err := checkValid(data, &d.scan)
    if err != nil {
        return err
    }

    return d.unmarshal(v)
}

// Unmarshaler is the interface implemented by objects
// that can unmarshal a JSON description of themselves.
// The input can be assumed to be a valid JSON object
// encoding.  UnmarshalJSON must copy the JSON data
// if it wishes to retain the data after returning.
type Unmarshaler interface {
    UnmarshalJSON([]byte) os.Error
}


// An UnmarshalTypeError describes a JSON value that was
// not appropriate for a value of a specific Go type.
type UnmarshalTypeError struct {
    Value string       // description of JSON value - "bool", "array", "number -5"
    Type  reflect.Type // type of Go value it could not be assigned to
}

func (e *UnmarshalTypeError) String() string {
    return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String()
}

// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
// (The argument to Unmarshal must be a non-nil pointer.)
type InvalidUnmarshalError struct {
    Type reflect.Type
}

func (e *InvalidUnmarshalError) String() string {
    if e.Type == nil {
        return "json: Unmarshal(nil)"
    }

    if _, ok := e.Type.(*reflect.PtrType); !ok {
        return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
    }
    return "json: Unmarshal(nil " + e.Type.String() + ")"
}

func (d *decodeState) unmarshal(v interface{}) (err os.Error) {
    defer func() {
        if r := recover(); r != nil {
            if _, ok := r.(runtime.Error); ok {
                panic(r)
            }
            err = r.(os.Error)
        }
    }()

    rv := reflect.NewValue(v)
    pv, ok := rv.(*reflect.PtrValue)
    if !ok || pv.IsNil() {
        return &InvalidUnmarshalError{reflect.Typeof(v)}
    }

    d.scan.reset()
    d.value(pv.Elem())
    return d.savedError
}

// decodeState represents the state while decoding a JSON value.
type decodeState struct {
    data       []byte
    off        int // read offset in data
    scan       scanner
    nextscan   scanner // for calls to nextValue
    savedError os.Error
}

// errPhase is used for errors that should not happen unless
// there is a bug in the JSON decoder or something is editing
// the data slice while the decoder executes.
var errPhase = os.NewError("JSON decoder out of sync - data changing underfoot?")

func (d *decodeState) init(data []byte) *decodeState {
    d.data = data
    d.off = 0
    d.savedError = nil
    return d
}

// error aborts the decoding by panicking with err.
func (d *decodeState) error(err os.Error) {
    panic(err)
}

// saveError saves the first err it is called with,
// for reporting at the end of the unmarshal.
func (d *decodeState) saveError(err os.Error) {
    if d.savedError == nil {
        d.savedError = err
    }
}

// next cuts off and returns the next full JSON value in d.data[d.off:].
// The next value is known to be an object or array, not a literal.
func (d *decodeState) next() []byte {
    c := d.data[d.off]
    item, rest, err := nextValue(d.data[d.off:], &d.nextscan)
    if err != nil {
        d.error(err)
    }
    d.off = len(d.data) - len(rest)

    // Our scanner has seen the opening brace/bracket
    // and thinks we're still in the middle of the object.
    // invent a closing brace/bracket to get it out.
    if c == '{' {
        d.scan.step(&d.scan, '}')
    } else {
        d.scan.step(&d.scan, ']')
    }

    return item
}

// scanWhile processes bytes in d.data[d.off:] until it
// receives a scan code not equal to op.
// It updates d.off and returns the new scan code.
func (d *decodeState) scanWhile(op int) int {
    var newOp int
    for {
        if d.off >= len(d.data) {
            newOp = d.scan.eof()
            d.off = len(d.data) + 1 // mark processed EOF with len+1
        } else {
            c := int(d.data[d.off])
            d.off++
            newOp = d.scan.step(&d.scan, c)
        }
        if newOp != op {
            break
        }
    }
    return newOp
}

// value decodes a JSON value from d.data[d.off:] into the value.
// it updates d.off to point past the decoded value.
func (d *decodeState) value(v reflect.Value) {
    if v == nil {
        _, rest, err := nextValue(d.data[d.off:], &d.nextscan)
        if err != nil {
            d.error(err)
        }
        d.off = len(d.data) - len(rest)

        // d.scan thinks we're still at the beginning of the item.
        // Feed in an empty string - the shortest, simplest value -
        // so that it knows we got to the end of the value.
        if d.scan.step == stateRedo {
            panic("redo")
        }
        d.scan.step(&d.scan, '"')
        d.scan.step(&d.scan, '"')
        return
    }

    switch op := d.scanWhile(scanSkipSpace); op {
    default:
        d.error(errPhase)

    case scanBeginArray:
        d.array(v)

    case scanBeginObject:
        d.object(v)

    case scanBeginLiteral:
        d.literal(v)
    }
}

// indirect walks down v allocating pointers as needed,
// until it gets to a non-pointer.
// if it encounters an Unmarshaler, indirect stops and returns that.
// if wantptr is true, indirect stops at the last pointer.
func (d *decodeState) indirect(v reflect.Value, wantptr bool) (Unmarshaler, reflect.Value) {
    for {
        var isUnmarshaler bool
        if v.Type().NumMethod() > 0 {
            // Remember that this is an unmarshaler,
            // but wait to return it until after allocating
            // the pointer (if necessary).
            _, isUnmarshaler = v.Interface().(Unmarshaler)
        }

        if iv, ok := v.(*reflect.InterfaceValue); ok && !iv.IsNil() {
            v = iv.Elem()
            continue
        }
        pv, ok := v.(*reflect.PtrValue)
        if !ok {
            break
        }
        _, isptrptr := pv.Elem().(*reflect.PtrValue)
        if !isptrptr && wantptr && !isUnmarshaler {
            return nil, pv
        }
        if pv.IsNil() {
            pv.PointTo(reflect.MakeZero(pv.Type().(*reflect.PtrType).Elem()))
        }
        if isUnmarshaler {
            // Using v.Interface().(Unmarshaler)
            // here means that we have to use a pointer
            // as the struct field.  We cannot use a value inside
            // a pointer to a struct, because in that case
            // v.Interface() is the value (x.f) not the pointer (&x.f).
            // This is an unfortunate consequence of reflect.
            // An alternative would be to look up the
            // UnmarshalJSON method and return a FuncValue.
            return v.Interface().(Unmarshaler), nil
        }
        v = pv.Elem()
    }
    return nil, v
}

// array consumes an array from d.data[d.off-1:], decoding into the value v.
// the first byte of the array ('[') has been read already.
func (d *decodeState) array(v reflect.Value) {
    // Check for unmarshaler.
    unmarshaler, pv := d.indirect(v, false)
    if unmarshaler != nil {
        d.off--
        err := unmarshaler.UnmarshalJSON(d.next())
        if err != nil {
            d.error(err)
        }
        return
    }
    v = pv

    // Decoding into nil interface?  Switch to non-reflect code.
    iv, ok := v.(*reflect.InterfaceValue)
    if ok {
        iv.Set(reflect.NewValue(d.arrayInterface()))
        return
    }

    // Check type of target.
    av, ok := v.(reflect.ArrayOrSliceValue)
    if !ok {
        d.saveError(&UnmarshalTypeError{"array", v.Type()})
        d.off--
        d.next()
        return
    }

    sv, _ := v.(*reflect.SliceValue)

    i := 0
    for {
        // Look ahead for ] - can only happen on first iteration.
        op := d.scanWhile(scanSkipSpace)
        if op == scanEndArray {
            break
        }

        // Back up so d.value can have the byte we just read.
        d.off--
        d.scan.undo(op)

        // Get element of array, growing if necessary.
        if i >= av.Cap() && sv != nil {
            newcap := sv.Cap() + sv.Cap()/2
            if newcap < 4 {
                newcap = 4
            }
            newv := reflect.MakeSlice(sv.Type().(*reflect.SliceType), sv.Len(), newcap)
            reflect.ArrayCopy(newv, sv)
            sv.Set(newv)
        }
        if i >= av.Len() && sv != nil {
            // Must be slice; gave up on array during i >= av.Cap().
            sv.SetLen(i + 1)
        }

        // Decode into element.
        if i < av.Len() {
            d.value(av.Elem(i))
        } else {
            // Ran out of fixed array: skip.
            d.value(nil)
        }
        i++

        // Next token must be , or ].
        op = d.scanWhile(scanSkipSpace)
        if op == scanEndArray {
            break
        }
        if op != scanArrayValue {
            d.error(errPhase)
        }
    }
    if i < av.Len() {
        if sv == nil {
            // Array.  Zero the rest.
            z := reflect.MakeZero(av.Type().(*reflect.ArrayType).Elem())
            for ; i < av.Len(); i++ {
                av.Elem(i).SetValue(z)
            }
        } else {
            sv.SetLen(i)
        }
    }
}

// matchName returns true if key should be written to a field named name.
func matchName(key, name string) bool {
    return strings.ToLower(key) == strings.ToLower(name)
}

// object consumes an object from d.data[d.off-1:], decoding into the value v.
// the first byte of the object ('{') has been read already.
func (d *decodeState) object(v reflect.Value) {
    // Check for unmarshaler.
    unmarshaler, pv := d.indirect(v, false)
    if unmarshaler != nil {
        d.off--
        err := unmarshaler.UnmarshalJSON(d.next())
        if err != nil {
            d.error(err)
        }
        return
    }
    v = pv

    // Decoding into nil interface?  Switch to non-reflect code.
    iv, ok := v.(*reflect.InterfaceValue)
    if ok {
        iv.Set(reflect.NewValue(d.objectInterface()))
        return
    }

    // Check type of target: struct or map[string]T
    var (
        mv *reflect.MapValue
        sv *reflect.StructValue
    )
    switch v := v.(type) {
    case *reflect.MapValue:
        // map must have string type
        t := v.Type().(*reflect.MapType)
        if t.Key() != reflect.Typeof("") {
            d.saveError(&UnmarshalTypeError{"object", v.Type()})
            break
        }
        mv = v
        if mv.IsNil() {
            mv.SetValue(reflect.MakeMap(t))
        }
    case *reflect.StructValue:
        sv = v
    default:
        d.saveError(&UnmarshalTypeError{"object", v.Type()})
    }

    if mv == nil && sv == nil {
        d.off--
        d.next() // skip over { } in input
        return
    }

    for {
        // Read opening " of string key or closing }.
        op := d.scanWhile(scanSkipSpace)
        if op == scanEndObject {
            // closing } - can only happen on first iteration.
            break
        }
        if op != scanBeginLiteral {
            d.error(errPhase)
        }

        // Read string key.
        start := d.off - 1
        op = d.scanWhile(scanContinue)
        item := d.data[start : d.off-1]
        key, ok := unquote(item)
        if !ok {
            d.error(errPhase)
        }

        // Figure out field corresponding to key.
        var subv reflect.Value
        if mv != nil {
            subv = reflect.MakeZero(mv.Type().(*reflect.MapType).Elem())
        } else {
            // First try for field with that tag.
            for i := 0; i < sv.NumField(); i++ {
                f := sv.Type().(*reflect.StructType).Field(i)
                if f.Tag == key {
                    subv = sv.Field(i)
                    break
                }
            }
            if subv == nil {
                // Second, exact match.
                subv = sv.FieldByName(key)
                if subv == nil {
                    // Third, case-insensitive match.
                    subv = sv.FieldByNameFunc(func(s string) bool { return matchName(key, s) })
                }
            }
        }

        // Read : before value.
        if op == scanSkipSpace {
            op = d.scanWhile(scanSkipSpace)
        }
        if op != scanObjectKey {
            d.error(errPhase)
        }

        // Read value.
        d.value(subv)

        // Write value back to map;
        // if using struct, subv points into struct already.
        if mv != nil {
            mv.SetElem(reflect.NewValue(key), subv)
        }

        // Next token must be , or }.
        op = d.scanWhile(scanSkipSpace)
        if op == scanEndObject {
            break
        }
        if op != scanObjectValue {
            d.error(errPhase)
        }
    }
}

// literal consumes a literal from d.data[d.off-1:], decoding into the value v.
// The first byte of the literal has been read already
// (that's how the caller knows it's a literal).
func (d *decodeState) literal(v reflect.Value) {
    // All bytes inside literal return scanContinue op code.
    start := d.off - 1
    op := d.scanWhile(scanContinue)

    // Scan read one byte too far; back up.
    d.off--
    d.scan.undo(op)
    item := d.data[start:d.off]

    // Check for unmarshaler.
    wantptr := item[0] == 'n' // null
    unmarshaler, pv := d.indirect(v, wantptr)
    if unmarshaler != nil {
        err := unmarshaler.UnmarshalJSON(item)
        if err != nil {
            d.error(err)
        }
        return
    }
    v = pv

    switch c := item[0]; c {
    case 'n': // null
        switch v.(type) {
        default:
            d.saveError(&UnmarshalTypeError{"null", v.Type()})
        case *reflect.InterfaceValue, *reflect.PtrValue, *reflect.MapValue:
            v.SetValue(nil)
        }

    case 't', 'f': // true, false
        value := c == 't'
        switch v := v.(type) {
        default:
            d.saveError(&UnmarshalTypeError{"bool", v.Type()})
        case *reflect.BoolValue:
            v.Set(value)
        case *reflect.InterfaceValue:
            v.Set(reflect.NewValue(value))
        }

    case '"': // string
        s, ok := unquote(item)
        if !ok {
            d.error(errPhase)
        }
        switch v := v.(type) {
        default:
            d.saveError(&UnmarshalTypeError{"string", v.Type()})
        case *reflect.StringValue:
            v.Set(s)
        case *reflect.InterfaceValue:
            v.Set(reflect.NewValue(s))
        }

    default: // number
        if c != '-' && (c < '0' || c > '9') {
            d.error(errPhase)
        }
        s := string(item)
        switch v := v.(type) {
        default:
            d.error(&UnmarshalTypeError{"number", v.Type()})
        case *reflect.InterfaceValue:
            n, err := strconv.Atof64(s)
            if err != nil {
                d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
                break
            }
            v.Set(reflect.NewValue(n))

        case *reflect.IntValue:
            n, err := strconv.Atoi64(s)
            if err != nil || v.Overflow(n) {
                d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
                break
            }
            v.Set(n)

        case *reflect.UintValue:
            n, err := strconv.Atoui64(s)
            if err != nil || v.Overflow(n) {
                d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
                break
            }
            v.Set(n)

        case *reflect.FloatValue:
            n, err := strconv.AtofN(s, v.Type().Bits())
            if err != nil || v.Overflow(n) {
                d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
                break
            }
            v.Set(n)
        }
    }
}

// The xxxInterface routines build up a value to be stored
// in an empty interface.  They are not strictly necessary,
// but they avoid the weight of reflection in this common case.

// valueInterface is like value but returns interface{}
func (d *decodeState) valueInterface() interface{} {
    switch d.scanWhile(scanSkipSpace) {
    default:
        d.error(errPhase)
    case scanBeginArray:
        return d.arrayInterface()
    case scanBeginObject:
        return d.objectInterface()
    case scanBeginLiteral:
        return d.literalInterface()
    }
    panic("unreachable")
}

// arrayInterface is like array but returns []interface{}.
func (d *decodeState) arrayInterface() []interface{} {
    var v vector.Vector
    for {
        // Look ahead for ] - can only happen on first iteration.
        op := d.scanWhile(scanSkipSpace)
        if op == scanEndArray {
            break
        }

        // Back up so d.value can have the byte we just read.
        d.off--
        d.scan.undo(op)

        v.Push(d.valueInterface())

        // Next token must be , or ].
        op = d.scanWhile(scanSkipSpace)
        if op == scanEndArray {
            break
        }
        if op != scanArrayValue {
            d.error(errPhase)
        }
    }
    return v
}

// objectInterface is like object but returns map[string]interface{}.
func (d *decodeState) objectInterface() map[string]interface{} {
    m := make(map[string]interface{})
    for {
        // Read opening " of string key or closing }.
        op := d.scanWhile(scanSkipSpace)
        if op == scanEndObject {
            // closing } - can only happen on first iteration.
            break
        }
        if op != scanBeginLiteral {
            d.error(errPhase)
        }

        // Read string key.
        start := d.off - 1
        op = d.scanWhile(scanContinue)
        item := d.data[start : d.off-1]
        key, ok := unquote(item)
        if !ok {
            d.error(errPhase)
        }

        // Read : before value.
        if op == scanSkipSpace {
            op = d.scanWhile(scanSkipSpace)
        }
        if op != scanObjectKey {
            d.error(errPhase)
        }

        // Read value.
        m[key] = d.valueInterface()

        // Next token must be , or }.
        op = d.scanWhile(scanSkipSpace)
        if op == scanEndObject {
            break
        }
        if op != scanObjectValue {
            d.error(errPhase)
        }
    }
    return m
}


// literalInterface is like literal but returns an interface value.
func (d *decodeState) literalInterface() interface{} {
    // All bytes inside literal return scanContinue op code.
    start := d.off - 1
    op := d.scanWhile(scanContinue)

    // Scan read one byte too far; back up.
    d.off--
    d.scan.undo(op)
    item := d.data[start:d.off]

    switch c := item[0]; c {
    case 'n': // null
        return nil

    case 't', 'f': // true, false
        return c == 't'

    case '"': // string
        s, ok := unquote(item)
        if !ok {
            d.error(errPhase)
        }
        return s

    default: // number
        if c != '-' && (c < '0' || c > '9') {
            d.error(errPhase)
        }
        n, err := strconv.Atof64(string(item))
        if err != nil {
            d.saveError(&UnmarshalTypeError{"number " + string(item), reflect.Typeof(float64(0))})
        }
        return n
    }
    panic("unreachable")
}

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1.
func getu4(s []byte) int {
    if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
        return -1
    }
    rune, err := strconv.Btoui64(string(s[2:6]), 16)
    if err != nil {
        return -1
    }
    return int(rune)
}

// unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote.
func unquote(s []byte) (t string, ok bool) {
    if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
        return
    }
    b := make([]byte, len(s)+2*utf8.UTFMax)
    w := 0
    for r := 1; r < len(s)-1; {
        // Out of room?  Can only happen if s is full of
        // malformed UTF-8 and we're replacing each
        // byte with RuneError.
        if w >= len(b)-2*utf8.UTFMax {
            nb := make([]byte, (len(b)+utf8.UTFMax)*2)
            copy(nb, b[0:w])
            b = nb
        }
        switch c := s[r]; {
        case c == '\\':
            r++
            if r >= len(s)-1 {
                return
            }
            switch s[r] {
            default:
                return
            case '"', '\\', '/', '\'':
                b[w] = s[r]
                r++
                w++
            case 'b':
                b[w] = '\b'
                r++
                w++
            case 'f':
                b[w] = '\f'
                r++
                w++
            case 'n':
                b[w] = '\n'
                r++
                w++
            case 'r':
                b[w] = '\r'
                r++
                w++
            case 't':
                b[w] = '\t'
                r++
                w++
            case 'u':
                r--
                rune := getu4(s[r:])
                if rune < 0 {
                    return
                }
                r += 6
                if utf16.IsSurrogate(rune) {
                    rune1 := getu4(s[r:])
                    if dec := utf16.DecodeRune(rune, rune1); dec != unicode.ReplacementChar {
                        // A valid pair; consume.
                        r += 6
                        w += utf8.EncodeRune(dec, b[w:])
                        break
                    }
                    // Invalid surrogate; fall back to replacement rune.
                    rune = unicode.ReplacementChar
                }
                w += utf8.EncodeRune(rune, b[w:])
            }

        // Quote, control characters are invalid.
        case c == '"', c < ' ':
            return

        // ASCII
        case c < utf8.RuneSelf:
            b[w] = c
            r++
            w++

        // Coerce to well-formed UTF-8.
        default:
            rune, size := utf8.DecodeRune(s[r:])
            r += size
            w += utf8.EncodeRune(rune, b[w:])
        }
    }
    return string(b[0:w]), true
}