Go Home Page
The Go Programming Language

Source file src/pkg/json/parse.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// JSON (JavaScript Object Notation) parser.
// See http://www.json.org/

// The json package implements a simple parser and
// representation for JSON (JavaScript Object Notation),
// as defined at http://www.json.org/.
package json

import (
    "bytes"
    "strconv"
    "utf8"
)

// Strings
//
//   Double quoted with escapes: \" \\ \/ \b \f \n \r \t \uXXXX.
//   No literal control characters, supposedly.
//   Have also seen \' and embedded newlines.

func _UnHex(p string, r, l int) (v int, ok bool) {
    v = 0
    for i := r; i < l; i++ {
        if i >= len(p) {
            return 0, false
        }
        v *= 16
        switch {
        case '0' <= p[i] && p[i] <= '9':
            v += int(p[i] - '0')
        case 'a' <= p[i] && p[i] <= 'f':
            v += int(p[i] - 'a' + 10)
        case 'A' <= p[i] && p[i] <= 'F':
            v += int(p[i] - 'A' + 10)
        default:
            return 0, false
        }
    }
    return v, true
}

func _ToHex(b []byte, rune int) {
    const hexDigits = "0123456789abcdef"
    b[0] = hexDigits[rune>>12&0xf]
    b[1] = hexDigits[rune>>8&0xf]
    b[2] = hexDigits[rune>>4&0xf]
    b[3] = hexDigits[rune&0xf]
}

// Unquote unquotes the JSON-quoted string s,
// returning a raw string t.  If s is not a valid
// JSON-quoted string, Unquote returns with ok set to false.
func Unquote(s string) (t string, ok bool) {
    if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
        return
    }
    b := make([]byte, len(s))
    w := 0
    for r := 1; r < len(s)-1; {
        switch {
        case s[r] == '\\':
            r++
            if r >= len(s)-1 {
                return
            }
            switch s[r] {
            default:
                return
            case '"', '\\', '/', '\'':
                b[w] = s[r]
                r++
                w++
            case 'b':
                b[w] = '\b'
                r++
                w++
            case 'f':
                b[w] = '\f'
                r++
                w++
            case 'n':
                b[w] = '\n'
                r++
                w++
            case 'r':
                b[w] = '\r'
                r++
                w++
            case 't':
                b[w] = '\t'
                r++
                w++
            case 'u':
                r++
                rune, ok := _UnHex(s, r, r+4)
                if !ok {
                    return
                }
                r += 4
                w += utf8.EncodeRune(rune, b[w:])
            }
        // Control characters are invalid, but we've seen raw \n.
        case s[r] < ' ' && s[r] != '\n':
            if s[r] == '\n' {
                b[w] = '\n'
                r++
                w++
                break
            }
            return
        // ASCII
        case s[r] < utf8.RuneSelf:
            b[w] = s[r]
            r++
            w++
        // Coerce to well-formed UTF-8.
        default:
            rune, size := utf8.DecodeRuneInString(s[r:])
            r += size
            w += utf8.EncodeRune(rune, b[w:])
        }
    }
    return string(b[0:w]), true
}

// Quote quotes the raw string s using JSON syntax,
// so that Unquote(Quote(s)) = s, true.
func Quote(s string) string {
    chr := make([]byte, 6)
    chr0 := chr[0:1]
    b := new(bytes.Buffer)
    chr[0] = '"'
    b.Write(chr0)

    for _, rune := range s {
        switch {
        case rune == '"' || rune == '\\':
            chr[0] = '\\'
            chr[1] = byte(rune)
            b.Write(chr[0:2])

        case rune == '\b':
            chr[0] = '\\'
            chr[1] = 'b'
            b.Write(chr[0:2])

        case rune == '\f':
            chr[0] = '\\'
            chr[1] = 'f'
            b.Write(chr[0:2])

        case rune == '\n':
            chr[0] = '\\'
            chr[1] = 'n'
            b.Write(chr[0:2])

        case rune == '\r':
            chr[0] = '\\'
            chr[1] = 'r'
            b.Write(chr[0:2])

        case rune == '\t':
            chr[0] = '\\'
            chr[1] = 't'
            b.Write(chr[0:2])

        case 0x20 <= rune && rune < utf8.RuneSelf:
            chr[0] = byte(rune)
            b.Write(chr0)

        default:
            chr[0] = '\\'
            chr[1] = 'u'
            _ToHex(chr[2:6], rune)
            b.Write(chr)
        }
    }
    chr[0] = '"'
    b.Write(chr0)
    return b.String()
}


// _Lexer

type _Lexer struct {
    s     string
    i     int
    kind  int
    token string
}

func punct(c byte) bool {
    return c == '"' || c == '[' || c == ']' || c == ':' || c == '{' || c == '}' || c == ','
}

func white(c byte) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' }

func skipwhite(p string, i int) int {
    for i < len(p) && white(p[i]) {
        i++
    }
    return i
}

func skiptoken(p string, i int) int {
    for i < len(p) && !punct(p[i]) && !white(p[i]) {
        i++
    }
    return i
}

func skipstring(p string, i int) int {
    for i++; i < len(p) && p[i] != '"'; i++ {
        if p[i] == '\\' {
            i++
        }
    }
    if i >= len(p) {
        return i
    }
    return i + 1
}

func (t *_Lexer) Next() {
    i, s := t.i, t.s
    i = skipwhite(s, i)
    if i >= len(s) {
        t.kind = 0
        t.token = ""
        t.i = len(s)
        return
    }

    c := s[i]
    switch {
    case c == '-' || '0' <= c && c <= '9':
        j := skiptoken(s, i)
        t.kind = '1'
        t.token = s[i:j]
        i = j

    case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
        j := skiptoken(s, i)
        t.kind = 'a'
        t.token = s[i:j]
        i = j

    case c == '"':
        j := skipstring(s, i)
        t.kind = '"'
        t.token = s[i:j]
        i = j

    case c == '[', c == ']', c == ':', c == '{', c == '}', c == ',':
        t.kind = int(c)
        t.token = s[i : i+1]
        i++

    default:
        t.kind = '?'
        t.token = s[i : i+1]
    }

    t.i = i
}


// Parser
//
// Implements parsing but not the actions.  Those are
// carried out by the implementation of the Builder interface.
// A Builder represents the object being created.
// Calling a method like Int64(i) sets that object to i.
// Calling a method like Elem(i) or Key(s) creates a
// new builder for a subpiece of the object (logically,
// an array element or a map key).
//
// There are two Builders, in other files.
// The JsonBuilder builds a generic Json structure
// in which maps are maps.
// The StructBuilder copies data into a possibly
// nested data structure, using the "map keys"
// as struct field names.

type _Value interface{}

// BUG(rsc): The json Builder interface needs to be
// reconciled with the xml Builder interface.

// A Builder is an interface implemented by clients and passed
// to the JSON parser.  It gives clients full control over the
// eventual representation returned by the parser.
type Builder interface {
    // Set value
    Int64(i int64)
    Uint64(i uint64)
    Float64(f float64)
    String(s string)
    Bool(b bool)
    Null()
    Array()
    Map()

    // Create sub-Builders
    Elem(i int) Builder
    Key(s string) Builder

    // Flush changes to parent Builder if necessary.
    Flush()
}

func parse(lex *_Lexer, build Builder) bool {
    ok := false
Switch:
    switch lex.kind {
    case 0:
        break
    case '1':
        // If the number is exactly an integer, use that.
        if i, err := strconv.Atoi64(lex.token); err == nil {
            build.Int64(i)
            ok = true
        } else if i, err := strconv.Atoui64(lex.token); err == nil {
            build.Uint64(i)
            ok = true
        } else
        // Fall back to floating point.
        if f, err := strconv.Atof64(lex.token); err == nil {
            build.Float64(f)
            ok = true
        }

    case 'a':
        switch lex.token {
        case "true":
            build.Bool(true)
            ok = true
        case "false":
            build.Bool(false)
            ok = true
        case "null":
            build.Null()
            ok = true
        }

    case '"':
        if str, ok1 := Unquote(lex.token); ok1 {
            build.String(str)
            ok = true
        }

    case '[':
        // array
        build.Array()
        lex.Next()
        n := 0
        for lex.kind != ']' {
            if n > 0 {
                if lex.kind != ',' {
                    break Switch
                }
                lex.Next()
            }
            if !parse(lex, build.Elem(n)) {
                break Switch
            }
            n++
        }
        ok = true

    case '{':
        // map
        lex.Next()
        build.Map()
        n := 0
        for lex.kind != '}' {
            if n > 0 {
                if lex.kind != ',' {
                    break Switch
                }
                lex.Next()
            }
            if lex.kind != '"' {
                break Switch
            }
            key, ok := Unquote(lex.token)
            if !ok {
                break Switch
            }
            lex.Next()
            if lex.kind != ':' {
                break Switch
            }
            lex.Next()
            if !parse(lex, build.Key(key)) {
                break Switch
            }
            n++
        }
        ok = true
    }

    if ok {
        lex.Next()
    }
    build.Flush()
    return ok
}

// Parse parses the JSON syntax string s and makes calls to
// the builder to construct a parsed representation.
// On success, it returns with ok set to true.
// On error, it returns with ok set to false, errindx set
// to the byte index in s where a syntax error occurred,
// and errtok set to the offending token.
func Parse(s string, builder Builder) (ok bool, errindx int, errtok string) {
    lex := new(_Lexer)
    lex.s = s
    lex.Next()
    if parse(lex, builder) {
        if lex.kind == 0 { // EOF
            return true, 0, ""
        }
    }
    return false, lex.i, lex.token
}