The Go Programming Language

Source file src/pkg/ebnf/parser.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package ebnf
     6	
     7	import (
     8		"go/scanner"
     9		"go/token"
    10		"os"
    11		"strconv"
    12	)
    13	
    14	type parser struct {
    15		fset *token.FileSet
    16		scanner.ErrorVector
    17		scanner scanner.Scanner
    18		pos     token.Pos   // token position
    19		tok     token.Token // one token look-ahead
    20		lit     string      // token literal
    21	}
    22	
    23	func (p *parser) next() {
    24		p.pos, p.tok, p.lit = p.scanner.Scan()
    25		if p.tok.IsKeyword() {
    26			// TODO Should keyword mapping always happen outside scanner?
    27			//      Or should there be a flag to scanner to enable keyword mapping?
    28			p.tok = token.IDENT
    29		}
    30	}
    31	
    32	func (p *parser) error(pos token.Pos, msg string) {
    33		p.Error(p.fset.Position(pos), msg)
    34	}
    35	
    36	func (p *parser) errorExpected(pos token.Pos, msg string) {
    37		msg = "expected " + msg
    38		if pos == p.pos {
    39			// the error happened at the current position;
    40			// make the error message more specific
    41			msg += ", found '" + p.tok.String() + "'"
    42			if p.tok.IsLiteral() {
    43				msg += " " + p.lit
    44			}
    45		}
    46		p.error(pos, msg)
    47	}
    48	
    49	func (p *parser) expect(tok token.Token) token.Pos {
    50		pos := p.pos
    51		if p.tok != tok {
    52			p.errorExpected(pos, "'"+tok.String()+"'")
    53		}
    54		p.next() // make progress in any case
    55		return pos
    56	}
    57	
    58	func (p *parser) parseIdentifier() *Name {
    59		pos := p.pos
    60		name := p.lit
    61		p.expect(token.IDENT)
    62		return &Name{pos, name}
    63	}
    64	
    65	func (p *parser) parseToken() *Token {
    66		pos := p.pos
    67		value := ""
    68		if p.tok == token.STRING {
    69			value, _ = strconv.Unquote(p.lit)
    70			// Unquote may fail with an error, but only if the scanner found
    71			// an illegal string in the first place. In this case the error
    72			// has already been reported.
    73			p.next()
    74		} else {
    75			p.expect(token.STRING)
    76		}
    77		return &Token{pos, value}
    78	}
    79	
    80	// ParseTerm returns nil if no term was found.
    81	func (p *parser) parseTerm() (x Expression) {
    82		pos := p.pos
    83	
    84		switch p.tok {
    85		case token.IDENT:
    86			x = p.parseIdentifier()
    87	
    88		case token.STRING:
    89			tok := p.parseToken()
    90			x = tok
    91			const ellipsis = "…" // U+2026, the horizontal ellipsis character
    92			if p.tok == token.ILLEGAL && p.lit == ellipsis {
    93				p.next()
    94				x = &Range{tok, p.parseToken()}
    95			}
    96	
    97		case token.LPAREN:
    98			p.next()
    99			x = &Group{pos, p.parseExpression()}
   100			p.expect(token.RPAREN)
   101	
   102		case token.LBRACK:
   103			p.next()
   104			x = &Option{pos, p.parseExpression()}
   105			p.expect(token.RBRACK)
   106	
   107		case token.LBRACE:
   108			p.next()
   109			x = &Repetition{pos, p.parseExpression()}
   110			p.expect(token.RBRACE)
   111		}
   112	
   113		return x
   114	}
   115	
   116	func (p *parser) parseSequence() Expression {
   117		var list Sequence
   118	
   119		for x := p.parseTerm(); x != nil; x = p.parseTerm() {
   120			list = append(list, x)
   121		}
   122	
   123		// no need for a sequence if list.Len() < 2
   124		switch len(list) {
   125		case 0:
   126			p.errorExpected(p.pos, "term")
   127			return &Bad{p.pos, "term expected"}
   128		case 1:
   129			return list[0]
   130		}
   131	
   132		return list
   133	}
   134	
   135	func (p *parser) parseExpression() Expression {
   136		var list Alternative
   137	
   138		for {
   139			list = append(list, p.parseSequence())
   140			if p.tok != token.OR {
   141				break
   142			}
   143			p.next()
   144		}
   145		// len(list) > 0
   146	
   147		// no need for an Alternative node if list.Len() < 2
   148		if len(list) == 1 {
   149			return list[0]
   150		}
   151	
   152		return list
   153	}
   154	
   155	func (p *parser) parseProduction() *Production {
   156		name := p.parseIdentifier()
   157		p.expect(token.ASSIGN)
   158		var expr Expression
   159		if p.tok != token.PERIOD {
   160			expr = p.parseExpression()
   161		}
   162		p.expect(token.PERIOD)
   163		return &Production{name, expr}
   164	}
   165	
   166	func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
   167		// initialize parser
   168		p.fset = fset
   169		p.ErrorVector.Reset()
   170		p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
   171		p.next() // initializes pos, tok, lit
   172	
   173		grammar := make(Grammar)
   174		for p.tok != token.EOF {
   175			prod := p.parseProduction()
   176			name := prod.Name.String
   177			if _, found := grammar[name]; !found {
   178				grammar[name] = prod
   179			} else {
   180				p.error(prod.Pos(), name+" declared already")
   181			}
   182		}
   183	
   184		return grammar
   185	}
   186	
   187	// Parse parses a set of EBNF productions from source src.
   188	// It returns a set of productions. Errors are reported
   189	// for incorrect syntax and if a production is declared
   190	// more than once. Position information is recorded relative
   191	// to the file set fset.
   192	//
   193	func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
   194		var p parser
   195		grammar := p.parse(fset, filename, src)
   196		return grammar, p.GetError(scanner.Sorted)
   197	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.