The Go Programming Language

Source file src/pkg/ebnf/ebnf.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
     6	// satisfying the following grammar (represented itself in EBNF):
     7	//
     8	//	Production  = name "=" [ Expression ] "." .
     9	//	Expression  = Alternative { "|" Alternative } .
    10	//	Alternative = Term { Term } .
    11	//	Term        = name | token [ "…" token ] | Group | Option | Repetition .
    12	//	Group       = "(" Expression ")" .
    13	//	Option      = "[" Expression "]" .
    14	//	Repetition  = "{" Expression "}" .
    15	//
    16	// A name is a Go identifier, a token is a Go string, and comments
    17	// and white space follow the same rules as for the Go language.
    18	// Production names starting with an uppercase Unicode letter denote
    19	// non-terminal productions (i.e., productions which allow white-space
    20	// and comments between tokens); all other production names denote
    21	// lexical productions.
    22	//
    23	package ebnf
    24	
    25	import (
    26		"go/scanner"
    27		"go/token"
    28		"os"
    29		"unicode"
    30		"utf8"
    31	)
    32	
    33	// ----------------------------------------------------------------------------
    34	// Internal representation
    35	
    36	type (
    37		// An Expression node represents a production expression.
    38		Expression interface {
    39			// Pos is the position of the first character of the syntactic construct
    40			Pos() token.Pos
    41		}
    42	
    43		// An Alternative node represents a non-empty list of alternative expressions.
    44		Alternative []Expression // x | y | z
    45	
    46		// A Sequence node represents a non-empty list of sequential expressions.
    47		Sequence []Expression // x y z
    48	
    49		// A Name node represents a production name.
    50		Name struct {
    51			StringPos token.Pos
    52			String    string
    53		}
    54	
    55		// A Token node represents a literal.
    56		Token struct {
    57			StringPos token.Pos
    58			String    string
    59		}
    60	
    61		// A List node represents a range of characters.
    62		Range struct {
    63			Begin, End *Token // begin ... end
    64		}
    65	
    66		// A Group node represents a grouped expression.
    67		Group struct {
    68			Lparen token.Pos
    69			Body   Expression // (body)
    70		}
    71	
    72		// An Option node represents an optional expression.
    73		Option struct {
    74			Lbrack token.Pos
    75			Body   Expression // [body]
    76		}
    77	
    78		// A Repetition node represents a repeated expression.
    79		Repetition struct {
    80			Lbrace token.Pos
    81			Body   Expression // {body}
    82		}
    83	
    84		// A Bad node stands for pieces of source code that lead to a parse error.
    85		Bad struct {
    86			TokPos token.Pos
    87			Error  string // parser error message
    88		}
    89	
    90		// A Production node represents an EBNF production.
    91		Production struct {
    92			Name *Name
    93			Expr Expression
    94		}
    95	
    96		// A Grammar is a set of EBNF productions. The map
    97		// is indexed by production name.
    98		//
    99		Grammar map[string]*Production
   100	)
   101	
   102	func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
   103	func (x Sequence) Pos() token.Pos    { return x[0].Pos() } // the parser always generates non-empty Sequences
   104	func (x *Name) Pos() token.Pos       { return x.StringPos }
   105	func (x *Token) Pos() token.Pos      { return x.StringPos }
   106	func (x *Range) Pos() token.Pos      { return x.Begin.Pos() }
   107	func (x *Group) Pos() token.Pos      { return x.Lparen }
   108	func (x *Option) Pos() token.Pos     { return x.Lbrack }
   109	func (x *Repetition) Pos() token.Pos { return x.Lbrace }
   110	func (x *Bad) Pos() token.Pos        { return x.TokPos }
   111	func (x *Production) Pos() token.Pos { return x.Name.Pos() }
   112	
   113	// ----------------------------------------------------------------------------
   114	// Grammar verification
   115	
   116	func isLexical(name string) bool {
   117		ch, _ := utf8.DecodeRuneInString(name)
   118		return !unicode.IsUpper(ch)
   119	}
   120	
   121	type verifier struct {
   122		fset *token.FileSet
   123		scanner.ErrorVector
   124		worklist []*Production
   125		reached  Grammar // set of productions reached from (and including) the root production
   126		grammar  Grammar
   127	}
   128	
   129	func (v *verifier) error(pos token.Pos, msg string) {
   130		v.Error(v.fset.Position(pos), msg)
   131	}
   132	
   133	func (v *verifier) push(prod *Production) {
   134		name := prod.Name.String
   135		if _, found := v.reached[name]; !found {
   136			v.worklist = append(v.worklist, prod)
   137			v.reached[name] = prod
   138		}
   139	}
   140	
   141	func (v *verifier) verifyChar(x *Token) int {
   142		s := x.String
   143		if utf8.RuneCountInString(s) != 1 {
   144			v.error(x.Pos(), "single char expected, found "+s)
   145			return 0
   146		}
   147		ch, _ := utf8.DecodeRuneInString(s)
   148		return ch
   149	}
   150	
   151	func (v *verifier) verifyExpr(expr Expression, lexical bool) {
   152		switch x := expr.(type) {
   153		case nil:
   154			// empty expression
   155		case Alternative:
   156			for _, e := range x {
   157				v.verifyExpr(e, lexical)
   158			}
   159		case Sequence:
   160			for _, e := range x {
   161				v.verifyExpr(e, lexical)
   162			}
   163		case *Name:
   164			// a production with this name must exist;
   165			// add it to the worklist if not yet processed
   166			if prod, found := v.grammar[x.String]; found {
   167				v.push(prod)
   168			} else {
   169				v.error(x.Pos(), "missing production "+x.String)
   170			}
   171			// within a lexical production references
   172			// to non-lexical productions are invalid
   173			if lexical && !isLexical(x.String) {
   174				v.error(x.Pos(), "reference to non-lexical production "+x.String)
   175			}
   176		case *Token:
   177			// nothing to do for now
   178		case *Range:
   179			i := v.verifyChar(x.Begin)
   180			j := v.verifyChar(x.End)
   181			if i >= j {
   182				v.error(x.Pos(), "decreasing character range")
   183			}
   184		case *Group:
   185			v.verifyExpr(x.Body, lexical)
   186		case *Option:
   187			v.verifyExpr(x.Body, lexical)
   188		case *Repetition:
   189			v.verifyExpr(x.Body, lexical)
   190		default:
   191			panic("unreachable")
   192		}
   193	}
   194	
   195	func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
   196		// find root production
   197		root, found := grammar[start]
   198		if !found {
   199			// token.NoPos doesn't require a file set;
   200			// ok to set v.fset only afterwards
   201			v.error(token.NoPos, "no start production "+start)
   202			return
   203		}
   204	
   205		// initialize verifier
   206		v.fset = fset
   207		v.ErrorVector.Reset()
   208		v.worklist = v.worklist[0:0]
   209		v.reached = make(Grammar)
   210		v.grammar = grammar
   211	
   212		// work through the worklist
   213		v.push(root)
   214		for {
   215			n := len(v.worklist) - 1
   216			if n < 0 {
   217				break
   218			}
   219			prod := v.worklist[n]
   220			v.worklist = v.worklist[0:n]
   221			v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
   222		}
   223	
   224		// check if all productions were reached
   225		if len(v.reached) < len(v.grammar) {
   226			for name, prod := range v.grammar {
   227				if _, found := v.reached[name]; !found {
   228					v.error(prod.Pos(), name+" is unreachable")
   229				}
   230			}
   231		}
   232	}
   233	
   234	// Verify checks that:
   235	//	- all productions used are defined
   236	//	- all productions defined are used when beginning at start
   237	//	- lexical productions refer only to other lexical productions
   238	//
   239	// Position information is interpreted relative to the file set fset.
   240	//
   241	func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
   242		var v verifier
   243		v.verify(fset, grammar, start)
   244		return v.GetError(scanner.Sorted)
   245	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.