1 #!/usr/bin/awk -f
2 # Copyright 2010 The Go Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style
4 # license that can be found in the LICENSE file.
5
6 # This program implements the core idea from
7 #
8 # Clinton L. Jeffery, Generating LR syntax error messages from examples,
9 # ACM TOPLAS 25(5) (September 2003). http://doi.acm.org/10.1145/937563.937566
10 #
11 # It reads Bison's summary of a grammar followed by a file
12 # like go.errors, replacing lines beginning with % by the
13 # yystate and yychar that will be active when an error happens
14 # while parsing that line.
15 #
16 # Unlike the system described in the paper, the lines in go.errors
17 # give grammar symbol name lists, not actual program fragments.
18 # This is a little less programmer-friendly but doesn't require being
19 # able to run the text through lex.c.
20
# Mode flags.  The input starts with Bison's y.output (bison = 1);
# neither its Grammar section nor its state dumps have been seen yet.
BEGIN {
	bison = 1
	grammar = states = 0
}
26
# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
#
# Rule lines look like "N lhs: sym sym ..." or, for a further alternative
# of the preceding rule, "N | sym sym ...".  $1 is the rule number.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|state 0)/ { grammar = 0 }
grammar && NF>0 {
	# A "|" line continues the previous rule, so r keeps the last lhs seen.
	if($2 != "|") {
		r = $2
		sub(/:$/, "", r)
	}
	rulelhs[$1] = r
	# Everything after the rule number and the lhs (or "|") is the rhs.
	rulesize[$1] = NF-2
	# An empty rhs is printed as a marker rather than as symbols:
	# "/* empty */" (three fields) or, in newer Bison reports, "%empty"
	# (one field).  Either way the rule pops nothing on reduce.
	if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
		rulesize[$1] = 0
	} else if(rulesize[$1] == 1 && $3 == "%empty") {
		rulesize[$1] = 0
	}
}
42
# In state dumps, record shift/reduce actions.
# The dumps begin at the "state 0" line; from there on remember which
# state each line belongs to and accumulate the raw text of every state.
bison && /^state 0/ {
	grammar = 0
	states = 1
}

states {
	# A "state N" header switches the current state before its own text
	# is appended, so the header line is stored under state N.
	if($0 ~ /^state /)
		state = $2
	statetext[state] = statetext[state] $0 "\n"
}
48
# Record the parser actions listed under the current state.
#   "TOKEN shift, and go to state N"  -> shift table, N is field 7
#   "SYMBOL go to state N"            -> shift table, N is field 5
#   "TOKEN reduce using rule R ..."   -> reduce table, R is field 5
# A recorded line is consumed with next.
states && / go to state/ {
	slot = nshift[state]++
	shifttoken[state,slot] = $1
	shift[state,slot] = ($0 ~ / shift, and go to state/) ? $7 : $5
	next
}
states && / reduce using rule/ {
	slot = nreduce[state]++
	reducetoken[state,slot] = $1
	reduce[state,slot] = $5
	next
}
67
# First // comment marks the beginning of the pattern file.
# From here on the input is no longer Bison output, so every y.output
# mode flag is switched off.  That includes states: if it stayed set, the
# state-dump rules would keep running and consume pattern-file lines that
# happen to contain " go to state" or " reduce using rule".
/^\/\// { bison = 0; grammar = 0; states = 0; state = 0 }
# Lines of the Bison output itself are never printed.
bison { next }
71
# Treat % as first field on line as introducing a pattern (token sequence).
# Run it through the LR machine and print the induced "yystate, yychar,"
# at the point where the error happens.
#
# The machine is driven by the tables recorded earlier: shift, shifttoken
# and nshift (shifts and gotos), reduce, reducetoken and nreduce
# (reductions), and rulelhs/rulesize from the Grammar section.
# stack[0..nstack-1] holds the states below the current one.
$1 == "%" {
	nstack = 0
	state = 0
	f = 2		# index of the next pattern field to read
	tok = ""	# current symbol; "" means fetch the next field
	for(;;) {
		if(tok == "" && f <= NF) {
			tok = $f
			f++
		}
		found = 0
		# Try to shift tok (or take the goto, when tok is the lhs of a
		# rule that was just reduced).
		for(j=0; j<nshift[state]; j++) {
			if(shifttoken[state,j] == tok) {
				# print "SHIFT " tok " " state " -> " shift[state,j]
				stack[nstack++] = state
				state = shift[state,j]
				found = 1
				tok = ""
				break
			}
		}
		if(found)
			continue
		# Otherwise try to reduce, either on an explicit match of tok
		# or through the state's $default reduction.
		for(j=0; j<nreduce[state]; j++) {
			if(reducetoken[state,j] == tok || reducetoken[state,j] == "$default") {
				# Push the current state, drop one entry per rhs symbol,
				# and make the entry then on top the current state.
				stack[nstack++] = state
				rule = reduce[state,j]
				nstack -= rulesize[rule]
				state = stack[--nstack]
				# An unconsumed lookahead is pushed back so that it is
				# read again once the lhs has been processed.
				if(tok != "")
					--f
				tok = rulelhs[rule]
				# print "REDUCE " nstack " " state " " tok " rule " rule " size " rulesize[rule]
				found = 1
				break
			}
		}
		if(found)
			continue

		# No shift or reduce applied - found the error.
		printf("\t%s, %s,\n", state, tok)
		break
	}
	next
}
122
# Every line not consumed by an earlier rule is copied to the output unchanged.
{ print $0 }