#!/usr/bin/env python2
"""
grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
"""
from __future__ import print_function

import os
import sys

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.syntax_asdl import source

from core import alloc
from core import optview
from mycpp.mylib import log
from frontend import lexer
from frontend import lexer_def
from frontend import reader
from pgen2 import parse, pgen, token


class OilTokenDef(object):

    def __init__(self, ops, more_ops, keyword_ops):
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178."""
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetKeywordNum(self, s):
        """e.g. 'xor' -> Id.Expr_Xor.

        Python doesn't have this, but Oil does.  Returns None if not found.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetOpNum(self, op_str):
        """
        Args:
          op_str: operator text like '>='

        Returns:
          The integer token ID, e.g. Id.Arith_GreatEqual for '>='.
        """
        # Check the main table first; raise KeyError if the operator is in
        # neither table.
        id_ = self.ops.get(op_str) or self.more_ops[op_str]
        assert id_ < token.NT_OFFSET, id_
        return id_
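
# A minimal usage sketch (illustrative; the concrete integer values come from
# the generated Id enum, and OPS / more_ops / keyword_ops are built in main()
# below):
#
#   tok_def = OilTokenDef(OPS, more_ops, keyword_ops)
#   tok_def.GetTerminalNum('Expr_Name')  # -> integer for Id.Expr_Name
#   tok_def.GetOpNum('>=')               # -> integer for Id.Arith_GreatEqual
#   tok_def.GetKeywordNum('xor')         # -> integer for Id.Expr_Xor, or None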

def MakeOilLexer(code_str, arena):
    arena.PushSource(source.MainFile('pgen2_main'))
    line_reader = reader.StringLineReader(code_str, arena)
    line_lexer = lexer.LineLexer(arena)
    lex = lexer.Lexer(line_lexer, line_reader)
    return lex
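
# Usage sketch, mirroring the 'parse' action in main() below (illustrative):
#
#   arena = alloc.Arena()
#   lex_ = MakeOilLexer('1 + 2', arena)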


def main(argv):
    action = argv[1]
    argv = argv[2:]

    # Used at grammar BUILD time.
    OPS = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '..=': Id.Expr_DDotEqual,
        '..<': Id.Expr_DDotLessThan,
        '->': Id.Expr_RArrow,
        '=>': Id.Expr_RDArrow,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
        '**=': Id.Expr_DStarEqual,
        '//=': Id.Expr_DSlashEqual,
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in OPS, token_str
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

    if action == 'py':  # generate Python tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()
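
        # Example invocation (paths are illustrative):
        #
        #   grammar_gen.py py ysh/grammar.pgen2 _devbuild/gen
        #
        # which would write _devbuild/gen/grammar.marshal and
        # _devbuild/gen/grammar_nt.py.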

    elif action == 'cpp':  # generate C++ tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:
            log('%s -> (ysh/grammar_gen) -> %s/%s_nt.h', grammar_path,
                out_dir, basename)
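
        # Example invocation (paths are illustrative):
        #
        #   grammar_gen.py cpp ysh/grammar.pgen2 _gen/ysh
        #
        # which would write _gen/ysh/grammar_nt.h and
        # _gen/ysh/grammar_tables.cc.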

    elif action == 'parse':  # generate the grammar, then parse a string with it
        # Imported here rather than at the top to avoid a build dependency.
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing the lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()
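
        # Example invocation (grammar path is illustrative; 'eval_input' is
        # one of the start symbols special-cased above):
        #
        #   grammar_gen.py parse ysh/grammar.pgen2 eval_input '1 + 2'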

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!

        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)
|