OILS / ysh / grammar_gen.py View on Github | oils.pub

245 lines, 168 significant
1#!/usr/bin/env python2
2"""
3grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
4"""
5from __future__ import print_function
6
7import os
8import sys
9
10from _devbuild.gen.id_kind_asdl import Id, Kind
11from _devbuild.gen.syntax_asdl import source
12
13from core import alloc
14from core import optview
15from mycpp.mylib import log
16from frontend import lexer
17from frontend import lexer_def
18from frontend import reader
19from pgen2 import parse, pgen, token
20
21
class OilTokenDef(object):
    """Translates grammar labels, operators, and keywords to integer Ids.

    Every lookup asserts that the resulting Id is below token.NT_OFFSET.
    """

    def __init__(self, ops, more_ops, keyword_ops):
        """
        Args:
          ops: dict of operator string -> Id; GetOpNum() consults it first
          more_ops: dict of operator string -> Id; GetOpNum() falls back to it
          keyword_ops: dict of keyword string -> Id, for GetKeywordNum()
        """
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178.

        Raises AttributeError if `label` is not an attribute of Id.
        """
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetKeywordNum(self, s):
        """e.g 'xor' -> Id.Expr_Xor.

        Python doesn't have this, but Oil does.  Returns None if not
        found.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetOpNum(self, op_str):
        """
        Args:
          op_str: '>='

        Returns:
          Integer for '>=' or Id.Arith_GreatEqual

        Raises:
          KeyError: if op_str is in neither self.ops nor self.more_ops
        """
        id_ = self.ops.get(op_str)
        # Compare against None rather than relying on truthiness, so that an
        # Id of 0 found in self.ops isn't mistaken for "missing".
        if id_ is None:
            # Fail if not there
            id_ = self.more_ops[op_str]
        assert id_ < token.NT_OFFSET, id_
        return id_
60
61
def MakeOilLexer(code_str, arena):
    """Return a lexer.Lexer that reads from the string code_str.

    A source.MainFile('pgen2_main') is pushed onto the arena first; the same
    arena is then given to both the line reader and the line lexer.
    """
    arena.PushSource(source.MainFile('pgen2_main'))
    string_reader = reader.StringLineReader(code_str, arena)
    return lexer.Lexer(lexer.LineLexer(arena), string_reader)
68
69
# Names of the positional arguments each action takes after the action name.
_ACTION_ARGS = {
    'py': ['GRAMMAR_PATH', 'OUT_DIR'],
    'cpp': ['GRAMMAR_PATH', 'OUT_DIR'],
    'parse': ['GRAMMAR_PATH', 'START_SYMBOL', 'CODE_STR'],
    'stdlib-test': [],
}


def _MakeTokenDef():
    """Build the OilTokenDef (string -> Id tables) used at grammar BUILD time."""
    ops = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '..=': Id.Expr_DDotEqual,
        '..<': Id.Expr_DDotLessThan,
        '->': Id.Expr_RArrow,
        '=>': Id.Expr_RDArrow,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
        '**=': Id.Expr_DStarEqual,
        '//=': Id.Expr_DSlashEqual,
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in ops, token_str
        ops[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            # Duplicate: dump the table built so far to help find the clash
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:  # flip to 1 to dump the three tables when debugging
        from pprint import pprint
        pprint(ops)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    return OilTokenDef(ops, more_ops, keyword_ops)


def _LoadGrammar(grammar_path, tok_def):
    """Run pgen on the grammar file at grammar_path and return the grammar."""
    with open(grammar_path) as f:
        return pgen.MakeGrammar(f, tok_def=tok_def)


def main(argv):
    """Generate tables from a pgen2 grammar, or parse a string with it.

    argv[1] selects the action; the remaining arguments depend on it:

      py GRAMMAR_PATH OUT_DIR
        Write OUT_DIR/<name>.marshal and OUT_DIR/<name>_nt.py
      cpp GRAMMAR_PATH OUT_DIR
        Write OUT_DIR/<name>_nt.h and OUT_DIR/<name>_tables.cc
      parse GRAMMAR_PATH START_SYMBOL CODE_STR
        Parse CODE_STR and print the parse tree.  For the 'calc' and
        'grammar' grammars, also transform it and pretty-print the result.
      stdlib-test
        Print the tree the stdlib 'parser' module produces for '1+2'

    where <name> is the basename of GRAMMAR_PATH without its extension.

    Returns:
      1 if the 'parse' action gets a parse error, otherwise None.

    Raises:
      RuntimeError: if the action is missing or invalid, or if it's given too
        few arguments.  The __main__ guard reports this as a FATAL error.
    """
    if len(argv) < 2:
        raise RuntimeError('Expected an action: %s' %
                           ', '.join(sorted(_ACTION_ARGS)))
    action = argv[1]
    argv = argv[2:]

    # Validate up front, so that bad invocations get a one-line error rather
    # than an IndexError traceback.
    expected = _ACTION_ARGS.get(action)
    if expected is None:
        raise RuntimeError('Invalid action %r' % action)
    if len(argv) < len(expected):
        raise RuntimeError('Action %r expects arguments: %s' %
                           (action, ' '.join(expected)))

    tok_def = _MakeTokenDef()

    if action == 'py':  # generate the grammar and dump it for Python
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        gr = _LoadGrammar(grammar_path, tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()

    elif action == 'cpp':  # generate the grammar and dump it for C++
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        gr = _LoadGrammar(grammar_path, tok_def)

        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:
            log('%s -> (ysh/grammar_gen) -> %s/%s._nt.h', grammar_path,
                out_dir, basename)

    elif action == 'parse':  # generate the grammar and parse it
        # Remove build dependency: only this action imports these modules
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        gr = _LoadGrammar(grammar_path, tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!

        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        # Not reachable unless _ACTION_ARGS lists an action with no branch here
        raise RuntimeError('Invalid action %r' % action)
238
239
if __name__ == '__main__':
    # main() signals usage problems with RuntimeError; report them on stderr
    # and exit with status 1.  Otherwise exit with whatever main() returned.
    try:
        status = main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        status = 1
    sys.exit(status)