OILS / mycpp / mycpp_main.py View on Github | oilshell.org

422 lines, 255 significant
1#!/usr/bin/env python3
2"""
3mycpp_main.py - Translate a subset of Python to C++, using MyPy's typed AST.
4"""
5from __future__ import print_function
6
7import optparse
8import os
9import sys
10import tempfile
11
12from typing import List, Optional, Tuple
13
14from mypy.build import build as mypy_build
15from mypy.build import BuildSource
16from mypy.main import process_options
17
18from mycpp import ir_pass
19from mycpp import const_pass
20from mycpp import cppgen_pass
21from mycpp import debug_pass
22from mycpp import control_flow_pass
23from mycpp import pass_state
24from mycpp.util import log
25
26
def Options():
    """Build and return the optparse parser for the mycpp command line."""
    # (flag spellings, add_option keyword arguments), in the order they
    # should be registered (and therefore shown in --help).
    option_table = [
        (('-v', '--verbose'),
         dict(dest='verbose',
              action='store_true',
              default=False,
              help='Show details about translation')),
        (('--cc-out',),
         dict(dest='cc_out', default=None, help='.cc file to write to')),
        (('--to-header',),
         dict(dest='to_header',
              action='append',
              default=[],
              help='Export this module to a header, e.g. frontend.args')),
        (('--header-out',),
         dict(dest='header_out', default=None, help='Write this header')),
        (('--stack-roots-warn',),
         dict(dest='stack_roots_warn',
              default=None,
              type='int',
              help='Emit warnings about functions with too many stack roots')),
        (('--minimize-stack-roots',),
         dict(dest='minimize_stack_roots',
              action='store_true',
              default=False,
              help='Try to minimize the number of GC stack roots.')),
    ]

    parser = optparse.OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    return parser
68
69
70# Copied from mypyc/build.py
def get_mypy_config(
    paths: List[str], mypy_options: Optional[List[str]]
) -> Tuple[List[BuildSource], Options]:
    """Construct mypy BuildSources and Options from file and options lists.

    Args:
      paths: Source files to hand to mypy; they are placed after a '--'
        separator on the synthesized mypy command line.
      mypy_options: mypy command line flags, or None for no flags.  The
        caller's list is left unmodified.

    Returns:
      The (sources, options) pair produced by mypy's process_options(), with
      the option overrides below applied.
    """
    # NOTE(review): the `Options` in the return annotation resolves to the
    # Options() function defined in this file -- no mypy Options class is
    # imported here.  The value actually returned is whatever
    # process_options() produces.

    # Build a fresh argument list instead of appending to the caller's list,
    # so calling this twice with the same list doesn't accumulate paths.
    mypy_argv = list(mypy_options or [])
    mypy_argv.append('--')
    mypy_argv.extend(paths)

    sources, options = process_options(mypy_argv)

    options.show_traceback = True
    # Needed to get types for all AST nodes
    options.export_types = True
    # TODO: Support incremental checking
    options.incremental = False
    # 10/2019: FIX for MyPy 0.730.  Not sure why I need this but I do.
    options.preserve_asts = True

    # 1/2023: Workaround for conditional import in osh/builtin_comp.py
    # Same as devtools/types.sh
    options.warn_unused_ignores = False

    # Set the 'mypyc' per-module flag on every source module (carried over
    # from the mypyc/build.py code this was copied from).
    for source in sources:
        options.per_module_options.setdefault(source.module,
                                              {})['mypyc'] = True

    return sources, options
99
100
# Modules that are always emitted before everything else.
_FIRST = ('asdl.runtime', 'core.vm')

# should be LAST because they use base classes
_LAST = ('builtin.bracket_osh', 'builtin.completion_osh', 'core.shell')


def ModulesToCompile(result, mod_names):
    """Yield (name, module) pairs from result.files in translation order.

    The order is: modules named in _FIRST, then modules whose last dotted
    component appears in mod_names, then modules named in _LAST.  Modules in
    _FIRST and _LAST are yielded whenever they are present in result.files,
    whether or not they appear in mod_names.
    """
    # HACK to put asdl/runtime first: somehow the MyPy builder reorders the
    # modules.  Another fix might be to hoist those to the declaration phase,
    # but it's not clear that makes sense.
    yield from ((qualified, mod)
                for qualified, mod in result.files.items()
                if qualified in _FIRST)

    for qualified, mod in result.files.items():
        # Only translate files that were mentioned on the command line, and
        # leave the _FIRST / _LAST modules to their own phases.
        leaf = qualified.rsplit('.', 1)[-1]
        wanted = leaf in mod_names
        if wanted and qualified not in _FIRST and qualified not in _LAST:
            yield qualified, mod

    # LAST files
    yield from ((qualified, mod)
                for qualified, mod in result.files.items()
                if qualified in _LAST)
136
137
def _DedupeModules(modules):
    """Normalize module names and keep the first module seen for each name.

    Args:
      modules: Iterable of (name, module) pairs.

    Returns:
      A list of (name, module) pairs in which a leading 'oil.' and then a
      leading 'mycpp.' have been stripped from each name, and any pair whose
      stripped name was already seen has been dropped.
    """
    filtered = []
    seen = set()
    for name, module in modules:
        # HACK: Why do I get oil.asdl.tdop in addition to asdl.tdop?
        if name.startswith('oil.'):
            name = name[4:]

        # ditto with testpkg.module1
        if name.startswith('mycpp.'):
            name = name[6:]

        if name not in seen:  # remove dupe
            filtered.append((name, module))
            seen.add(name)
    return filtered


def _DeclOutput(name, to_header, header_f, cc_f):
    """Return the stream that declarations for module `name` are written to.

    Modules listed in to_header go to header_f; everything else goes to cc_f.

    Raises:
      RuntimeError: if the module is listed in to_header but no header file
        was opened (header_f is None).
    """
    if name not in to_header:
        return cc_f
    if header_f is None:
        raise RuntimeError(
            'module %r is listed in --to-header, but --header-out was not given'
            % name)
    return header_f


def main(argv):
    """Type check the given Python files with MyPy and translate them to C++.

    Args:
      argv: Full command line, including the program name.  Flags are the
        ones defined by Options(); the remaining arguments are paths of the
        Python files to translate.

    Returns:
      0 on success, or 1 if the MyPy build reported errors.  Translation
      errors found by the code generation passes terminate the process via
      MaybeExitWithErrors().

    Raises:
      RuntimeError: if a translated module is listed in --to-header but
        --header-out was not given.
    """
    # Local import, in keeping with the other function-level imports here.
    import contextlib

    # TODO: Put these in the shell script
    mypy_options = [
        '--py2',
        '--strict',
        '--no-implicit-optional',
        '--no-strict-optional',
        # for consistency?
        '--follow-imports=silent',
        #'--verbose',
    ]

    o = Options()
    opts, argv = o.parse_args(argv)

    paths = argv[1:]  # e.g. asdl/typed_arith_parse.py

    log('\tmycpp: LOADING %s', ' '.join(paths))
    #log('\tmycpp: MYPYPATH = %r', os.getenv('MYPYPATH'))

    if 0:
        print(opts)
        print(paths)
        return

    # e.g. asdl/typed_arith_parse.py -> 'typed_arith_parse'
    mod_names = [os.path.splitext(os.path.basename(p))[0] for p in paths]

    # Module names whose declarations go to the header, e.g. frontend.args
    to_header = opts.to_header
    #if to_header:
    if 0:
        to_header = [os.path.basename(p) for p in to_header]
        to_header = [os.path.splitext(name)[0] for name in to_header]

    #log('to_header %s', to_header)

    sources, options = get_mypy_config(paths, mypy_options)
    if 0:
        for source in sources:
            log('source %s', source)
        log('')
    #log('options %s', options)

    #result = emitmodule.parse_and_typecheck(sources, options)
    result = mypy_build(sources=sources, options=options)

    if result.errors:
        log('')
        log('-' * 80)
        for e in result.errors:
            log(e)
        log('-' * 80)
        log('')
        return 1

    # Important functions in mypyc/build.py:
    #
    # generate_c (251 lines)
    #   parse_and_typecheck
    #   compile_modules_to_c

    # mypyc/emitmodule.py (487 lines)
    # def compile_modules_to_c(result: BuildResult, module_names: List[str],
    # class ModuleGenerator:
    #   # This generates a whole bunch of textual code!

    # literals, modules, errors = genops.build_ir(file_nodes, result.graph,
    # result.types)

    # TODO: Debug what comes out of here.
    #build.dump_graph(result.graph)
    #return

    # no-op
    if 0:
        for name in result.graph:
            log('result %s %s', name, result.graph[name])
        log('')

    # GLOBAL Constant pass over all modules.  We want to collect duplicate
    # strings together.  And have globally unique IDs str0, str1, ... strN.
    const_lookup = {}  # Dict {StrExpr node => string name}
    const_code = []
    pass1 = const_pass.Collect(result.types, const_lookup, const_code)

    to_compile = _DedupeModules(ModulesToCompile(result, mod_names))

    #import pickle
    if 0:
        for name, module in to_compile:
            log('to_compile %s', name)
            log('')

            # can't pickle but now I see deserialize() nodes and stuff
            #s = pickle.dumps(module)
            #log('%d pickle', len(s))

    # Print the tree for debugging
    if 0:
        for name, module in to_compile:
            builder = debug_pass.Print(result.types)
            builder.visit_mypy_file(module)
        return

    # Everything registered on `cleanup` (output files, temp dir) is released
    # when this block exits, including when MaybeExitWithErrors() calls
    # sys.exit() or an exception propagates.  sys.stdout is never registered,
    # so it is never closed.
    with contextlib.ExitStack() as cleanup:
        if opts.cc_out:
            f = cleanup.enter_context(open(opts.cc_out, 'w'))
        else:
            f = sys.stdout

        f.write('// BEGIN mycpp output\n'
                '\n'
                '#include "mycpp/runtime.h"\n'
                '\n')

        # Convert the mypy AST into our own IR.
        dot_exprs = {}  # module path -> {expr node -> access type}
        log('\tmycpp pass: IR')
        for _, module in to_compile:
            p = ir_pass.Build(result.types)
            p.visit_mypy_file(module)
            dot_exprs[module.path] = p.dot_exprs

        # Collect constants and then emit code.
        log('\tmycpp pass: CONST')
        for name, module in to_compile:
            pass1.visit_mypy_file(module)

        # Instead of top-level code, should we generate a function and call it
        # from main?
        for line in const_code:
            f.write('%s\n' % line)
        f.write('\n')

        # Note: doesn't take into account module names!
        virtual = pass_state.Virtual()

        # Stays None without --header-out; _DeclOutput() reports an error if
        # a module then needs the header.
        header_f = None
        if opts.header_out:
            header_f = cleanup.enter_context(open(opts.header_out, 'w'))

        log('\tmycpp pass: FORWARD DECL')

        # Forward declarations first.
        # class Foo; class Bar;
        for name, module in to_compile:
            #log('forward decl name %s', name)
            out_f = _DeclOutput(name, to_header, header_f, f)
            p2 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      out_f,
                                      virtual=virtual,
                                      forward_decl=True,
                                      dot_exprs=dot_exprs[module.path])

            p2.visit_mypy_file(module)
            MaybeExitWithErrors(p2)

        # After seeing class and method names in the first pass, figure out
        # which ones are virtual.  We use this info in the second pass.
        virtual.Calculate()
        if 0:
            log('virtuals %s', virtual.virtuals)
            log('has_vtable %s', virtual.has_vtable)

        local_vars = {}  # FuncDef node -> (name, c_type) list
        # Dict[ClassDef node for ctx_Foo, Dict[member_name: str, Type]]
        ctx_member_vars = {}

        log('\tmycpp pass: PROTOTYPES')

        # First generate ALL C++ declarations / "headers".
        # class Foo { void method(); }; class Bar { void method(); };
        for name, module in to_compile:
            #log('decl name %s', name)
            out_f = _DeclOutput(name, to_header, header_f, f)
            p3 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      out_f,
                                      local_vars=local_vars,
                                      ctx_member_vars=ctx_member_vars,
                                      virtual=virtual,
                                      decl=True,
                                      dot_exprs=dot_exprs[module.path])

            p3.visit_mypy_file(module)
            MaybeExitWithErrors(p3)

        if 0:
            log('\tctx_member_vars')
            from pprint import pformat
            print(pformat(ctx_member_vars), file=sys.stderr)

        log('\tmycpp pass: CONTROL FLOW')

        cfgs = {}  # fully qualified function name -> control flow graph
        for name, module in to_compile:
            cfg_pass = control_flow_pass.Build(result.types, virtual,
                                               local_vars,
                                               dot_exprs[module.path])
            cfg_pass.visit_mypy_file(module)
            cfgs.update(cfg_pass.cfgs)

        log('\tmycpp pass: DATAFLOW')
        stack_roots = None
        if opts.minimize_stack_roots:
            # souffle_dir contains two subdirectories.
            #   facts: TSV files for the souffle inputs generated by mycpp
            #   outputs: TSV files for the solver's output relations
            souffle_dir = os.getenv('MYCPP_SOUFFLE_DIR', None)
            if souffle_dir is None:
                # The temp dir lives until the end of the `with` block, i.e.
                # through the IMPL pass below.
                souffle_dir = cleanup.enter_context(
                    tempfile.TemporaryDirectory())
            stack_roots = pass_state.ComputeMinimalStackRoots(
                cfgs, souffle_dir=souffle_dir)
        else:
            pass_state.DumpControlFlowGraphs(cfgs)

        log('\tmycpp pass: IMPL')

        # Now the definitions / implementations.
        # void Foo:method() { ... }
        # void Bar:method() { ... }
        for name, module in to_compile:
            p4 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      f,
                                      local_vars=local_vars,
                                      ctx_member_vars=ctx_member_vars,
                                      stack_roots_warn=opts.stack_roots_warn,
                                      dot_exprs=dot_exprs[module.path],
                                      stack_roots=stack_roots)
            p4.visit_mypy_file(module)
            MaybeExitWithErrors(p4)

        return 0  # success
403
404
def MaybeExitWithErrors(p):
    """Exit the process if pass `p` collected any translation errors.

    Does nothing when p.errors_keep_going is empty.  Otherwise logs a summary
    and exits with the number of errors as the status, capped at 255.
    """
    error_count = len(p.errors_keep_going)
    if error_count == 0:
        return

    log('')
    log('%s: %d translation errors (after type checking)', sys.argv[0],
        error_count)

    # A little hack to tell the test-invalid-examples harness how many errors
    # we had
    exit_status = error_count if error_count < 255 else 255
    sys.exit(exit_status)
415
416
if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the exit status, and
    # a RuntimeError is reported on stderr with status 1.
    try:
        exit_status = main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        exit_status = 1
    sys.exit(exit_status)