#!/usr/bin/env python3
# OILS / mycpp / mycpp_main.py (as published on oilshell.org)
"""
mycpp_main.py - Translate a subset of Python to C++, using MyPy's typed AST.
"""
from __future__ import print_function

import contextlib
import optparse
import os
import sys
import tempfile

from typing import List, Optional, Tuple

from mypy.build import build as mypy_build
from mypy.build import BuildSource
from mypy.main import process_options
from mypy.options import Options as MypyOptions

from mycpp import ir_pass
from mycpp import const_pass
from mycpp import cppgen_pass
from mycpp import debug_pass
from mycpp import control_flow_pass
from mycpp import pass_state
from mycpp.util import log
25
26
def Options():
    """Build the command line parser for mycpp.

    Returns:
      An optparse.OptionParser that understands:
        -v / --verbose            flag, default False
        --cc-out PATH             default None
        --to-header MODULE        repeatable, collected into a list
        --header-out PATH         default None
        --stack-roots-warn N      int, default None
        --minimize-stack-roots    flag, default False
    """
    parser = optparse.OptionParser()

    parser.add_option('-v',
                      '--verbose',
                      dest='verbose',
                      action='store_true',
                      default=False,
                      help='Show details about translation')

    parser.add_option('--cc-out',
                      dest='cc_out',
                      default=None,
                      help='.cc file to write to')

    # A fresh list is created on every call of Options(), so parsers never
    # share the 'append' accumulator.
    parser.add_option('--to-header',
                      dest='to_header',
                      action='append',
                      default=[],
                      help='Export this module to a header, e.g. frontend.args')

    parser.add_option('--header-out',
                      dest='header_out',
                      default=None,
                      help='Write this header')

    parser.add_option(
        '--stack-roots-warn',
        dest='stack_roots_warn',
        default=None,
        type='int',
        help='Emit warnings about functions with too many stack roots')

    parser.add_option(
        '--minimize-stack-roots',
        dest='minimize_stack_roots',
        action='store_true',
        default=False,
        help='Try to minimize the number of GC stack roots.')

    return parser
69
70
# Copied from mypyc/build.py
def get_mypy_config(
        paths: List[str], mypy_options: Optional[List[str]]
) -> Tuple[List[BuildSource], MypyOptions]:
    """Construct mypy BuildSources and Options from file and options lists.

    Args:
      paths: Python source files to type check.
      mypy_options: Extra mypy command line flags, or None.  The list is not
        modified.

    Returns:
      (sources, options) as produced by mypy's process_options(), with the
      option overrides mycpp needs applied.
    """
    # It is kind of silly to round-trip through a command line, but oh well.
    # Copy first so the caller's list isn't extended behind its back.
    args = list(mypy_options or [])
    args.append('--')
    args.extend(paths)

    sources, options = process_options(args)

    options.show_traceback = True
    # Needed to get types for all AST nodes
    options.export_types = True
    # TODO: Support incremental checking
    options.incremental = False
    # 10/2019: FIX for MyPy 0.730.  Not sure why I need this but I do.
    options.preserve_asts = True

    # 1/2023: Workaround for conditional import in osh/builtin_comp.py
    # Same as devtools/types.sh
    options.warn_unused_ignores = False

    for source in sources:
        per_module = options.per_module_options.setdefault(source.module, {})
        per_module['mypyc'] = True

    return sources, options
100
101
# Modules that are translated before everything else.
_FIRST = ('asdl.runtime', 'core.vm')

# should be LAST because they use base classes
_LAST = ('builtin.bracket_osh', 'builtin.completion_osh', 'core.shell')


def ModulesToCompile(result, mod_names):
    """Yield (name, module) pairs from result.files in translation order.

    Order of the output:
      1. modules named in _FIRST
      2. modules whose last dotted component is in mod_names
      3. modules named in _LAST

    Within each group the iteration order of result.files is kept.  The
    mod_names filter applies only to group 2; _FIRST and _LAST modules are
    yielded whenever they appear in result.files.

    Args:
      result: Object with a `files` dict of {module name: module}.
      mod_names: Base names of the modules mentioned on the command line,
        e.g. 'typed_arith_parse'.
    """
    # HACK TO PUT asdl/runtime FIRST.
    #
    # Another fix is to hoist those to the declaration phase?  Not sure if
    # that makes sense.

    # Built once so the membership test below isn't a list scan per module.
    wanted = frozenset(mod_names)

    # FIRST files.  Somehow the MyPy builder reorders the modules.
    for name, module in result.files.items():
        if name in _FIRST:
            yield name, module

    for name, module in result.files.items():
        # _FIRST was already done above; _LAST is done below.
        if name in _FIRST or name in _LAST:
            continue

        # Only translate files that were mentioned on the command line
        if name.rpartition('.')[2] in wanted:
            yield name, module

    # LAST files
    for name, module in result.files.items():
        if name in _LAST:
            yield name, module
137
138
def main(argv):
    """Translate the Python files named on the command line into C++.

    Type checks the files with MyPy, then runs the mycpp passes over the
    selected modules (IR, constants, forward declarations, prototypes,
    control flow, dataflow, implementations).  C++ goes to --cc-out (or
    stdout); declarations of modules listed in --to-header go to
    --header-out.

    Args:
      argv: Full argument vector.  argv[0] is the program name; the remaining
        positional arguments are paths of Python source files.

    Returns:
      0 on success, or 1 if MyPy reported errors.

    Raises:
      RuntimeError: A module being translated is listed in --to-header, but
        --header-out was not given.
      SystemExit: From MaybeExitWithErrors(), when a code generation pass
        collected translation errors.
    """
    # TODO: Put these in the shell script
    mypy_options = [
        '--py2',
        '--strict',
        '--no-implicit-optional',
        '--no-strict-optional',
        # for consistency?
        '--follow-imports=silent',
    ]

    o = Options()
    opts, argv = o.parse_args(argv)

    paths = argv[1:]  # e.g. asdl/typed_arith_parse.py

    log('\tmycpp: LOADING %s', ' '.join(paths))

    # e.g. asdl/typed_arith_parse.py -> 'typed_arith_parse'
    mod_names = [os.path.splitext(os.path.basename(p))[0] for p in paths]

    # Compared against dotted module names below (e.g. frontend.args), so
    # these are used as given.
    to_header = opts.to_header

    sources, options = get_mypy_config(paths, mypy_options)

    result = mypy_build(sources=sources, options=options)

    if result.errors:
        log('')
        log('-' * 80)
        for e in result.errors:
            log(e)
        log('-' * 80)
        log('')
        return 1

    # Related code in mypyc, for reference:
    #   mypyc/build.py: generate_c, parse_and_typecheck, compile_modules_to_c
    #   mypyc/emitmodule.py: compile_modules_to_c, class ModuleGenerator

    # GLOBAL Constant pass over all modules.  We want to collect duplicate
    # strings together.  And have globally unique IDs str0, str1, ... strN.
    const_lookup = {}  # Dict {StrExpr node => string name}
    const_code = []
    pass1 = const_pass.Collect(result.types, const_lookup, const_code)

    # HACK: Why do I get oil.asdl.tdop in addition to asdl.tdop?
    # Strip the package prefixes and drop the resulting duplicates, keeping
    # the first occurrence.
    to_compile = []
    seen = set()
    for name, module in ModulesToCompile(result, mod_names):
        if name.startswith('oil.'):
            name = name[4:]

        # ditto with testpkg.module1
        if name.startswith('mycpp.'):
            name = name[6:]

        if name not in seen:  # remove dupe
            to_compile.append((name, module))
            seen.add(name)

    # To dump the typed AST while debugging, run
    # debug_pass.Print(result.types).visit_mypy_file(module) on each module.

    # Fail with a clear message before any output is written, rather than
    # referencing a header file that was never opened.
    if not opts.header_out:
        for name, _ in to_compile:
            if name in to_header:
                raise RuntimeError('--to-header %s requires --header-out' %
                                   name)

    # The stack closes the output files and removes the temp dir on every
    # exit path, including sys.exit() inside MaybeExitWithErrors().
    with contextlib.ExitStack() as stack:
        if opts.cc_out:
            f = stack.enter_context(open(opts.cc_out, 'w'))
        else:
            f = sys.stdout

        f.write('// BEGIN mycpp output\n'
                '\n'
                '#include "mycpp/runtime.h"\n'
                '\n')

        # Convert the mypy AST into our own IR.
        dot_exprs = {}  # module path -> {expr node -> access type}
        log('\tmycpp pass: IR')
        for _, module in to_compile:
            p = ir_pass.Build(result.types)
            p.visit_mypy_file(module)
            dot_exprs[module.path] = p.dot_exprs

        # Collect constants and then emit code.
        log('\tmycpp pass: CONST')
        for _, module in to_compile:
            pass1.visit_mypy_file(module)

        # Instead of top-level code, should we generate a function and call
        # it from main?
        for line in const_code:
            f.write('%s\n' % line)
        f.write('\n')

        # Note: doesn't take into account module names!
        virtual = pass_state.Virtual()

        header_f = None
        if opts.header_out:
            header_f = stack.enter_context(open(opts.header_out, 'w'))

        log('\tmycpp pass: FORWARD DECL')

        # Forward declarations first.
        # class Foo; class Bar;
        for name, module in to_compile:
            out_f = header_f if name in to_header else f
            p2 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      out_f,
                                      virtual=virtual,
                                      forward_decl=True,
                                      dot_exprs=dot_exprs[module.path])

            p2.visit_mypy_file(module)
            MaybeExitWithErrors(p2)

        # After seeing class and method names in the first pass, figure out
        # which ones are virtual.  We use this info in the second pass.
        virtual.Calculate()

        local_vars = {}  # FuncDef node -> (name, c_type) list
        # Dict[ClassDef node for ctx_Foo, Dict[member_name: str, Type]]
        ctx_member_vars = {}

        log('\tmycpp pass: PROTOTYPES')

        # First generate ALL C++ declarations / "headers".
        # class Foo { void method(); }; class Bar { void method(); };
        for name, module in to_compile:
            out_f = header_f if name in to_header else f
            p3 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      out_f,
                                      local_vars=local_vars,
                                      ctx_member_vars=ctx_member_vars,
                                      virtual=virtual,
                                      decl=True,
                                      dot_exprs=dot_exprs[module.path])

            p3.visit_mypy_file(module)
            MaybeExitWithErrors(p3)

        log('\tmycpp pass: CONTROL FLOW')

        cfgs = {}  # fully qualified function name -> control flow graph
        for _, module in to_compile:
            cfg_pass = control_flow_pass.Build(result.types, virtual,
                                               local_vars,
                                               dot_exprs[module.path])
            cfg_pass.visit_mypy_file(module)
            cfgs.update(cfg_pass.cfgs)

        log('\tmycpp pass: DATAFLOW')
        stack_roots = None
        if opts.minimize_stack_roots:
            # souffle_dir contains two subdirectories.
            #   facts: TSV files for the souffle inputs generated by mycpp
            #   outputs: TSV files for the solver's output relations
            souffle_dir = os.getenv('MYCPP_SOUFFLE_DIR', None)
            if souffle_dir is None:
                # Entering the TemporaryDirectory yields its path; the
                # directory lives until the stack unwinds.
                souffle_dir = stack.enter_context(
                    tempfile.TemporaryDirectory())
            stack_roots = pass_state.ComputeMinimalStackRoots(
                cfgs, souffle_dir=souffle_dir)
        else:
            pass_state.DumpControlFlowGraphs(cfgs)

        log('\tmycpp pass: IMPL')

        # Now the definitions / implementations.  These always go to the .cc
        # output, never to the header.
        # void Foo:method() { ... }
        # void Bar:method() { ... }
        for _, module in to_compile:
            p4 = cppgen_pass.Generate(result.types,
                                      const_lookup,
                                      f,
                                      local_vars=local_vars,
                                      ctx_member_vars=ctx_member_vars,
                                      stack_roots_warn=opts.stack_roots_warn,
                                      dot_exprs=dot_exprs[module.path],
                                      stack_roots=stack_roots)
            p4.visit_mypy_file(module)
            MaybeExitWithErrors(p4)

        return 0  # success
404
405
def MaybeExitWithErrors(p):
    """Exit the process if pass `p` collected any translation errors.

    Args:
      p: A pass object exposing an `errors_keep_going` collection.

    Returns:
      None when there are no errors.  Otherwise this does not return: it logs
      a summary and calls sys.exit() with the error count, capped at 255.
    """
    # Check for errors we collected
    num_errors = len(p.errors_keep_going)
    if num_errors == 0:
        return

    log('')
    log('%s: %d translation errors (after type checking)', sys.argv[0],
        num_errors)

    # A little hack to tell the test-invalid-examples harness how many errors
    # we had.  (Capped at 255, presumably because an exit status is one byte.)
    sys.exit(min(num_errors, 255))
416
417
if __name__ == '__main__':
    # main() returns the exit status.  A RuntimeError is reported as a
    # one-line FATAL message on stderr with status 1, without a traceback.
    try:
        status = main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        status = 1
    sys.exit(status)