OILS / mycpp / pass_state.py View on Github | oilshell.org

597 lines, 310 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7import re
8import subprocess
9from collections import defaultdict
10
11from mypy.types import Type
12from mypy.nodes import Expression
13
14from mycpp.util import join_name, log, split_py_name, SymbolPath
15
16from typing import Optional
17
18_ = log
19
20
21class ModuleMember(object):
22 """
23 A member of a Python module.
24
25 e.g. core.state.Mem => core::state::Mem
26 """
27
28 def __init__(self, module_path: SymbolPath, member: str) -> None:
29 self.module_path = module_path
30 self.member = member
31
32
33class StaticObjectMember(object):
34 """
35 A static member of an object. Usually a a method like an alternative constructor.
36
37 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
38 """
39
40 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
41 self.base_type_name = base_type_name
42 self.member = member
43
44
45class HeapObjectMember(object):
46 """
47 A member of a heap-allocated object.
48
49 e.g foo.empty() => foo->empty()
50 """
51
52 def __init__(self, object_expr: Expression, object_type: Type,
53 member: str) -> None:
54 self.object_expr = object_expr
55 self.object_type = object_type
56 self.member = member
57
58
59class StackObjectMember(object):
60 """
61 A member of a stack-allocated object.
62
63 e.g foo.empty() => foo.empty()
64 """
65
66 def __init__(self, object_expr: Expression, object_type: Type,
67 member: str) -> None:
68 self.ojbect_expr = object_expr
69 self.object_type = object_type
70 self.member = member
71
72
73class Virtual(object):
74 """Calculate which C++ methods need the virtual keyword.
75
76 See unit test for example usage.
77 """
78
79 def __init__(self) -> None:
80 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
81 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
82 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
83 str]]] = {}
84 self.has_vtable: dict[SymbolPath, bool] = {}
85 self.can_reorder_fields: dict[SymbolPath, bool] = {}
86
87 # _Executor -> vm::_Executor
88 self.base_class_unique: dict[str, SymbolPath] = {}
89
90 # These are called on the Forward Declare pass
91 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
92 #log('OnMethod %s %s', class_name, method_name)
93
94 # __init__ and so forth don't count
95 if method_name.startswith('__') and method_name.endswith('__'):
96 return
97
98 self.methods[class_name].append(method_name)
99
100 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
101 if len(base_class) > 1:
102 # Hack for
103 #
104 # class _Executor: pass
105 # versus
106 # class MyExecutor(vm._Executor): pass
107 base_key = base_class[-1]
108
109 # Fail if we have two base classes in different namespaces with the same
110 # name.
111 if base_key in self.base_class_unique:
112 # Make sure we don't have collisions
113 assert (self.base_class_unique[base_key] == base_class or
114 base_class
115 in self.subclasses[self.base_class_unique[base_key]]
116 ), base_class
117 else:
118 self.base_class_unique[base_key] = base_class
119
120 else:
121 base_key = base_class
122
123 self.subclasses[base_class].append(subclass)
124
125 def Calculate(self) -> None:
126 """Call this after the forward declare pass."""
127 for base_class, subclasses in self.subclasses.items():
128 self.can_reorder_fields[base_class] = False
129
130 for subclass in subclasses:
131 self.can_reorder_fields[subclass] = False
132
133 b_methods = self.methods[base_class]
134 s_methods = self.methods[subclass]
135 overlapping = set(b_methods) & set(s_methods)
136 for method in overlapping:
137 self.virtuals[(base_class, method)] = None
138 self.virtuals[(subclass, method)] = (base_class, method)
139 if overlapping:
140 self.has_vtable[base_class] = True
141 self.has_vtable[subclass] = True
142
143 # These is called on the Decl pass
144 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
145 return (class_name, method_name) in self.virtuals
146
147 def HasVTable(self, class_name: SymbolPath) -> bool:
148 return class_name in self.has_vtable
149
150 def CanReorderFields(self, class_name: SymbolPath) -> bool:
151 if class_name in self.can_reorder_fields:
152 return self.can_reorder_fields[class_name]
153 else:
154 return True # by default they can be reordered
155
156
157def SymbolPathToReference(func: str, p: SymbolPath) -> str:
158 if len(p) > 1:
159 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'),
160 p[-1])
161
162 return '$LocalVariable({}, {})'.format(func, p[0])
163
164
165class Fact(object):
166 """
167 An abstract fact. These can be used to build up datalog programs.
168 """
169
170 def __init__(self) -> None:
171 pass
172
173 def name(self) -> str:
174 raise NotImplementedError()
175
176 def Generate(self, func: str, statement: int) -> str:
177 raise NotImplementedError()
178
179
180class FunctionCall(Fact):
181
182 def __init__(self, callee: str) -> None:
183 self.callee = callee
184
185 def name(self) -> str:
186 return 'call'
187
188 def Generate(self, func: str, statement: int) -> str:
189 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
190
191
192class Definition(Fact):
193 """
194 The definition of a variable. This corresponds to an allocation.
195 """
196
197 def __init__(self, ref: SymbolPath, obj: str) -> None:
198 self.ref = ref
199 self.obj = obj
200
201 def name(self) -> str:
202 return 'assign'
203
204 def Generate(self, func: str, statement: int) -> str:
205 return '{}\t{}\t{}\t{}\n'.format(func, statement,
206 SymbolPathToReference(func, self.ref),
207 self.obj)
208
209
210class Assignment(Fact):
211 """
212 The assignment of one variable or object member to another.
213 """
214
215 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
216 self.lhs = lhs
217 self.rhs = rhs
218
219 def name(self) -> str:
220 return 'assign'
221
222 def Generate(self, func: str, statement: int) -> str:
223 return '{}\t{}\t{}\t$Ref({})\n'.format(
224 func, statement, SymbolPathToReference(func, self.lhs),
225 SymbolPathToReference(func, self.rhs))
226
227
228class Use(Fact):
229 """
230 The use of a reference.
231
232 In the last assignment below, we would emit Use(foo) and Use(x). We would,
233 however, not emit Use(foo.a) since it is an lvalue and would instead be
234 covered by the Assign fact. Similarly, the first two assignments do not
235 generate Use facts.
236
237 foo = Foo()
238 x = Bar()
239 foo.a = x
240
241 Any time a reference appears in an expression (or expression-statement) it
242 will be considered used.
243
244 some_function(a) => Use(a)
245 a + b => Use(a), Use(b)
246 print(thing.dict[key]) => Use(thing), Use(thing.dict), Use(key)
247 obj.func() => Use(obj)
248 """
249
250 def __init__(self, ref: SymbolPath) -> None:
251 self.ref = ref
252
253 def name(self) -> str:
254 return 'use'
255
256 def Generate(self, func: str, statement: int) -> str:
257 return '{}\t{}\t{}\n'.format(func, statement,
258 SymbolPathToReference(func, self.ref))
259
260
261class Bind(Fact):
262 """
263 Binding a reference to a positional function parameter.
264 """
265
266 def __init__(self, ref: SymbolPath, callee: SymbolPath,
267 arg_pos: int) -> None:
268 self.ref = ref
269 self.callee = callee
270 self.arg_pos = arg_pos
271
272 def name(self) -> str:
273 return 'bind'
274
275 def Generate(self, func: str, statement: int) -> str:
276 return '{}\t{}\t{}\t{}\t{}\n'.format(
277 func, statement, SymbolPathToReference(func, self.ref),
278 join_name(self.callee, delim='.'), self.arg_pos)
279
280
281class ControlFlowGraph(object):
282 """
283 A simple control-flow graph.
284
285 Every statement in the program is represented as a node in a graph with
286 unique a numeric ID. Control flow is represented as directed edges through
287 the graph. Loops can introduce back-edges. Every node in the graph will
288 satisfy at least one of the following conditions:
289
290 - Its indegree is at least one.
291
292 - Its outdegree is at least one.
293
294 For simple linear graphs all you need is the AddStatement method. For more
295 complex flows there is a set of context managers below to help simplify
296 construction.
297
298 - For branches-like statements (e.g. if- and try- statements) use
299 CfgBranchContext. It will take care of the details associated with
300 stitching the different branches to statements in the next statement.
301
302 - For loops, use CfgLoopContext. It will take care of adding back-edges
303 and connecting break statements to any statements that proceed the
304 loop.
305
306 - CfgBlockContext can be used for simple cases where you just want to
307 track the beginning and end of a sequence of statements.
308
309 Statements can carry annotations called facts, which are used as inputs to
310 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
311 To annotate a statement, use the AddFact method with any object that
312 implements the Fact interface.
313
314 See the unit tests in pass_state_test.py and the mycpp phase in
315 control_flow_pass.py for detailed examples of usage.
316 """
317
318 def __init__(self) -> None:
319 self.statement_counter: int = 0
320 self.edges: set[tuple[int, int]] = set({})
321 self.block_stack: list[int] = []
322 self.predecessors: set[int] = set({})
323 self.deadends: set[int] = set({})
324
325 # order doesn't actually matter here, but sets require elements to be
326 # hashable
327 self.facts: dict[int, list[Fact]] = defaultdict(list)
328
329 def AddEdge(self, pred: int, succ: int) -> None:
330 """
331 Add a directed edge from pred to succ. If pred is a deadend, its
332 non-deadends will be used instead.
333 """
334 if pred in self.deadends:
335 for w in [u for (u, v) in self.edges if v == pred]:
336 self.AddEdge(w, succ)
337 else:
338 self.edges.add((pred, succ))
339
340 def AddDeadend(self, statement: int):
341 """
342 Mark a statement as a dead-end (e.g. return or continue).
343 """
344 self.deadends.add(statement)
345
346 def AddStatement(self) -> int:
347 """
348 Add a new statement and return its ID.
349 """
350 if len(self.predecessors) == 0:
351 if len(self.block_stack):
352 self.predecessors.add(self.block_stack[-1])
353 else:
354 self.predecessors.add(self.statement_counter)
355
356 self.statement_counter += 1
357 for pred in self.predecessors:
358 self.AddEdge(pred, self.statement_counter)
359
360 self.predecessors = set({})
361
362 if len(self.block_stack):
363 self.block_stack[-1] = self.statement_counter
364
365 return self.statement_counter
366
367 def AddFact(self, statement: int, fact: Fact) -> None:
368 """
369 Annotate a statement with a fact.
370 """
371 self.facts[statement].append(fact)
372
373 def _PushBlock(self, begin: Optional[int] = None) -> int:
374 """
375 Start a block at the given statement ID. If a beginning statement isn't
376 provided one will be created and its ID will be returend.
377
378 Direct use of this function is discouraged. Consider using one of the
379 block context managers below instead.
380 """
381 if begin is None:
382 begin = self.AddStatement()
383 else:
384 self.predecessors.add(begin)
385
386 self.block_stack.append(begin)
387 return begin
388
389 def _PopBlock(self) -> int:
390 """
391 Pop a block from the top of the stack and return the ID of the block's
392 last statement.
393
394 Direct use of this function is discouraged. Consider using one of the
395 block context managers below instead.
396 """
397 assert len(self.block_stack)
398 last = self.block_stack.pop()
399 if len(self.block_stack) and last not in self.deadends:
400 self.block_stack[-1] = last
401
402 return last
403
404
405class CfgBlockContext(object):
406 """
407 Context manager to make dealing with things like with-statements easier.
408 """
409
410 def __init__(self,
411 cfg: ControlFlowGraph,
412 begin: Optional[int] = None) -> None:
413 self.cfg = cfg
414 if cfg is None:
415 return
416
417 self.entry = self.cfg._PushBlock(begin)
418 self.exit = self.entry
419
420 def __enter__(self) -> None:
421 return self if self.cfg else None
422
423 def __exit__(self, *args) -> None:
424 if not self.cfg:
425 return
426
427 self.exit = self.cfg._PopBlock()
428
429
430class CfgBranchContext(object):
431 """
432 Context manager to make dealing with if-else blocks easier.
433 """
434
435 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
436 self.cfg = cfg
437 self.entry = branch_point
438 self.exit = self.entry
439 if cfg is None:
440 return
441
442 self.arms = []
443 self.pushed = False
444
445 def AddBranch(self, entry: Optional[int] = None):
446 if not self.cfg:
447 return CfgBranchContext(None, None)
448
449 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
450 self.cfg._PushBlock(self.arms[-1].entry)
451 self.arms[-1].pushed = True
452 return self.arms[-1]
453
454 def __enter__(self) -> None:
455 return self
456
457 def __exit__(self, *args) -> None:
458 if not self.cfg:
459 return
460
461 if self.pushed:
462 self.exit = self.cfg._PopBlock()
463
464 for arm in self.arms:
465 if arm.exit not in self.cfg.deadends:
466 self.cfg.predecessors.add(arm.exit)
467
468
469class CfgLoopContext(object):
470 """
471 Context manager to make dealing with loops easier.
472 """
473
474 def __init__(self,
475 cfg: ControlFlowGraph,
476 entry: Optional[int] = None) -> None:
477 self.cfg = cfg
478 self.breaks = set({})
479 if cfg is None:
480 return
481
482 self.entry = self.cfg._PushBlock(entry)
483 self.exit = self.entry
484
485 def AddBreak(self, statement: int) -> None:
486 assert self.cfg
487 self.breaks.add(statement)
488 self.cfg.AddDeadend(statement)
489
490 def AddContinue(self, statement: int) -> None:
491 self.cfg.AddEdge(statement, self.entry)
492 self.cfg.AddDeadend(statement)
493
494 def __enter__(self) -> None:
495 return self if self.cfg else None
496
497 def __exit__(self, *args) -> None:
498 if not self.cfg:
499 return
500
501 self.exit = self.cfg._PopBlock()
502 self.cfg.AddEdge(self.exit, self.entry)
503 for pred in self.cfg.predecessors:
504 self.cfg.AddEdge(pred, self.entry)
505
506 # If we had any breaks, arm the predecessor set with the current
507 # statement and the break statements.
508 if len(self.breaks):
509 if len(self.cfg.block_stack):
510 self.cfg.predecessors.add(self.cfg.block_stack[-1])
511 else:
512 self.cfg.predecessors.add(self.cfg.statement_counter)
513
514 for b in self.breaks:
515 self.cfg.deadends.remove(b)
516 self.cfg.predecessors.add(b)
517
518
519class StackRoots(object):
520 """
521 Output of the souffle stack roots solver.
522 """
523
524 def __init__(self, tuples: set[tuple[SymbolPath, SymbolPath]]) -> None:
525 self.root_tuples = tuples
526
527 def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
528 """
529 Returns true if the given reference should have a stack root.
530 """
531 return (func, reference) in self.root_tuples
532
533
534def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
535 facts_dir='_tmp/mycpp-facts') -> None:
536 """
537 Dump the given control flow graphs and associated facts into the given
538 directory as text files that can be consumed by datalog.
539 """
540 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
541 fact_files = {}
542 os.makedirs(facts_dir, exist_ok=True)
543 with open(edge_facts, 'w') as cfg_f:
544 for func, cfg in sorted(cfgs.items()):
545 joined = join_name(func, delim='.')
546 for (u, v) in sorted(cfg.edges):
547 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
548
549 for statement, facts in sorted(cfg.facts.items()):
550 for fact in facts: # already sorted temporally
551 fact_f = fact_files.get(fact.name())
552 if not fact_f:
553 fact_f = open(
554 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
555 fact_files[fact.name()] = fact_f
556
557 fact_f.write(fact.Generate(joined, statement))
558
559 for f in fact_files.values():
560 f.close()
561
562
563def ComputeMinimalStackRoots(cfgs: dict[str, ControlFlowGraph],
564 facts_dir: str = '_tmp/mycpp-facts',
565 souffle_output_dir: str = '_tmp') -> StackRoots:
566 """
567 Run the the souffle stack roots solver and translate its output in a format
568 that can be queried by cppgen_pass.
569 """
570 DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
571 subprocess.check_call([
572 '_bin/datalog/dataflow',
573 '-F',
574 facts_dir,
575 '-D',
576 souffle_output_dir,
577 ])
578
579 tuples: set[tuple[SymbolPath, SymbolPath]] = set({})
580 with open('{}/stack_root_vars.tsv'.format(souffle_output_dir),
581 'r') as roots_f:
582 pat = re.compile(r'\$(.*)\((.*), (.*)\)')
583 for line in roots_f:
584 function, ref = line.split('\t')
585 m = pat.match(ref)
586 assert m.group(1) in ('LocalVariable', 'ObjectMember')
587 if m.group(1) == 'LocalVariable':
588 _, ref_func, var_name = m.groups()
589 assert ref_func == function
590 tuples.add((split_py_name(function), (var_name, )))
591
592 if m.group(1) == 'ObjectMember':
593 _, base_obj, member_name = m.groups()
594 tuples.add((split_py_name(function),
595 split_py_name(base_obj) + (member_name, )))
596
597 return StackRoots(tuples)