OILS / mycpp / pass_state.py View on Github | oilshell.org

610 lines, 319 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7import re
8import subprocess
9from collections import defaultdict
10
11from mypy.types import Type
12from mypy.nodes import Expression
13
14from mycpp.util import join_name, log, split_py_name, SymbolPath
15
16from typing import Optional
17
18_ = log
19
20
21class ModuleMember(object):
22 """
23 A member of a Python module.
24
25 e.g. core.state.Mem => core::state::Mem
26 """
27
28 def __init__(self, module_path: SymbolPath, member: str) -> None:
29 self.module_path = module_path
30 self.member = member
31
32
33class StaticObjectMember(object):
34 """
35 A static member of an object. Usually a a method like an alternative constructor.
36
37 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
38 """
39
40 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
41 self.base_type_name = base_type_name
42 self.member = member
43
44
45class HeapObjectMember(object):
46 """
47 A member of a heap-allocated object.
48
49 e.g foo.empty() => foo->empty()
50 """
51
52 def __init__(self, object_expr: Expression, object_type: Type,
53 member: str) -> None:
54 self.object_expr = object_expr
55 self.object_type = object_type
56 self.member = member
57
58
59class StackObjectMember(object):
60 """
61 A member of a stack-allocated object.
62
63 e.g foo.empty() => foo.empty()
64 """
65
66 def __init__(self, object_expr: Expression, object_type: Type,
67 member: str) -> None:
68 self.ojbect_expr = object_expr
69 self.object_type = object_type
70 self.member = member
71
72
73class Virtual(object):
74 """Calculate which C++ methods need the virtual keyword.
75
76 See unit test for example usage.
77 """
78
79 def __init__(self) -> None:
80 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
81 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
82 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
83 str]]] = {}
84 self.has_vtable: dict[SymbolPath, bool] = {}
85 self.can_reorder_fields: dict[SymbolPath, bool] = {}
86
87 # _Executor -> vm::_Executor
88 self.base_class_unique: dict[str, SymbolPath] = {}
89
90 # These are called on the Forward Declare pass
91 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
92 #log('OnMethod %s %s', class_name, method_name)
93
94 # __init__ and so forth don't count
95 if method_name.startswith('__') and method_name.endswith('__'):
96 return
97
98 self.methods[class_name].append(method_name)
99
100 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
101 if len(base_class) > 1:
102 # Hack for
103 #
104 # class _Executor: pass
105 # versus
106 # class MyExecutor(vm._Executor): pass
107 base_key = base_class[-1]
108
109 # Fail if we have two base classes in different namespaces with the same
110 # name.
111 if base_key in self.base_class_unique:
112 # Make sure we don't have collisions
113 assert (self.base_class_unique[base_key] == base_class or
114 base_class
115 in self.subclasses[self.base_class_unique[base_key]]
116 ), base_class
117 else:
118 self.base_class_unique[base_key] = base_class
119
120 else:
121 base_key = base_class
122
123 self.subclasses[base_class].append(subclass)
124
125 def Calculate(self) -> None:
126 """Call this after the forward declare pass."""
127 for base_class, subclasses in self.subclasses.items():
128 self.can_reorder_fields[base_class] = False
129
130 for subclass in subclasses:
131 self.can_reorder_fields[subclass] = False
132
133 b_methods = self.methods[base_class]
134 s_methods = self.methods[subclass]
135 overlapping = set(b_methods) & set(s_methods)
136 for method in overlapping:
137 self.virtuals[(base_class, method)] = None
138 self.virtuals[(subclass, method)] = (base_class, method)
139 if overlapping:
140 self.has_vtable[base_class] = True
141 self.has_vtable[subclass] = True
142
143 # These is called on the Decl pass
144 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
145 return (class_name, method_name) in self.virtuals
146
147 def HasVTable(self, class_name: SymbolPath) -> bool:
148 return class_name in self.has_vtable
149
150 def CanReorderFields(self, class_name: SymbolPath) -> bool:
151 if class_name in self.can_reorder_fields:
152 return self.can_reorder_fields[class_name]
153 else:
154 return True # by default they can be reordered
155
156
157def SymbolPathToReference(func: str, p: SymbolPath) -> str:
158 if len(p) > 1:
159 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'),
160 p[-1])
161
162 return '$LocalVariable({}, {})'.format(func, p[0])
163
164
165class Fact(object):
166 """
167 An abstract fact. These can be used to build up datalog programs.
168 """
169
170 def __init__(self) -> None:
171 pass
172
173 @staticmethod
174 def name() -> str:
175 raise NotImplementedError()
176
177 def Generate(self, func: str, statement: int) -> str:
178 raise NotImplementedError()
179
180
181class FunctionCall(Fact):
182
183 def __init__(self, callee: str) -> None:
184 self.callee = callee
185
186 @staticmethod
187 def name() -> str:
188 return 'call'
189
190 def Generate(self, func: str, statement: int) -> str:
191 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
192
193
194class Definition(Fact):
195 """
196 The definition of a variable. This corresponds to an allocation.
197 """
198
199 def __init__(self, ref: SymbolPath, obj: str) -> None:
200 self.ref = ref
201 self.obj = obj
202
203 @staticmethod
204 def name() -> str:
205 return 'assign'
206
207 def Generate(self, func: str, statement: int) -> str:
208 return '{}\t{}\t{}\t{}\n'.format(func, statement,
209 SymbolPathToReference(func, self.ref),
210 self.obj)
211
212
213class Assignment(Fact):
214 """
215 The assignment of one variable or object member to another.
216 """
217
218 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
219 self.lhs = lhs
220 self.rhs = rhs
221
222 @staticmethod
223 def name() -> str:
224 return 'assign'
225
226 def Generate(self, func: str, statement: int) -> str:
227 return '{}\t{}\t{}\t$Ref({})\n'.format(
228 func, statement, SymbolPathToReference(func, self.lhs),
229 SymbolPathToReference(func, self.rhs))
230
231
232class Use(Fact):
233 """
234 The use of a reference.
235
236 In the last assignment below, we would emit Use(foo) and Use(x). We would,
237 however, not emit Use(foo.a) since it is an lvalue and would instead be
238 covered by the Assign fact. Similarly, the first two assignments do not
239 generate Use facts.
240
241 foo = Foo()
242 x = Bar()
243 foo.a = x
244
245 Any time a reference appears in an expression (or expression-statement) it
246 will be considered used.
247
248 some_function(a) => Use(a)
249 a + b => Use(a), Use(b)
250 print(thing.dict[key]) => Use(thing), Use(thing.dict), Use(key)
251 obj.func() => Use(obj)
252 """
253
254 def __init__(self, ref: SymbolPath) -> None:
255 self.ref = ref
256
257 @staticmethod
258 def name() -> str:
259 return 'use'
260
261 def Generate(self, func: str, statement: int) -> str:
262 return '{}\t{}\t{}\n'.format(func, statement,
263 SymbolPathToReference(func, self.ref))
264
265
266class Bind(Fact):
267 """
268 Binding a reference to a positional function parameter.
269 """
270
271 def __init__(self, ref: SymbolPath, callee: SymbolPath,
272 arg_pos: int) -> None:
273 self.ref = ref
274 self.callee = callee
275 self.arg_pos = arg_pos
276
277 @staticmethod
278 def name() -> str:
279 return 'bind'
280
281 def Generate(self, func: str, statement: int) -> str:
282 return '{}\t{}\t{}\t{}\t{}\n'.format(
283 func, statement, SymbolPathToReference(func, self.ref),
284 join_name(self.callee, delim='.'), self.arg_pos)
285
286
287class ControlFlowGraph(object):
288 """
289 A simple control-flow graph.
290
291 Every statement in the program is represented as a node in a graph with
292 unique a numeric ID. Control flow is represented as directed edges through
293 the graph. Loops can introduce back-edges. Every node in the graph will
294 satisfy at least one of the following conditions:
295
296 - Its indegree is at least one.
297
298 - Its outdegree is at least one.
299
300 For simple linear graphs all you need is the AddStatement method. For more
301 complex flows there is a set of context managers below to help simplify
302 construction.
303
304 - For branches-like statements (e.g. if- and try- statements) use
305 CfgBranchContext. It will take care of the details associated with
306 stitching the different branches to statements in the next statement.
307
308 - For loops, use CfgLoopContext. It will take care of adding back-edges
309 and connecting break statements to any statements that proceed the
310 loop.
311
312 - CfgBlockContext can be used for simple cases where you just want to
313 track the beginning and end of a sequence of statements.
314
315 Statements can carry annotations called facts, which are used as inputs to
316 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
317 To annotate a statement, use the AddFact method with any object that
318 implements the Fact interface.
319
320 See the unit tests in pass_state_test.py and the mycpp phase in
321 control_flow_pass.py for detailed examples of usage.
322 """
323
324 def __init__(self) -> None:
325 self.statement_counter: int = 0
326 self.edges: set[tuple[int, int]] = set({})
327 self.block_stack: list[int] = []
328 self.predecessors: set[int] = set({})
329 self.deadends: set[int] = set({})
330
331 # order doesn't actually matter here, but sets require elements to be
332 # hashable
333 self.facts: dict[int, list[Fact]] = defaultdict(list)
334
335 def AddEdge(self, pred: int, succ: int) -> None:
336 """
337 Add a directed edge from pred to succ. If pred is a deadend, its
338 non-deadends will be used instead.
339 """
340 if pred in self.deadends:
341 for w in [u for (u, v) in self.edges if v == pred]:
342 self.AddEdge(w, succ)
343 else:
344 self.edges.add((pred, succ))
345
346 def AddDeadend(self, statement: int):
347 """
348 Mark a statement as a dead-end (e.g. return or continue).
349 """
350 self.deadends.add(statement)
351
352 def AddStatement(self) -> int:
353 """
354 Add a new statement and return its ID.
355 """
356 if len(self.predecessors) == 0:
357 if len(self.block_stack):
358 self.predecessors.add(self.block_stack[-1])
359 else:
360 self.predecessors.add(self.statement_counter)
361
362 self.statement_counter += 1
363 for pred in self.predecessors:
364 self.AddEdge(pred, self.statement_counter)
365
366 self.predecessors = set({})
367
368 if len(self.block_stack):
369 self.block_stack[-1] = self.statement_counter
370
371 return self.statement_counter
372
373 def AddFact(self, statement: int, fact: Fact) -> None:
374 """
375 Annotate a statement with a fact.
376 """
377 self.facts[statement].append(fact)
378
379 def _PushBlock(self, begin: Optional[int] = None) -> int:
380 """
381 Start a block at the given statement ID. If a beginning statement isn't
382 provided one will be created and its ID will be returend.
383
384 Direct use of this function is discouraged. Consider using one of the
385 block context managers below instead.
386 """
387 if begin is None:
388 begin = self.AddStatement()
389 else:
390 self.predecessors.add(begin)
391
392 self.block_stack.append(begin)
393 return begin
394
395 def _PopBlock(self) -> int:
396 """
397 Pop a block from the top of the stack and return the ID of the block's
398 last statement.
399
400 Direct use of this function is discouraged. Consider using one of the
401 block context managers below instead.
402 """
403 assert len(self.block_stack)
404 last = self.block_stack.pop()
405 if len(self.block_stack) and last not in self.deadends:
406 self.block_stack[-1] = last
407
408 return last
409
410
411class CfgBlockContext(object):
412 """
413 Context manager to make dealing with things like with-statements easier.
414 """
415
416 def __init__(self,
417 cfg: ControlFlowGraph,
418 begin: Optional[int] = None) -> None:
419 self.cfg = cfg
420 if cfg is None:
421 return
422
423 self.entry = self.cfg._PushBlock(begin)
424 self.exit = self.entry
425
426 def __enter__(self) -> None:
427 return self if self.cfg else None
428
429 def __exit__(self, *args) -> None:
430 if not self.cfg:
431 return
432
433 self.exit = self.cfg._PopBlock()
434
435
436class CfgBranchContext(object):
437 """
438 Context manager to make dealing with if-else blocks easier.
439 """
440
441 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
442 self.cfg = cfg
443 self.entry = branch_point
444 self.exit = self.entry
445 if cfg is None:
446 return
447
448 self.arms = []
449 self.pushed = False
450
451 def AddBranch(self, entry: Optional[int] = None):
452 if not self.cfg:
453 return CfgBranchContext(None, None)
454
455 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
456 self.cfg._PushBlock(self.arms[-1].entry)
457 self.arms[-1].pushed = True
458 return self.arms[-1]
459
460 def __enter__(self) -> None:
461 return self
462
463 def __exit__(self, *args) -> None:
464 if not self.cfg:
465 return
466
467 if self.pushed:
468 self.exit = self.cfg._PopBlock()
469
470 for arm in self.arms:
471 if arm.exit not in self.cfg.deadends:
472 self.cfg.predecessors.add(arm.exit)
473
474
475class CfgLoopContext(object):
476 """
477 Context manager to make dealing with loops easier.
478 """
479
480 def __init__(self,
481 cfg: ControlFlowGraph,
482 entry: Optional[int] = None) -> None:
483 self.cfg = cfg
484 self.breaks = set({})
485 if cfg is None:
486 return
487
488 self.entry = self.cfg._PushBlock(entry)
489 self.exit = self.entry
490
491 def AddBreak(self, statement: int) -> None:
492 assert self.cfg
493 self.breaks.add(statement)
494 self.cfg.AddDeadend(statement)
495
496 def AddContinue(self, statement: int) -> None:
497 self.cfg.AddEdge(statement, self.entry)
498 self.cfg.AddDeadend(statement)
499
500 def __enter__(self) -> None:
501 return self if self.cfg else None
502
503 def __exit__(self, *args) -> None:
504 if not self.cfg:
505 return
506
507 self.exit = self.cfg._PopBlock()
508 self.cfg.AddEdge(self.exit, self.entry)
509 for pred in self.cfg.predecessors:
510 self.cfg.AddEdge(pred, self.entry)
511
512 # If we had any breaks, arm the predecessor set with the current
513 # statement and the break statements.
514 if len(self.breaks):
515 if len(self.cfg.block_stack):
516 self.cfg.predecessors.add(self.cfg.block_stack[-1])
517 else:
518 self.cfg.predecessors.add(self.cfg.statement_counter)
519
520 for b in self.breaks:
521 self.cfg.deadends.remove(b)
522 self.cfg.predecessors.add(b)
523
524
525class StackRoots(object):
526 """
527 Output of the souffle stack roots solver.
528 """
529
530 def __init__(self, tuples: set[tuple[SymbolPath, SymbolPath]]) -> None:
531 self.root_tuples = tuples
532
533 def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
534 """
535 Returns true if the given reference should have a stack root.
536 """
537 return (func, reference) in self.root_tuples
538
539
540def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
541 facts_dir='_tmp/mycpp-facts') -> None:
542 """
543 Dump the given control flow graphs and associated facts into the given
544 directory as text files that can be consumed by datalog.
545 """
546 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
547
548 os.makedirs(facts_dir, exist_ok=True)
549 # Open files for all facts that we might emit even if we don't end up having
550 # anything to write to them. Souffle will complain if it can't find the file
551 # for anything marked as an input.
552 fact_files = {
553 fact_type.name():
554 open('{}/{}.facts'.format(facts_dir, fact_type.name()), 'w')
555 for fact_type in Fact.__subclasses__()
556 }
557 with open(edge_facts, 'w') as cfg_f:
558 for func, cfg in sorted(cfgs.items()):
559 joined = join_name(func, delim='.')
560 for (u, v) in sorted(cfg.edges):
561 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
562
563 for statement, facts in sorted(cfg.facts.items()):
564 for fact in facts: # already sorted temporally
565 fact_f = fact_files[fact.name()]
566 fact_f.write(fact.Generate(joined, statement))
567
568 for f in fact_files.values():
569 f.close()
570
571
572def ComputeMinimalStackRoots(cfgs: dict[str, ControlFlowGraph],
573 souffle_dir: str = '_tmp') -> StackRoots:
574 """
575 Run the the souffle stack roots solver and translate its output in a format
576 that can be queried by cppgen_pass.
577 """
578 facts_dir = '{}/facts'.format(souffle_dir)
579 os.makedirs(facts_dir)
580 output_dir = '{}/outputs'.format(souffle_dir)
581 os.makedirs(output_dir)
582 DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
583
584 subprocess.check_call([
585 '_bin/datalog/dataflow',
586 '-F',
587 facts_dir,
588 '-D',
589 output_dir,
590 ])
591
592 tuples: set[tuple[SymbolPath, SymbolPath]] = set({})
593 with open('{}/stack_root_vars.tsv'.format(output_dir),
594 'r') as roots_f:
595 pat = re.compile(r'\$(.*)\((.*), (.*)\)')
596 for line in roots_f:
597 function, ref = line.split('\t')
598 m = pat.match(ref)
599 assert m.group(1) in ('LocalVariable', 'ObjectMember')
600 if m.group(1) == 'LocalVariable':
601 _, ref_func, var_name = m.groups()
602 assert ref_func == function
603 tuples.add((split_py_name(function), (var_name, )))
604
605 if m.group(1) == 'ObjectMember':
606 _, base_obj, member_name = m.groups()
607 tuples.add((split_py_name(function),
608 split_py_name(base_obj) + (member_name, )))
609
610 return StackRoots(tuples)