OILS / mycpp / pass_state.py View on Github | oils.pub

611 lines, 318 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7import re
8import subprocess
9from collections import defaultdict
10
11from mypy.types import Type
12from mypy.nodes import Expression
13
14from mycpp.util import SymbolToString, log, SplitPyName, SymbolPath
15
16from typing import Optional, List, Dict, Tuple, Set, Any
17
18_ = log
19
20
21class member_t(object):
22 pass
23
24
25class ModuleMember(member_t):
26 """
27 A member of a Python module.
28
29 e.g. core.state.Mem => core::state::Mem
30 """
31
32 def __init__(self, module_path: SymbolPath, member: str) -> None:
33 self.module_path = module_path
34 self.member = member
35
36
37class StaticClassMember(member_t):
38 """
39 A static member of an object. Usually a a method like an alternative constructor.
40
41 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
42 """
43
44 def __init__(self, type_name: str, member: str) -> None:
45 self.type_name = type_name
46 self.member = member
47
48
49class HeapObjectMember(member_t):
50 """
51 A member of a heap-allocated object.
52
53 e.g foo.empty() => foo->empty()
54 """
55
56 def __init__(self, object_expr: Expression, object_type: Type,
57 member: str) -> None:
58 self.object_expr = object_expr
59 self.object_type = object_type
60 self.member = member
61
62
63class StackObjectMember(member_t):
64 """
65 A member of a stack-allocated object.
66
67 e.g foo.empty() => foo.empty()
68 """
69
70 def __init__(self, object_expr: Expression, object_type: Type,
71 member: str) -> None:
72 self.object_expr = object_expr
73 self.object_type = object_type
74 self.member = member
75
76
77class Virtual(object):
78 """Calculate which C++ methods need the virtual keyword.
79
80 See unit test for example usage.
81 """
82
83 def __init__(self) -> None:
84 self.methods: Dict[SymbolPath, List[str]] = defaultdict(list)
85 self.subclasses: Dict[SymbolPath, List[SymbolPath]] = defaultdict(list)
86 self.virtuals: Dict[Tuple[SymbolPath, str], Optional[Tuple[SymbolPath,
87 str]]] = {}
88 self.has_vtable: Dict[SymbolPath, bool] = {}
89 self.can_reorder_fields: Dict[SymbolPath, bool] = {}
90
91 # _Executor -> vm::_Executor
92 self.base_class_unique: Dict[str, SymbolPath] = {}
93
94 # These are called on the Forward Declare pass
95 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
96 #log('VIRTUAL OnMethod %s %s', class_name, method_name)
97
98 # __init__ and so forth don't count
99 if method_name.startswith('__') and method_name.endswith('__'):
100 return
101
102 self.methods[class_name].append(method_name)
103
104 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
105 #log('VIRTUAL OnSubclass base %s -> %s', base_class, subclass)
106 if len(base_class) > 1:
107 # Hack for
108 #
109 # class _Executor: pass
110 # versus
111 # class MyExecutor(vm._Executor): pass
112 base_key = base_class[-1]
113
114 # Fail if we have two base classes in different namespaces with the same
115 # name.
116 if base_key in self.base_class_unique:
117 # Make sure we don't have collisions
118 assert (self.base_class_unique[base_key] == base_class or
119 base_class
120 in self.subclasses[self.base_class_unique[base_key]]
121 ), base_class
122 else:
123 self.base_class_unique[base_key] = base_class
124
125 self.subclasses[base_class].append(subclass)
126
127 def Calculate(self) -> None:
128 """Call this after the forward declare pass."""
129 for base_class, subclasses in self.subclasses.items():
130 self.can_reorder_fields[base_class] = False
131
132 for subclass in subclasses:
133 self.can_reorder_fields[subclass] = False
134
135 b_methods = self.methods[base_class]
136 s_methods = self.methods[subclass]
137 overlapping = set(b_methods) & set(s_methods)
138 for method in overlapping:
139 self.virtuals[(base_class, method)] = None
140 self.virtuals[(subclass, method)] = (base_class, method)
141 if overlapping:
142 self.has_vtable[base_class] = True
143 self.has_vtable[subclass] = True
144
145 # These is called on the Decl pass
146 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
147 return (class_name, method_name) in self.virtuals
148
149 def HasVTable(self, class_name: SymbolPath) -> bool:
150 return class_name in self.has_vtable
151
152 def CanReorderFields(self, class_name: SymbolPath) -> bool:
153 if class_name in self.can_reorder_fields:
154 return self.can_reorder_fields[class_name]
155 else:
156 return True # by default they can be reordered
157
158
159def SymbolPathToReference(func: str, p: SymbolPath) -> str:
160 if len(p) > 1:
161 return '$ObjectMember({}, {})'.format(
162 SymbolToString(p[:-1], delim='.'), p[-1])
163
164 return '$LocalVariable({}, {})'.format(func, p[0])
165
166
167class Fact(object):
168 """
169 An abstract fact. These can be used to build up datalog programs.
170 """
171
172 def __init__(self) -> None:
173 pass
174
175 @staticmethod
176 def name() -> str:
177 raise NotImplementedError()
178
179 def Generate(self, func: str, statement: int) -> str:
180 raise NotImplementedError()
181
182
183class FunctionCall(Fact):
184
185 def __init__(self, callee: str) -> None:
186 self.callee = callee
187
188 @staticmethod
189 def name() -> str:
190 return 'call'
191
192 def Generate(self, func: str, statement: int) -> str:
193 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
194
195
196class Definition(Fact):
197 """
198 The definition of a variable. This corresponds to an allocation.
199 """
200
201 def __init__(self, ref: SymbolPath, obj: str) -> None:
202 self.ref = ref
203 self.obj = obj
204
205 @staticmethod
206 def name() -> str:
207 return 'assign'
208
209 def Generate(self, func: str, statement: int) -> str:
210 return '{}\t{}\t{}\t{}\n'.format(func, statement,
211 SymbolPathToReference(func, self.ref),
212 self.obj)
213
214
215class Assignment(Fact):
216 """
217 The assignment of one variable or object member to another.
218 """
219
220 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
221 self.lhs = lhs
222 self.rhs = rhs
223
224 @staticmethod
225 def name() -> str:
226 return 'assign'
227
228 def Generate(self, func: str, statement: int) -> str:
229 return '{}\t{}\t{}\t$Ref({})\n'.format(
230 func, statement, SymbolPathToReference(func, self.lhs),
231 SymbolPathToReference(func, self.rhs))
232
233
234class Use(Fact):
235 """
236 The use of a reference.
237
238 In the last assignment below, we would emit Use(foo) and Use(x). We would,
239 however, not emit Use(foo.a) since it is an lvalue and would instead be
240 covered by the Assign fact. Similarly, the first two assignments do not
241 generate Use facts.
242
243 foo = Foo()
244 x = Bar()
245 foo.a = x
246
247 Any time a reference appears in an expression (or expression-statement) it
248 will be considered used.
249
250 some_function(a) => Use(a)
251 a + b => Use(a), Use(b)
252 print(thing.dict[key]) => Use(thing), Use(thing.dict), Use(key)
253 obj.func() => Use(obj)
254 """
255
256 def __init__(self, ref: SymbolPath) -> None:
257 self.ref = ref
258
259 @staticmethod
260 def name() -> str:
261 return 'use'
262
263 def Generate(self, func: str, statement: int) -> str:
264 return '{}\t{}\t{}\n'.format(func, statement,
265 SymbolPathToReference(func, self.ref))
266
267
268class Bind(Fact):
269 """
270 Binding a reference to a positional function parameter.
271 """
272
273 def __init__(self, ref: SymbolPath, callee: SymbolPath,
274 arg_pos: int) -> None:
275 self.ref = ref
276 self.callee = callee
277 self.arg_pos = arg_pos
278
279 @staticmethod
280 def name() -> str:
281 return 'bind'
282
283 def Generate(self, func: str, statement: int) -> str:
284 return '{}\t{}\t{}\t{}\t{}\n'.format(
285 func, statement, SymbolPathToReference(func, self.ref),
286 SymbolToString(self.callee, delim='.'), self.arg_pos)
287
288
289class ControlFlowGraph(object):
290 """
291 A simple control-flow graph.
292
293 Every statement in the program is represented as a node in a graph with
294 unique a numeric ID. Control flow is represented as directed edges through
295 the graph. Loops can introduce back-edges. Every node in the graph will
296 satisfy at least one of the following conditions:
297
298 - Its indegree is at least one.
299
300 - Its outdegree is at least one.
301
302 For simple linear graphs all you need is the AddStatement method. For more
303 complex flows there is a set of context managers below to help simplify
304 construction.
305
306 - For branches-like statements (e.g. if- and try- statements) use
307 CfgBranchContext. It will take care of the details associated with
308 stitching the different branches to statements in the next statement.
309
310 - For loops, use CfgLoopContext. It will take care of adding back-edges
311 and connecting break statements to any statements that proceed the
312 loop.
313
314 - CfgBlockContext can be used for simple cases where you just want to
315 track the beginning and end of a sequence of statements.
316
317 Statements can carry annotations called facts, which are used as inputs to
318 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
319 To annotate a statement, use the AddFact method with any object that
320 implements the Fact interface.
321
322 See the unit tests in pass_state_test.py and the mycpp phase in
323 control_flow_pass.py for detailed examples of usage.
324 """
325
326 def __init__(self) -> None:
327 self.statement_counter: int = 0
328 self.edges: Set[Tuple[int, int]] = set({})
329 self.block_stack: List[int] = []
330 self.predecessors: Set[int] = set({})
331 self.deadends: Set[int] = set({})
332
333 # order doesn't actually matter here, but sets require elements to be
334 # hashable
335 self.facts: Dict[int, List[Fact]] = defaultdict(list)
336
337 def AddEdge(self, pred: int, succ: int) -> None:
338 """
339 Add a directed edge from pred to succ. If pred is a deadend, its
340 non-deadends will be used instead.
341 """
342 if pred in self.deadends:
343 for w in [u for (u, v) in self.edges if v == pred]:
344 self.AddEdge(w, succ)
345 else:
346 self.edges.add((pred, succ))
347
348 def AddDeadend(self, statement: int) -> None:
349 """
350 Mark a statement as a dead-end (e.g. return or continue).
351 """
352 self.deadends.add(statement)
353
354 def AddStatement(self) -> int:
355 """
356 Add a new statement and return its ID.
357 """
358 if len(self.predecessors) == 0:
359 if len(self.block_stack):
360 self.predecessors.add(self.block_stack[-1])
361 else:
362 self.predecessors.add(self.statement_counter)
363
364 self.statement_counter += 1
365 for pred in self.predecessors:
366 self.AddEdge(pred, self.statement_counter)
367
368 self.predecessors = set({})
369
370 if len(self.block_stack):
371 self.block_stack[-1] = self.statement_counter
372
373 return self.statement_counter
374
375 def AddFact(self, statement: int, fact: Fact) -> None:
376 """
377 Annotate a statement with a fact.
378 """
379 self.facts[statement].append(fact)
380
381 def _PushBlock(self, begin: Optional[int] = None) -> int:
382 """
383 Start a block at the given statement ID. If a beginning statement isn't
384 provided one will be created and its ID will be returend.
385
386 Direct use of this function is discouraged. Consider using one of the
387 block context managers below instead.
388 """
389 if begin is None:
390 begin = self.AddStatement()
391 else:
392 self.predecessors.add(begin)
393
394 self.block_stack.append(begin)
395 return begin
396
397 def _PopBlock(self) -> int:
398 """
399 Pop a block from the top of the stack and return the ID of the block's
400 last statement.
401
402 Direct use of this function is discouraged. Consider using one of the
403 block context managers below instead.
404 """
405 assert len(self.block_stack)
406 last = self.block_stack.pop()
407 if len(self.block_stack) and last not in self.deadends:
408 self.block_stack[-1] = last
409
410 return last
411
412
413class CfgBlockContext(object):
414 """
415 Context manager to make dealing with things like with-statements easier.
416 """
417
418 def __init__(self,
419 cfg: ControlFlowGraph,
420 begin: Optional[int] = None) -> None:
421 self.cfg = cfg
422 if cfg is None:
423 return
424
425 self.entry = self.cfg._PushBlock(begin)
426 self.exit = self.entry
427
428 def __enter__(self) -> Optional['CfgBlockContext']:
429 return self if self.cfg else None
430
431 def __exit__(self, *args: Any) -> None:
432 if not self.cfg:
433 return
434
435 self.exit = self.cfg._PopBlock()
436
437
438class CfgBranchContext(object):
439 """
440 Context manager to make dealing with if-else blocks easier.
441 """
442
443 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
444 self.cfg = cfg
445 self.entry = branch_point
446 self.exit = self.entry
447 if cfg is None:
448 return
449
450 self.arms: List[CfgBranchContext] = []
451 self.pushed = False
452
453 def AddBranch(self, entry: Optional[int] = None) -> 'CfgBranchContext':
454 if not self.cfg:
455 return CfgBranchContext(None, None)
456
457 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
458 self.cfg._PushBlock(self.arms[-1].entry)
459 self.arms[-1].pushed = True
460 return self.arms[-1]
461
462 def __enter__(self) -> 'CfgBranchContext':
463 return self
464
465 def __exit__(self, *args: Any) -> None:
466 if not self.cfg:
467 return
468
469 if self.pushed:
470 self.exit = self.cfg._PopBlock()
471
472 for arm in self.arms:
473 if arm.exit not in self.cfg.deadends:
474 self.cfg.predecessors.add(arm.exit)
475
476
477class CfgLoopContext(object):
478 """
479 Context manager to make dealing with loops easier.
480 """
481
482 def __init__(self,
483 cfg: ControlFlowGraph,
484 entry: Optional[int] = None) -> None:
485 self.cfg = cfg
486 self.breaks: Set[int] = set()
487 if cfg is None:
488 return
489
490 self.entry = self.cfg._PushBlock(entry)
491 self.exit = self.entry
492
493 def AddBreak(self, statement: int) -> None:
494 assert self.cfg
495 self.breaks.add(statement)
496 self.cfg.AddDeadend(statement)
497
498 def AddContinue(self, statement: int) -> None:
499 self.cfg.AddEdge(statement, self.entry)
500 self.cfg.AddDeadend(statement)
501
502 def __enter__(self) -> Optional['CfgLoopContext']:
503 return self if self.cfg else None
504
505 def __exit__(self, *args: Any) -> None:
506 if not self.cfg:
507 return
508
509 self.exit = self.cfg._PopBlock()
510 self.cfg.AddEdge(self.exit, self.entry)
511 for pred in self.cfg.predecessors:
512 self.cfg.AddEdge(pred, self.entry)
513
514 # If we had any breaks, arm the predecessor set with the current
515 # statement and the break statements.
516 if len(self.breaks):
517 if len(self.cfg.block_stack):
518 self.cfg.predecessors.add(self.cfg.block_stack[-1])
519 else:
520 self.cfg.predecessors.add(self.cfg.statement_counter)
521
522 for b in self.breaks:
523 self.cfg.deadends.remove(b)
524 self.cfg.predecessors.add(b)
525
526
527class StackRoots(object):
528 """
529 Output of the souffle stack roots solver.
530 """
531
532 def __init__(self, tuples: Set[Tuple[SymbolPath, SymbolPath]]) -> None:
533 self.root_tuples = tuples
534
535 def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
536 """
537 Returns true if the given reference should have a stack root.
538 """
539 return (func, reference) in self.root_tuples
540
541
542def DumpControlFlowGraphs(cfgs: Dict[SymbolPath, ControlFlowGraph],
543 facts_dir: str = '_tmp/mycpp-facts') -> None:
544 """
545 Dump the given control flow graphs and associated facts into the given
546 directory as text files that can be consumed by datalog.
547 """
548 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
549
550 os.makedirs(facts_dir, exist_ok=True)
551 # Open files for all facts that we might emit even if we don't end up having
552 # anything to write to them. Souffle will complain if it can't find the file
553 # for anything marked as an input.
554 fact_files = {
555 fact_type.name():
556 open('{}/{}.facts'.format(facts_dir, fact_type.name()), 'w')
557 for fact_type in Fact.__subclasses__()
558 }
559 with open(edge_facts, 'w') as cfg_f:
560 for func, cfg in sorted(cfgs.items()):
561 joined = SymbolToString(func, delim='.')
562 for (u, v) in sorted(cfg.edges):
563 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
564
565 for statement, facts in sorted(cfg.facts.items()):
566 for fact in facts: # already sorted temporally
567 fact_f = fact_files[fact.name()]
568 fact_f.write(fact.Generate(joined, statement))
569
570 for f in fact_files.values():
571 f.close()
572
573
574def ComputeMinimalStackRoots(cfgs: Dict[SymbolPath, ControlFlowGraph],
575 souffle_dir: str = '_tmp') -> StackRoots:
576 """
577 Run the the souffle stack roots solver and translate its output in a format
578 that can be queried by cppgen_pass.
579 """
580 facts_dir = '{}/facts'.format(souffle_dir)
581 os.makedirs(facts_dir)
582 output_dir = '{}/outputs'.format(souffle_dir)
583 os.makedirs(output_dir)
584 DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
585
586 subprocess.check_call([
587 '_bin/datalog/dataflow',
588 '-F',
589 facts_dir,
590 '-D',
591 output_dir,
592 ])
593
594 tuples: Set[Tuple[SymbolPath, SymbolPath]] = set({})
595 with open('{}/stack_root_vars.tsv'.format(output_dir), 'r') as roots_f:
596 pat = re.compile(r'\$(.*)\((.*), (.*)\)')
597 for line in roots_f:
598 function, ref = line.split('\t')
599 m = pat.match(ref)
600 assert m.group(1) in ('LocalVariable', 'ObjectMember')
601 if m.group(1) == 'LocalVariable':
602 _, ref_func, var_name = m.groups()
603 assert ref_func == function
604 tuples.add((SplitPyName(function), (var_name, )))
605
606 if m.group(1) == 'ObjectMember':
607 _, base_obj, member_name = m.groups()
608 tuples.add((SplitPyName(function),
609 SplitPyName(base_obj) + (member_name, )))
610
611 return StackRoots(tuples)