OILS / opy / callgraph.py View on Github | oils.pub

496 lines, 237 significant
1from __future__ import print_function
2"""
3callgraph.py
4"""
5
6import collections
7import os
8import sys
9
10import __builtin__ # For looking up names
11import types
12
13from .lib import dis
14from .lib import inspect
15
16from .util import log
17
18
19def Disassemble(co):
20 """Given a code object, yield instructions of interest.
21
22 Args:
23 co: __code__ attribute
24
25 Structure copied from opy/compiler2/dis_tool.py, which was copied from
26 dis.disassemble().
27 """
28 code = co.co_code
29 extended_arg = 0 # Not used
30
31 i = 0
32 n = len(code)
33 #log('\tLENGTH OF CODE %s: %d', co.co_name, n)
34
35 while i < n:
36 #log('\ti = %d, n = %d', i, n)
37 op = ord(code[i])
38 i += 1
39
40 op_name = dis.opname[op]
41
42 # operation is 1 byte, argument is 2 bytes.
43 if op >= dis.HAVE_ARGUMENT:
44 oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
45 extended_arg = 0
46
47 if op == dis.EXTENDED_ARG:
48 # Hm not used?
49 raise AssertionError
50 extended_arg = oparg*65536L
51
52 i += 2
53
54 const_name = None
55 var_name = None
56
57 if op in dis.hasconst:
58 const_name = co.co_consts[oparg]
59
60 elif op in dis.hasname:
61 try:
62 var_name = co.co_names[oparg]
63 except IndexError:
64 log('Error: %r cannot index %s with %d', op_name, co.co_names,
65 oparg)
66 raise
67
68 elif op in dis.hasjrel:
69 #raise AssertionError(op_name)
70 pass
71
72 elif op in dis.haslocal:
73 #raise AssertionError(op_name)
74 pass
75
76 elif op in dis.hascompare:
77 #raise AssertionError(op_name)
78 pass
79
80 elif op in dis.hasfree:
81 #raise AssertionError(op_name)
82 pass
83
84 yield op_name, const_name, var_name
85 #log('\t==> i = %d, n = %d', i, n)
86
87
88import sre_compile
89
90def _GetAttr(module, name):
91 # Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
92 if module is sre_compile and name == 'l':
93 return None
94 # traceback.py has a hasattr() test
95 if module is sys and name == 'tracebacklimit':
96 return None
97
98 try:
99 val = getattr(module, name)
100 except AttributeError:
101 #log('%r not on %r', name, module)
102 # This could raise too
103 val = getattr(__builtin__, name)
104 return val
105
106
107def _Walk(obj, cls, ref, syms):
108 """
109 Discover statically what (globally-accessible) functions and classes are
110 used.
111
112 Args:
113 obj: a callable object
114 cls: an optional class that it was attached to, could be None
115 ref: the way the object was referenced from another place in the code
116 syms: output Symbols()
117
118 Something like this is OK:
119 # def Adder(x):
120 # def f(y):
121 # return x + y
122 # return f
123
124 Because we'll still have access to the inner code object. We probably won't
125 compile it though.
126 """
127 if syms.Seen(obj):
128 return
129
130 module_name = getattr(obj, '__module__', None)
131 # NOTE: We get duplicate objects like this, due to bound methods on different
132 # objects. Not sure how to fix it.
133 #
134 # OBJ <function Parse at 0x7fcd270b4de8>
135 # OBJ <function Parse at 0x7fcd2709e500>
136 # OBJ <built-in method get of dict object at 0x7fcd28c53280>
137 # OBJ <built-in method get of dict object at 0x7fcd28c53398>
138
139 #if obj.__name__ in ('write', 'get', 'Parse'):
140 # log('OBJ %s %d', obj, id(obj))
141 # pass
142
143 # Oh is the namedtuple_ crap because of the Block class byterun/pyobj?
144
145 if module_name is None or module_name in (
146 'namedtuple_Arguments', 'namedtuple_ArgSpec',
147 'namedtuple_Block'):
148 syms.Add(obj, None, ref, None, None, None)
149 return # Can't walk anything
150
151 #log('OBJ %s %d', obj, id(obj))
152 module = sys.modules[obj.__module__]
153
154 co = getattr(obj, '__code__', None)
155 # For example, Builtins don't have bytecode.
156 if isinstance(co, types.CodeType):
157 co = obj.__code__
158 #log('CO %s', co)
159 #path = co.co_filename
160
161 mod_name = None
162 mod_path = co.co_filename
163
164 syms.Add(obj, cls, ref, mod_name, mod_path, co.co_firstlineno)
165 else:
166 mod_name = module.__name__
167 try:
168 mod_path = module.__file__
169 if mod_path.endswith('.pyc'):
170 mod_path = mod_path[:-1]
171 except AttributeError:
172 mod_path = None
173
174 #if obj.__name__ == 'lex_mode_e':
175 # log('!!! %s name %s path %s', obj, mod.__name__, mod.__file__)
176
177 #mod_name = None
178 #mod_path = None
179
180 syms.Add(obj, cls, ref, mod_name, mod_path, None)
181 return
182
183 #log('\tNAME %s', val.__code__.co_name)
184 #log('\tNAMES %s', val.__code__.co_names)
185
186 # Most functions and classes we call are globals!
187 #log('\t_Walk %s %s', obj, module)
188 #log('\t%s', sorted(dir(module)))
189
190 # Have to account for foo.Bar(), which gives this sequence:
191 # 2 0 LOAD_GLOBAL 0 (foo)
192 # 3 LOAD_ATTR 1 (Bar)
193 # 6 CALL_FUNCTION 0
194 #
195 # Also: os.path.join().
196
197 try:
198 last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
199 ref = []
200 g = Disassemble(obj.__code__)
201
202 while True:
203 op, const, var = g.next()
204
205 if op == 'LOAD_GLOBAL':
206 val = _GetAttr(module, var)
207 ref = [var] # reset it
208
209 elif op == 'LOAD_ATTR':
210 #if last_val is not None and isinstance(last_val, types.ModuleType):
211 if last_val is not None:
212 #log('%s %s', op, var)
213 val = _GetAttr(last_val, var)
214 ref.append(var)
215
216 # Crawl the methods below. Otherwise we get duplicate bound/unbound
217 # methods, which have unique addresses.
218 # Examples: WAIT_SPEC.Parse, sys.stdout.write
219
220 # BUG: os.fork and sys.stdout.write are the same?
221 # I thought os.fork is types.BuiltinFunctionType, and
222 # sys.stdout.write is types.BuiltinMethodType, but why not?
223
224 if isinstance(val, (types.MethodType, types.BuiltinMethodType)):
225 val = None
226 ref = []
227
228 else:
229 val = None
230 ref = []
231
232 else: # Some other opcode
233 val = None
234 ref = []
235
236 if callable(val):
237 #log('VAL %s module %s', val, val.__module__)
238 # Recursive call.
239
240 # Check for old style:
241 #if isinstance(val, types.ClassType):
242 # print('OLD %s' % val)
243 _Walk(val, None, ref, syms)
244
245 # If the value is a class, walk its methods. Note that we assume ALL
246 # methods are used. It's possible to narrow this down a bit and detect
247 # unused methods.
248 if isinstance(val, type):
249 cls = val
250 #log('type %s', val)
251 for name in dir(val):
252 # prevent error with __abstractmethods__ attribute
253 #if name.startswith('__'):
254 if name == '__abstractmethods__':
255 continue
256 field_val = getattr(val, name)
257 #log('field_val %s', field_val)
258 if isinstance(field_val, types.MethodType):
259 func_obj = field_val.im_func
260 _Walk(func_obj, cls, ref, syms)
261
262 last_val = val # Used on next iteration
263
264 except StopIteration:
265 pass
266
267 #log('\tDone _Walk %s %s', obj, module)
268
269
270def PrintSig(fmt, func):
271 if os.getenv('CALLGRAPH_SIG') != '1':
272 return
273 try:
274 argspec = inspect.getargspec(func)
275 except TypeError:
276 return
277 parts = [':%s' % ' '.join(argspec.args)]
278 # These are keyword-only args?
279 if argspec.varargs:
280 parts.append('varargs=%s' % (argspec.varargs,))
281 if argspec.keywords:
282 parts.append('kw=%s' % (argspec.keywords,))
283 # Hm the default of 'None' is bad -- you can't distinguish a default of None,
284 # which is very common!
285 # It's better to get this by parsing the AST.
286 if argspec.defaults and any(d is not None for d in argspec.defaults):
287 parts.append('defaults=%s' % (argspec.defaults,))
288
289 print(fmt % ' '.join(parts))
290
291
292class Class(object):
293
294 def __init__(self, cls, mod_name, mod_path):
295 self.cls = cls
296 self.mod_name = mod_name
297 self.mod_path = mod_path
298 self.methods = []
299
300 def Name(self):
301 return self.cls.__name__
302
303 def AddMethod(self, m, mod_name, mod_path, line_num):
304 # Just assume the method is in the same file as the class itself.
305 self.methods.append((m, mod_name, mod_path, line_num))
306
307 def Print(self):
308 base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
309 print(' %s(%s)' % (self.cls.__name__, base_names))
310
311 methods = [(m.__name__, m) for (m, _, _, _) in self.methods]
312 methods.sort()
313 for name, m in methods:
314 print(' %s' % name)
315 PrintSig(' %s', m)
316
317 def __repr__(self):
318 return '<Class %s %s %s %s>' % (
319 self.cls, self.mod_name, self.mod_path, self.methods)
320
321
322class Symbols(object):
323 """A sink for discovered symbols."""
324
325 def __init__(self):
326 self.seen = set()
327
328 self.classes = {} # integer id() -> Class
329 self.functions = [] # list of callables
330
331 self.paths = {} # path -> list of functions
332
333 def Seen(self, c):
334 """c: a callable."""
335 id_ = id(c)
336 if id_ in self.seen:
337 return True
338 self.seen.add(id_)
339 return False
340
341 def Add(self, obj, cls, ref, mod_name, mod_path, line_num):
342 """Could be a function, class Constructor, or method.
343 Can also be native (C) or interpreted (Python with __code__ attribute.)
344
345 Returns:
346 True if we haven't yet seen it.
347 """
348 if mod_path is not None:
349 mod_path = os.path.normpath(mod_path)
350
351 if isinstance(obj, type):
352 id_ = id(obj)
353
354 # NOTE: Python's classes don't have a __code__ object, which appears to
355 # be an irregularity. So we have to get location information from the
356 # METHODS.
357
358 # Exception: this type has a __code__ object that is not types.CodeType
359 if obj is not types.FunctionType:
360 assert not hasattr(obj, '__code__'), obj
361 #assert path is None
362 assert line_num is None
363
364 self.classes[id_] = Class(obj, mod_name, mod_path)
365
366 elif cls is not None:
367 id_ = id(cls)
368 descriptor = self.classes[id_]
369 descriptor.AddMethod(obj, mod_name, mod_path, line_num)
370
371 else:
372 self.functions.append((obj, ref, mod_name, mod_path, line_num))
373
374 return True
375
376 def Report(self, f=sys.stdout):
377 # Now categorize into files. We couldn't do that earlier because classes
378 # don't know where they are located!
379
380 py_srcs = collections.defaultdict(SourceFile)
381 c_srcs = collections.defaultdict(SourceFile)
382
383 for func, ref, mod_name, mod_path, line_num in self.functions:
384 if mod_path:
385 py_srcs[mod_path].functions.append((func, ref, line_num))
386 else:
387 c_srcs[mod_name].functions.append((func, ref, line_num))
388
389 for cls in self.classes.values():
390 #if cls.cls.__name__ == 'lex_mode_e':
391 # log('!!! %s', cls)
392
393 if cls.mod_path:
394 py_srcs[cls.mod_path].classes.append(cls)
395 else:
396 c_srcs[cls.mod_name].classes.append(cls)
397
398 prefix = os.getcwd()
399 n = len(prefix) + 1
400
401 #for name in sorted(py_srcs):
402 # print(name)
403
404 #return
405
406 # Still missing: non-enum ASDL types? Why? CompoundObj?
407 # command_e is there, but command and SimpleCommand aren't.
408 # it's because we do
409 # ast.command_e vs. ast.SimpleCommand
410 # in both cases ast is a core.meta _AsdlModule?
411
412 print('PYTHON CODE')
413 print()
414
415 for path in sorted(py_srcs):
416 src = py_srcs[path]
417
418 if path is not None and path.startswith(prefix):
419 path = path[n:]
420
421 print('%s' % path)
422
423 for func, ref, _ in src.functions:
424 third = func
425 #third = ''
426 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
427 try:
428 print(' %s' % func.__name__)
429 except AttributeError:
430 # In the CI, when we don't have fastlex, we get _MatchOshToken_Slow()
431 # instances
432 print(" NOTE: %s found at top level, but it's not a simple function" % func)
433
434 PrintSig(' %s', func)
435
436 classes = [(c.Name(), c) for c in src.classes]
437 classes.sort()
438 for c in src.classes:
439 c.Print()
440
441 print()
442
443 print('NATIVE CODE')
444 print()
445
446 for mod_name in sorted(c_srcs):
447 src = c_srcs[mod_name]
448
449 print('%s' % mod_name)
450
451 for func, ref, _ in src.functions:
452 #third = func
453 third = ''
454 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
455 print(' %s' % func.__name__)
456
457 classes = [(c.Name(), c) for c in src.classes]
458 classes.sort()
459 for c in src.classes:
460 c.Print()
461
462 print()
463
464
465class SourceFile(object):
466
467 def __init__(self):
468 self.classes = []
469 self.functions = []
470
471
472def Walk(main, modules):
473 """Given a function main, finds all functions it transitively calls.
474
475 Uses heuristic bytecode analysis. Limitations:
476
477 - functions that are local variables might not work? But this should work:
478
479 if x > 0:
480 f = GlobalFunc
481 else:
482 f = OtherGlobalFunc
483 f() # The LOAD_GLOBAL will be found.
484
485 Args:
486 main: function
487 modules: Dict[str, module]
488 """
489 syms = Symbols()
490 _Walk(main, None, ['main'], syms)
491
492 # TODO:
493 # - co_consts should be unified? So we know how big the const pool is.
494
495 syms.Report()
496