OILS / opy / callgraph.py View on Github | oils.pub

497 lines, 237 significant
1from __future__ import print_function
2"""
3callgraph.py
4"""
5
6import collections
7import os
8import sys
9
10import __builtin__ # For looking up names
11import types
12
13from .lib import dis
14from .lib import inspect
15
16from .util import log
17
18
19def Disassemble(co):
20 """Given a code object, yield instructions of interest.
21
22 Args:
23 co: __code__ attribute
24
25 Structure copied from opy/compiler2/dis_tool.py, which was copied from
26 dis.disassemble().
27 """
28 code = co.co_code
29 extended_arg = 0 # Not used
30
31 i = 0
32 n = len(code)
33 #log('\tLENGTH OF CODE %s: %d', co.co_name, n)
34
35 while i < n:
36 #log('\ti = %d, n = %d', i, n)
37 op = ord(code[i])
38 i += 1
39
40 op_name = dis.opname[op]
41
42 # operation is 1 byte, argument is 2 bytes.
43 if op >= dis.HAVE_ARGUMENT:
44 oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
45 extended_arg = 0
46
47 if op == dis.EXTENDED_ARG:
48 # Hm not used?
49 raise AssertionError
50 extended_arg = oparg*65536L
51
52 i += 2
53
54 const_name = None
55 var_name = None
56
57 if op in dis.hasconst:
58 const_name = co.co_consts[oparg]
59
60 elif op in dis.hasname:
61 try:
62 var_name = co.co_names[oparg]
63 except IndexError:
64 log('Error: %r cannot index %s with %d', op_name, co.co_names,
65 oparg)
66 raise
67
68 elif op in dis.hasjrel:
69 #raise AssertionError(op_name)
70 pass
71
72 elif op in dis.haslocal:
73 #raise AssertionError(op_name)
74 pass
75
76 elif op in dis.hascompare:
77 #raise AssertionError(op_name)
78 pass
79
80 elif op in dis.hasfree:
81 #raise AssertionError(op_name)
82 pass
83
84 yield op_name, const_name, var_name
85 #log('\t==> i = %d, n = %d', i, n)
86
87
88import sre_compile
89
90def _GetAttr(module, name):
91 # Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
92 if module is sre_compile and name == 'l':
93 return None
94 # traceback.py has a hasattr() test
95 if module is sys and name == 'tracebacklimit':
96 return None
97
98 try:
99 val = getattr(module, name)
100 except AttributeError:
101 #log('%r not on %r', name, module)
102 # This could raise too
103 val = getattr(__builtin__, name)
104 return val
105
106
107def _Walk(obj, cls, ref, syms):
108 """
109 Discover statically what (globally-accessible) functions and classes are
110 used.
111
112 Args:
113 obj: a callable object
114 cls: an optional class that it was attached to, could be None
115 ref: the way the object was referenced from another place in the code
116 syms: output Symbols()
117
118 Something like this is OK:
119 # def Adder(x):
120 # def f(y):
121 # return x + y
122 # return f
123
124 Because we'll still have access to the inner code object. We probably won't
125 compile it though.
126 """
127 if syms.Seen(obj):
128 return
129
130 module_name = getattr(obj, '__module__', None)
131 # NOTE: We get duplicate objects like this, due to bound methods on different
132 # objects. Not sure how to fix it.
133 #
134 # OBJ <function Parse at 0x7fcd270b4de8>
135 # OBJ <function Parse at 0x7fcd2709e500>
136 # OBJ <built-in method get of dict object at 0x7fcd28c53280>
137 # OBJ <built-in method get of dict object at 0x7fcd28c53398>
138
139 #if obj.__name__ in ('write', 'get', 'Parse'):
140 # log('OBJ %s %d', obj, id(obj))
141 # pass
142
143 # Oh is the namedtuple_ crap because of the Block class byterun/pyobj?
144 # 2025-08: added local module, not sure why
145
146 if module_name is None or module_name in (
147 'namedtuple_Arguments', 'namedtuple_ArgSpec',
148 'namedtuple_Block', 'locale'):
149 syms.Add(obj, None, ref, None, None, None)
150 return # Can't walk anything
151
152 #log('OBJ %s %d', obj, id(obj))
153 module = sys.modules[obj.__module__]
154
155 co = getattr(obj, '__code__', None)
156 # For example, Builtins don't have bytecode.
157 if isinstance(co, types.CodeType):
158 co = obj.__code__
159 #log('CO %s', co)
160 #path = co.co_filename
161
162 mod_name = None
163 mod_path = co.co_filename
164
165 syms.Add(obj, cls, ref, mod_name, mod_path, co.co_firstlineno)
166 else:
167 mod_name = module.__name__
168 try:
169 mod_path = module.__file__
170 if mod_path.endswith('.pyc'):
171 mod_path = mod_path[:-1]
172 except AttributeError:
173 mod_path = None
174
175 #if obj.__name__ == 'lex_mode_e':
176 # log('!!! %s name %s path %s', obj, mod.__name__, mod.__file__)
177
178 #mod_name = None
179 #mod_path = None
180
181 syms.Add(obj, cls, ref, mod_name, mod_path, None)
182 return
183
184 #log('\tNAME %s', val.__code__.co_name)
185 #log('\tNAMES %s', val.__code__.co_names)
186
187 # Most functions and classes we call are globals!
188 #log('\t_Walk %s %s', obj, module)
189 #log('\t%s', sorted(dir(module)))
190
191 # Have to account for foo.Bar(), which gives this sequence:
192 # 2 0 LOAD_GLOBAL 0 (foo)
193 # 3 LOAD_ATTR 1 (Bar)
194 # 6 CALL_FUNCTION 0
195 #
196 # Also: os.path.join().
197
198 try:
199 last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
200 ref = []
201 g = Disassemble(obj.__code__)
202
203 while True:
204 op, const, var = g.next()
205
206 if op == 'LOAD_GLOBAL':
207 val = _GetAttr(module, var)
208 ref = [var] # reset it
209
210 elif op == 'LOAD_ATTR':
211 #if last_val is not None and isinstance(last_val, types.ModuleType):
212 if last_val is not None:
213 #log('%s %s', op, var)
214 val = _GetAttr(last_val, var)
215 ref.append(var)
216
217 # Crawl the methods below. Otherwise we get duplicate bound/unbound
218 # methods, which have unique addresses.
219 # Examples: WAIT_SPEC.Parse, sys.stdout.write
220
221 # BUG: os.fork and sys.stdout.write are the same?
222 # I thought os.fork is types.BuiltinFunctionType, and
223 # sys.stdout.write is types.BuiltinMethodType, but why not?
224
225 if isinstance(val, (types.MethodType, types.BuiltinMethodType)):
226 val = None
227 ref = []
228
229 else:
230 val = None
231 ref = []
232
233 else: # Some other opcode
234 val = None
235 ref = []
236
237 if callable(val):
238 #log('VAL %s module %s', val, val.__module__)
239 # Recursive call.
240
241 # Check for old style:
242 #if isinstance(val, types.ClassType):
243 # print('OLD %s' % val)
244 _Walk(val, None, ref, syms)
245
246 # If the value is a class, walk its methods. Note that we assume ALL
247 # methods are used. It's possible to narrow this down a bit and detect
248 # unused methods.
249 if isinstance(val, type):
250 cls = val
251 #log('type %s', val)
252 for name in dir(val):
253 # prevent error with __abstractmethods__ attribute
254 #if name.startswith('__'):
255 if name == '__abstractmethods__':
256 continue
257 field_val = getattr(val, name)
258 #log('field_val %s', field_val)
259 if isinstance(field_val, types.MethodType):
260 func_obj = field_val.im_func
261 _Walk(func_obj, cls, ref, syms)
262
263 last_val = val # Used on next iteration
264
265 except StopIteration:
266 pass
267
268 #log('\tDone _Walk %s %s', obj, module)
269
270
271def PrintSig(fmt, func):
272 if os.getenv('CALLGRAPH_SIG') != '1':
273 return
274 try:
275 argspec = inspect.getargspec(func)
276 except TypeError:
277 return
278 parts = [':%s' % ' '.join(argspec.args)]
279 # These are keyword-only args?
280 if argspec.varargs:
281 parts.append('varargs=%s' % (argspec.varargs,))
282 if argspec.keywords:
283 parts.append('kw=%s' % (argspec.keywords,))
284 # Hm the default of 'None' is bad -- you can't distinguish a default of None,
285 # which is very common!
286 # It's better to get this by parsing the AST.
287 if argspec.defaults and any(d is not None for d in argspec.defaults):
288 parts.append('defaults=%s' % (argspec.defaults,))
289
290 print(fmt % ' '.join(parts))
291
292
293class Class(object):
294
295 def __init__(self, cls, mod_name, mod_path):
296 self.cls = cls
297 self.mod_name = mod_name
298 self.mod_path = mod_path
299 self.methods = []
300
301 def Name(self):
302 return self.cls.__name__
303
304 def AddMethod(self, m, mod_name, mod_path, line_num):
305 # Just assume the method is in the same file as the class itself.
306 self.methods.append((m, mod_name, mod_path, line_num))
307
308 def Print(self):
309 base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
310 print(' %s(%s)' % (self.cls.__name__, base_names))
311
312 methods = [(m.__name__, m) for (m, _, _, _) in self.methods]
313 methods.sort()
314 for name, m in methods:
315 print(' %s' % name)
316 PrintSig(' %s', m)
317
318 def __repr__(self):
319 return '<Class %s %s %s %s>' % (
320 self.cls, self.mod_name, self.mod_path, self.methods)
321
322
323class Symbols(object):
324 """A sink for discovered symbols."""
325
326 def __init__(self):
327 self.seen = set()
328
329 self.classes = {} # integer id() -> Class
330 self.functions = [] # list of callables
331
332 self.paths = {} # path -> list of functions
333
334 def Seen(self, c):
335 """c: a callable."""
336 id_ = id(c)
337 if id_ in self.seen:
338 return True
339 self.seen.add(id_)
340 return False
341
342 def Add(self, obj, cls, ref, mod_name, mod_path, line_num):
343 """Could be a function, class Constructor, or method.
344 Can also be native (C) or interpreted (Python with __code__ attribute.)
345
346 Returns:
347 True if we haven't yet seen it.
348 """
349 if mod_path is not None:
350 mod_path = os.path.normpath(mod_path)
351
352 if isinstance(obj, type):
353 id_ = id(obj)
354
355 # NOTE: Python's classes don't have a __code__ object, which appears to
356 # be an irregularity. So we have to get location information from the
357 # METHODS.
358
359 # Exception: this type has a __code__ object that is not types.CodeType
360 if obj is not types.FunctionType:
361 assert not hasattr(obj, '__code__'), obj
362 #assert path is None
363 assert line_num is None
364
365 self.classes[id_] = Class(obj, mod_name, mod_path)
366
367 elif cls is not None:
368 id_ = id(cls)
369 descriptor = self.classes[id_]
370 descriptor.AddMethod(obj, mod_name, mod_path, line_num)
371
372 else:
373 self.functions.append((obj, ref, mod_name, mod_path, line_num))
374
375 return True
376
377 def Report(self, f=sys.stdout):
378 # Now categorize into files. We couldn't do that earlier because classes
379 # don't know where they are located!
380
381 py_srcs = collections.defaultdict(SourceFile)
382 c_srcs = collections.defaultdict(SourceFile)
383
384 for func, ref, mod_name, mod_path, line_num in self.functions:
385 if mod_path:
386 py_srcs[mod_path].functions.append((func, ref, line_num))
387 else:
388 c_srcs[mod_name].functions.append((func, ref, line_num))
389
390 for cls in self.classes.values():
391 #if cls.cls.__name__ == 'lex_mode_e':
392 # log('!!! %s', cls)
393
394 if cls.mod_path:
395 py_srcs[cls.mod_path].classes.append(cls)
396 else:
397 c_srcs[cls.mod_name].classes.append(cls)
398
399 prefix = os.getcwd()
400 n = len(prefix) + 1
401
402 #for name in sorted(py_srcs):
403 # print(name)
404
405 #return
406
407 # Still missing: non-enum ASDL types? Why? CompoundObj?
408 # command_e is there, but command and SimpleCommand aren't.
409 # it's because we do
410 # ast.command_e vs. ast.SimpleCommand
411 # in both cases ast is a core.meta _AsdlModule?
412
413 print('PYTHON CODE')
414 print()
415
416 for path in sorted(py_srcs):
417 src = py_srcs[path]
418
419 if path is not None and path.startswith(prefix):
420 path = path[n:]
421
422 print('%s' % path)
423
424 for func, ref, _ in src.functions:
425 third = func
426 #third = ''
427 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
428 try:
429 print(' %s' % func.__name__)
430 except AttributeError:
431 # In the CI, when we don't have fastlex, we get _MatchOshToken_Slow()
432 # instances
433 print(" NOTE: %s found at top level, but it's not a simple function" % func)
434
435 PrintSig(' %s', func)
436
437 classes = [(c.Name(), c) for c in src.classes]
438 classes.sort()
439 for c in src.classes:
440 c.Print()
441
442 print()
443
444 print('NATIVE CODE')
445 print()
446
447 for mod_name in sorted(c_srcs):
448 src = c_srcs[mod_name]
449
450 print('%s' % mod_name)
451
452 for func, ref, _ in src.functions:
453 #third = func
454 third = ''
455 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
456 print(' %s' % func.__name__)
457
458 classes = [(c.Name(), c) for c in src.classes]
459 classes.sort()
460 for c in src.classes:
461 c.Print()
462
463 print()
464
465
466class SourceFile(object):
467
468 def __init__(self):
469 self.classes = []
470 self.functions = []
471
472
473def Walk(main, modules):
474 """Given a function main, finds all functions it transitively calls.
475
476 Uses heuristic bytecode analysis. Limitations:
477
478 - functions that are local variables might not work? But this should work:
479
480 if x > 0:
481 f = GlobalFunc
482 else:
483 f = OtherGlobalFunc
484 f() # The LOAD_GLOBAL will be found.
485
486 Args:
487 main: function
488 modules: Dict[str, module]
489 """
490 syms = Symbols()
491 _Walk(main, None, ['main'], syms)
492
493 # TODO:
494 # - co_consts should be unified? So we know how big the const pool is.
495
496 syms.Report()
497