OILS / opy / resolve.py View on Github | oils.pub

270 lines, 157 significant
1"""
2resolve.py
3
4TODO: Instead of pickling everything separately, copy sys.modules
5into a dict of dicts.
6
7{ 'osh.cmd_parse': { ... }, # dict instead of a module
8 'osh.bool_parse': { ... },
9}
10Then I think the sharing will work.
11
12- Hook this up to oheap2.py?
13 - Instead of only pickling code objects (with string/tuple/int), you can
14 pickle a graph of user-defined classes after running? Just like pickle.py
15 does it.
16 - Well I was thinking of doing that INSIDE OVM2, rather than in CPython. But
17 either way would work.
18
19- Combine callgraph.py and this module?
20 - resolve.py finds all functions/classes/globals via sys.modules
21 - callgraph.py finds all of them via use in the bytecode
22
23- We should produce a unified report and double check.
24- Might also need to combine them with build/cpython-defs.py.
25 - We also need the mapping from filenames to modules, which is really in the
26 build system. _build/oil/module_init.c has the names and extern
27 declarations. We could manually make a list.
28"""
29from __future__ import print_function
30
31import sys
32import pickle
33import copy_reg # dependency of pickle, exclude it
34import types
35
36from opy.util import log
37from pylib import unpickle
38
39import __builtin__ # this is not __builtins__
40
41
def banner(msg):
    """Log msg as a section heading, with an empty line before and after."""
    for line in ('', msg, ''):
        log(line)
46
47
def PrintVars(global_vars):
    """Log every module-level variable and tally the variables by type.

    Args:
      global_vars: list of (mod_name, name, obj) tuples.  Sorted in place.

    Returns:
      A collections.Counter mapping str(type(obj)) to the number of variables
      of that type.
    """
    import collections

    banner('VARIABLES')
    global_vars.sort()  # tuples compare by module name first

    # 316 globals / constants (513 before deduping)
    for (module, var_name, value) in global_vars:
        log('%-15s %-15s %r', module, var_name, value)

    type_hist = collections.Counter(
        str(type(value)) for (_, _, value) in global_vars)

    # ID_SPEC is in core/meta and frontend/lex, so list those entries again.
    id_spec_rows = [
        row for row in global_vars if 'IdSpec' in str(type(row[2]))
    ]
    for (module, var_name, value) in id_spec_rows:
        log('%-20s %-15s %r', module, var_name, value)
    log('')

    return type_hist
67
68
def PrintFuncs(funcs):
    """Log a sorted listing of module-level functions.

    Args:
      funcs: list of (mod_name, name, obj) tuples.  Sorted in place.

    Returns:
      None.  Nothing is tallied here, so no histogram is built.
    """
    banner('FUNCTIONS')
    funcs.sort()  # tuples compare by module name first

    for (mod_name, name, obj) in funcs:
        log('%-20s %-15s %r', mod_name, name, obj)
79
80
# Attribute names that PrintClasses skips when it walks dir() of a class.
OMITTED = (
    '__class__',
    '__dict__',
    '__doc__',
    '__getattribute__',
    '__module__',
    '__reduce__',
    '__slots__',
    '__subclasshook__',
)
84
def PrintClasses(classes):
    """Log each class with its methods and its constant-like attributes.

    Args:
      classes: list of (mod_name, name, obj) tuples, where obj is a class.
        Sorted in place.

    Returns:
      A (num_methods, type_hist) pair: the number of method names logged
      across all classes, and a collections.Counter keyed by str(type(attr))
      for every attribute that was not skipped.
    """
    import collections

    banner('CLASSES')
    classes.sort()  # tuples compare by module name first

    attr_type_counts = collections.Counter()
    method_total = 0

    # Hold on to EVERY attribute we fetch, so that they are forced to have
    # different IDs.  Unbound methods go through the descriptor protocol, and
    # without these references the id()-based de-duping below doesn't work
    # properly.
    # https://stackoverflow.com/questions/13348031/ids-of-bound-and-unbound-method-objects-sometimes-the-same-for-different-o
    keep_alive = []
    visited_ids = set()

    # 316 globals / constants (513 before deduping)
    for (mod_name, class_name, cls) in classes:
        log('%-20s %-15s %r', mod_name, class_name, cls)

        method_names = []
        for attr_name in dir(cls):
            if attr_name in OMITTED:
                continue

            attr = getattr(cls, attr_name)
            keep_alive.append(attr)

            # De-dupe by identity, except for bool / int / None values, which
            # are always processed even when their id was seen before.
            attr_id = id(attr)
            already_seen = attr_id in visited_ids
            if already_seen and not isinstance(attr, (bool, int, types.NoneType)):
                continue
            visited_ids.add(attr_id)

            attr_type_counts[str(type(attr))] += 1

            # There are A LOT of other types.  Classes are complicated.
            if isinstance(attr, types.MethodType):
                method_names.append(attr_name)

            # user-defined class attributes shouldn't be used
            # None is the tag for SimpleObj.
            if isinstance(attr, (bool, int, long, tuple, list, dict, set, str, type, types.NoneType)):
                log(' (C) %s %s', attr_name, attr)

        for method_name in sorted(method_names):
            log(' %s', method_name)
        method_total += len(method_names)

    return method_total, attr_type_counts
139
140
def Walk(mod_dict):
    """Test if the objects in Oil can be pickled, and summarize what was found.

    Every attribute whose name does not start with '__' is fetched from every
    module in mod_dict and passed to pickle.dumps(), except for a fixed list
    of modules, attribute names and objects that are skipped.  Each object is
    visited at most once, keyed by id().  Pickled objects are then sorted into
    functions, classes and global variables, and the totals are logged.

    Args:
      mod_dict: dict of module name -> module object (presumably a copy of
        sys.modules; confirm against the caller).
    """
    # vars that are not classes or functions, which are presumed to be
    # constant
    global_vars = []
    classes = []  # user-defined classes
    funcs = []  # functions

    seen_ids = set()  # id() of every object already visited

    num_objects = 0
    num_modules = 0
    total_bytes = 0

    # Modules that are never walked.  copy_reg is a dependency of pickle.
    excluded_modules = (pickle, copy_reg, unpickle, sys, types)

    # Attribute names that are skipped before pickling.
    skipped_names = (
        'Struct',  # struct module
        'InputType', 'OutputType', 'cStringIO_CAPI',  # cStringIO
        '_pattern_type',  # re
    )

    # Specific objects that are skipped before pickling, compared by identity.
    skipped_objects = (
        __builtin__.Ellipsis,
        __builtin__.NotImplemented,
        types.BuiltinFunctionType,
        types.ClassType,
        mod_dict,  # circular!
    )

    # Modules whose objects are still pickled and counted, but are left out of
    # the function / class / variable lists.
    unreported_modules = ('codecs', 'encodings', 'encodings.aliases', 're',
                          'sre_constants', 'sre_compile', 'sre_parse')

    for mod_name, mod in mod_dict.iteritems():
        if any(mod is excluded for excluded in excluded_modules):
            continue

        # sys.modules gets polluted because of pickle, etc.
        # get rid of _sre, _warnings, etc.
        # still might want _struct
        is_private_module = (mod_name.startswith('_') and
                             not mod_name.startswith('_devbuild'))
        is_unreported = is_private_module or mod_name in unreported_modules

        attr_names = dir(mod)
        log('mod %s', mod)
        for name in attr_names:
            if name.startswith('__'):
                continue

            obj = getattr(mod, name)

            obj_id = id(obj)
            if obj_id in seen_ids:
                continue
            seen_ids.add(obj_id)

            log('%r = %r', name, obj)

            # Modules: e.g. ASDL modules.  Files: types_asdl.pickle is opened.
            if isinstance(obj, (types.ModuleType, types.FileType)):
                continue
            if name in skipped_names:
                continue
            if any(obj is skipped for skipped in skipped_objects):
                continue

            pickled = pickle.dumps(obj)

            # NOTE: this could be inefficient because it's a graph, not a tree.
            total_bytes += len(pickled)
            log('%d bytes', total_bytes)
            num_objects += 1

            if name == 'print_function' or is_unreported:
                continue

            # user-defined types, not types.ClassType which is old-style
            # builtin functions can't be compiled.
            if isinstance(obj, types.BuiltinFunctionType):
                continue  # cannot be compiled, not a constant

            entry = (mod_name, name, obj)
            if isinstance(obj, types.FunctionType):
                funcs.append(entry)
            elif isinstance(obj, type):
                classes.append(entry)
            else:
                global_vars.append(entry)

        num_modules += 1

    log('Pickled %d objects in %d modules', num_objects, num_modules)
    log('')

    # The 'if 0' sections below are debugging reports that are switched off.
    if 0:
        var_type_hist = PrintVars(global_vars)
        log('')

    num_methods, attr_type_hist = PrintClasses(classes)
    log('')

    if 0:
        PrintFuncs(funcs)
        log('')

    if 0:
        log('Global variable types:')
        for type_str, count in var_type_hist.most_common():
            log('%10d %s', count, type_str)
        log('')

    log('Class attribute types:')
    for type_str, count in attr_type_hist.most_common():
        log('%10d %s', count, type_str)
    log('')

    # Audit what's at the top level.  int, dict, str, list are most common,
    # then FlagSpec.
    log('%d global vars', len(global_vars))
    log('%d user-defined classes, with %d total methods on them', len(classes),
        num_methods)
    log('%d user-defined functions', len(funcs))
269
270