OILS / frontend / consts_gen.py View on Github | oilshell.org

644 lines, 331 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9consts_gen.py - Code generation for consts.py, id_kind_def.py, etc.
10"""
11from __future__ import print_function
12
13import collections
14import os
15import sys
16
17from asdl import gen_cpp
18from mycpp.mylib import log
19from frontend import id_kind_def
20from frontend import builtin_def
21from frontend import option_def
22
23
def _CreateModule(id_spec, ids):
    """Build the synthetic ASDL module that code is generated from.

    Args:
      id_spec: object whose kind_name_list attribute lists the Kind names.
      ids: iterable of (name, value) pairs.  Only the names are used, in the
        order given.

    Returns:
      An ast.Module named 'id_kind' containing the 'Id' and 'Kind' type
      declarations.
    """
    from asdl import ast

    id_constructors = []
    for id_name, _ in ids:
        id_constructors.append(ast.Constructor(id_name))
    # Pack it in 16 bits
    id_options = ['uint16', 'no_namespace_suffix']
    sum_of_ids = ast.SimpleSum(id_constructors, generate=id_options)

    kind_constructors = []
    for kind_name in id_spec.kind_name_list:
        kind_constructors.append(ast.Constructor(kind_name))
    sum_of_kinds = ast.SimpleSum(kind_constructors,
                                 generate=['no_namespace_suffix'])

    # Id = Word_Compound | Arith_Semi | Arith_Comma | ...
    type_decls = [
        ast.TypeDecl('Id', sum_of_ids),
        ast.TypeDecl('Kind', sum_of_kinds),
    ]
    return ast.Module('id_kind', [], type_decls)
42
43
# Result of builtin_def.All(), computed once at import time.
# GenBuiltinLookup() filters these entries by their 'kind' attribute.
_BUILTINS = builtin_def.All()
45
46
def GenBuiltinLookup(func_name, kind, f):
    """Write a C++ name -> builtin_t lookup function for one kind of builtin.

    Args:
      func_name: name of the emitted C++ function.
      kind: only entries of _BUILTINS whose 'kind' equals this are included.
      f: file-like object that receives the C++ text.
    """
    #log('%r %r', func_name, kind)

    selected = []
    for builtin in _BUILTINS:
        if builtin.kind == kind:
            selected.append((builtin.name, builtin.index))

    GenStringLookup('builtin_t', func_name, selected, f)
53
54
def GenStringLookup(type_name, func_name, pairs, f):
    """Write a C++ function that maps a BigStr* to an integer code.

    The emitted function switches on the first byte of the string, then
    compares the length and the bytes against every candidate that starts
    with that byte.  It returns 0 (consts.NO_INDEX) for the empty string and
    for any string that matches no candidate.

    Args:
      type_name: C++ return type of the emitted function.
      func_name: name of the emitted function.
      pairs: iterable of (name, index).  Every name must be non-empty because
        its first character selects the 'case' label.
      f: file-like object that receives the C++ text.
    """
    #log('%s', pairs)

    # One bucket per first character; each bucket becomes one 'case'.
    by_first_char = collections.defaultdict(list)
    for name, index in pairs:
        by_first_char[name[0]].append((name, index))

    if 0:  # disabled debug dump of the buckets
        for ch, bucket in by_first_char.iteritems():
            log('%s %d', ch, len(bucket))
            log('%s', bucket)

    # Note: we could optimize the length check, e.g. have a second level
    # switch.  But we would need to measure the difference.  Caching the id on
    # AST nodes is probably a bigger win, e.g. for loops.
    #
    # Size optimization: don't repeat constants literally?

    prologue = ('%s %s(BigStr* s) {\n'
                ' int length = len(s);\n'
                ' if (length == 0) return 0; // consts.NO_INDEX\n'
                '\n'
                ' const char* data = s->data_;\n'
                ' switch (data[0]) {\n')
    f.write(prologue % (type_name, func_name))

    compare_fmt = (
        ' if (length == %d && memcmp("%s", data, %d) == 0) return %d;\n')
    for ch in sorted(by_first_char):
        f.write(" case '%s':\n" % ch)
        for name, index in by_first_char[ch]:
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            num_bytes = len(name)
            f.write(compare_fmt % (num_bytes, name, num_bytes, index))
        f.write(' break;\n')

    epilogue = (' }\n'
                '\n'
                ' return 0; // consts.NO_INDEX\n'
                '}\n'
                '\n')
    f.write(epilogue)
100
101
def GenIntStrLookup(func_name, int2str, f):
    """Write C++ global strings and a function mapping an int to one of them.

    For every key a GLOBAL_STR named k<func_name>_<key> is emitted, followed
    by a function <func_name>(int i) whose switch returns the matching global.
    The 'default' branch of the emitted switch is FAIL(kShouldNotGetHere).

    Args:
      func_name: name of the emitted C++ function, also used in global names.
      int2str: dict of int -> str.  Keys are emitted in sorted order.
      f: file-like object that receives the C++ text.
    """
    # NOTE: quoting doesn't work, strings must be Identifier Names here
    ordered_keys = sorted(int2str)

    for key in ordered_keys:
        f.write('GLOBAL_STR(k%s_%d, "%s");\n' % (func_name, key, int2str[key]))

    signature = ('\n'
                 'BigStr* %s(int i) {\n'
                 ' switch (i) {\n')
    f.write(signature % func_name)

    for key in ordered_keys:
        f.write(' case %d:\n' % key)
        f.write(' return k%s_%d;\n' % (func_name, key))
        f.write(' break;\n')

    closing = (' default:\n'
               ' FAIL(kShouldNotGetHere);\n'
               ' }\n'
               '}\n'
               '\n')
    f.write(closing)
127
128
def GenStringMembership(func_name, strs, f):
    """Write a C++ predicate that tests whether a BigStr* is in a fixed set.

    The emitted function switches on the first byte, then compares the length
    and bytes against each candidate starting with that byte.  It returns
    false for the empty string and for any string not in the set.

    Args:
      func_name: name of the emitted C++ function.
      strs: iterable of non-empty strings forming the set.
      f: file-like object that receives the C++ text.
    """
    # One bucket per first character; each bucket becomes one 'case'.
    by_first_char = collections.defaultdict(list)
    for candidate in strs:
        by_first_char[candidate[0]].append(candidate)

    prologue = ('bool %s(BigStr* s) {\n'
                ' int length = len(s);\n'
                ' if (length == 0) return false;\n'
                '\n'
                ' const char* data = s->data_;\n'
                ' switch (data[0]) {\n')
    f.write(prologue % func_name)

    compare_fmt = (
        ' if (length == %d && memcmp("%s", data, %d) == 0) return true;\n')
    for ch in sorted(by_first_char):
        f.write(" case '%s':\n" % ch)
        for candidate in by_first_char[ch]:
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            num_bytes = len(candidate)
            f.write(compare_fmt % (num_bytes, candidate, num_bytes))
        f.write(' break;\n')

    epilogue = (' }\n'
                '\n'
                ' return false;\n'
                '}\n'
                '\n')
    f.write(epilogue)
161
162
# Maps one character to the escape sequence written for it inside generated
# C character and string literals.  CChar() passes through any character that
# is not listed here.
C_CHAR = {
    # '\'' is a single quote in C
    "'": r"\'",
    '"': r'\"',
    '\\': r'\\',
    '\t': r'\t',
    '\r': r'\r',
    '\n': r'\n',
    '\v': r'\v',
    '\0': r'\0',
    '\a': r'\a',
    '\b': r'\b',
    '\f': r'\f',
    '\x1b': r'\x1b',
}
178
179
def CChar(c):
    """Return the C_CHAR escape for c, or c itself when it has no entry."""
    try:
        return C_CHAR[c]
    except KeyError:
        return c
182
183
def GenCharLookup(func_name, lookup, f, required=False):
    """Write a C++ function that maps a one-character BigStr* to another.

    Each key of 'lookup' becomes a 'case' that returns a one-character string
    built with StrFromC().  Keys and values are escaped with CChar().

    Args:
      func_name: name of the emitted C++ function.
      lookup: dict of single character -> single character.
      f: file-like object that receives the C++ text.
      required: selects the emitted 'default' branch: assert(0) when true,
        'return nullptr' otherwise.
    """
    prologue = ('BigStr* %s(BigStr* c) {\n'
                ' assert(len(c) == 1);\n'
                '\n'
                ' char ch = c->data_[0];\n'
                '\n'
                ' // TODO-intern: return value\n'
                ' switch (ch) {\n')
    f.write(prologue % func_name)

    for key in sorted(lookup):
        f.write(" case '%s':\n" % CChar(key))
        f.write(' return StrFromC("%s", 1);\n' % CChar(lookup[key]))
        f.write(" break;\n")

    f.write(" default:\n")
    default_body = " assert(0);\n" if required else " return nullptr;\n"
    f.write(default_body)

    f.write("\n"
            " }\n"
            "}\n")
210
211
def GenStrList(l, name, out):
    """Emit a C++ global list of strings, one GLOBAL_STR per element.

    Args:
      l: sequence of strings.  Element i becomes the global k<name>_<i>.
      name: name of the emitted GLOBAL_LIST, also used in element names.
      out: callable out(fmt, *args) that emits one formatted line.
    """
    str_globals = []
    for position, text in enumerate(l):
        str_global = "k%s_%d" % (name, position)
        out('GLOBAL_STR(%s, "%s");', str_global, text)
        str_globals.append(str_global)

    out('GLOBAL_LIST(%s, BigStr*, %d, {%s});\n', name, len(l),
        ' COMMA '.join(str_globals))
221
222
def main(argv):
    """Run the code generator selected by argv[1].

    Actions:
      c           print '#define id__<name> <int>' for every Id to stdout
      cpp         write argv[2] + '.h' and argv[2] + '.cc' from the synthetic
                  Id/Kind ASDL module
      mypy        run gen_python.GenMyPyVisitor over the same module, writing
                  to stdout
      cpp-consts  write argv[2] + '.h' and argv[2] + '.cc' for the C++
                  'consts' namespace
      py-consts   print the Python lookup tables to stdout

    Args:
      argv: command line, e.g. sys.argv.

    Raises:
      RuntimeError: the action is missing or unknown, or an action that
        writes files was given no output prefix.
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    def _OutPrefix():
        # A missing prefix is reported the same way as a missing action, so
        # the __main__ handler prints it as FATAL instead of a traceback.
        try:
            return argv[2]
        except IndexError:
            raise RuntimeError('Output prefix required for action %r' %
                               action)

    # TODO: Remove duplication in core/meta.py
    ID_TO_KIND = {}
    BOOL_ARG_TYPES = {}
    TEST_UNARY_LOOKUP = {}
    TEST_BINARY_LOOKUP = {}
    TEST_OTHER_LOOKUP = {}

    ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

    id_kind_def.AddKinds(ID_SPEC)
    id_kind_def.AddBoolKinds(ID_SPEC)  # must come second

    id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP,
                                 TEST_BINARY_LOOKUP, TEST_OTHER_LOOKUP)

    # (name, int) pairs, sorted by ID
    ids = sorted(ID_SPEC.id_str2int.items(), key=lambda pair: pair[1])

    if action == 'c':
        for name, id_int in ids:
            print('#define id__%s %s' % (name, id_int))

    elif action == 'cpp':
        schema_ast = _CreateModule(ID_SPEC, ids)

        out_prefix = _OutPrefix()

        with open(out_prefix + '.h', 'w') as f:
            f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

#include <stdint.h> // uint16_t

class BigStr;

namespace id_kind_asdl {

#define ASDL_NAMES struct
""")

            v = gen_cpp.ClassDefVisitor(f)
            v.VisitModule(schema_ast)

            f.write("""
} // namespace id_kind_asdl

#endif // ID_KIND_ASDL_H
""")

        with open(out_prefix + '.cc', 'w') as f:
            f.write("""\
#include <assert.h>
#include "_gen/frontend/id_kind.asdl.h"
#include "mycpp/gc_alloc.h" // StrFromC()

namespace id_kind_asdl {

""")

            v = gen_cpp.MethodDefVisitor(f)

            v.VisitModule(schema_ast)

            f.write('} // namespace id_kind_asdl\n')

    elif action == 'mypy':
        from asdl import gen_python

        schema_ast = _CreateModule(ID_SPEC, ids)
        #print(schema_ast)

        f = sys.stdout

        f.write("""\
from asdl import pybase

""")
        # Minor style issue: we want Id and Kind, not Id_e and Kind_e
        v = gen_python.GenMyPyVisitor(f)
        v.VisitModule(schema_ast)

    elif action == 'cpp-consts':

        # Break circular deps

        from core import pyutil
        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        # Names of the integer lists in frontend/consts that are declared in
        # the header and defined in the .cc file.
        LIST_INT = [
            'STRICT_ALL',
            'YSH_UPGRADE',
            'YSH_ALL',
            'DEFAULT_TRUE',
            'PARSE_OPTION_NUMS',
            'SHOPT_OPTION_NUMS',
            'SET_OPTION_NUMS',
            'VISIBLE_SHOPT_NUMS',
        ]

        prefix = _OutPrefix()

        with open(prefix + '.h', 'w') as f:

            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#ifndef CONSTS_H
#define CONSTS_H

#include "mycpp/runtime.h"

#include "_gen/frontend/id_kind.asdl.h"
#include "_gen/frontend/option.asdl.h"
#include "_gen/core/runtime.asdl.h"
#include "_gen/frontend/types.asdl.h"

namespace consts {
""")

            for name in LIST_INT:
                out('extern List<int>* %s;', name)

            out('extern List<BigStr*>* BUILTIN_NAMES;')
            out('extern List<BigStr*>* OSH_KEYWORD_NAMES;')
            out('extern List<BigStr*>* SET_OPTION_NAMES;')
            out('extern List<BigStr*>* SHOPT_OPTION_NAMES;')

            out("""\

extern int NO_INDEX;

extern BigStr* gVersion;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

types_asdl::opt_group_t OptionGroupNum(BigStr* s);
option_asdl::option_t OptionNum(BigStr* s);
option_asdl::builtin_t LookupNormalBuiltin(BigStr* s);
option_asdl::builtin_t LookupAssignBuiltin(BigStr* s);
option_asdl::builtin_t LookupSpecialBuiltin(BigStr* s);
bool IsControlFlow(BigStr* s);
BigStr* ControlFlowName(int i);
bool IsKeyword(BigStr* s);
BigStr* LookupCharC(BigStr* c);
BigStr* LookupCharPrompt(BigStr* c);

BigStr* OptionName(option_asdl::option_t opt_num);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

extern BigStr* ASSIGN_ARG_RE;
extern BigStr* TEST_V_RE;

} // namespace consts

#endif // CONSTS_H
""")
        with open(prefix + '.cc', 'w') as f:

            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#include "_gen/frontend/consts.h"

using id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

namespace consts {

int NO_INDEX = 0; // duplicated from frontend/consts.py
""")

            # Generate gVersion, which is read by pyutil::GetVersion()
            this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
            root_dir = os.path.join(this_dir, '..')  # ~/git/oilshell/oil
            loader = pyutil._FileResourceLoader(root_dir)

            version_str = pyutil.GetVersion(loader)
            # Values are passed as arguments so out() formats exactly once.
            out('GLOBAL_STR(gVersion, "%s");', version_str)
            out('')

            # Note: could use opt_num:: instead of raw ints
            for name in LIST_INT:
                val = getattr(consts, name)
                val_str = ' COMMA '.join(str(i) for i in val)
                out('GLOBAL_LIST(%s, int, %d, {%s});', name, len(val), val_str)

            out("""\

int RedirDefaultFd(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_DEFAULT_FD):
                # Id_str() yields a dotted name; C++ wants '::'
                a = Id_str(id_).replace('.', '::')
                b = consts.REDIR_DEFAULT_FD[id_]
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # redir_arg_type_e::Path, etc.
                b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
                    '.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(BOOL_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # bool_arg_type_e::BigStr, etc.
                b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(ID_TO_KIND):
                a = Id_str(id_).replace('.', '::')
                b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            pairs = consts.OPTION_GROUPS.items()
            GenStringLookup('types_asdl::opt_group_t', 'OptionGroupNum', pairs,
                            f)

            pairs = [(opt.name, opt.index) for opt in option_def.All()]
            GenStringLookup('option_asdl::option_t', 'OptionNum', pairs, f)

            GenBuiltinLookup('LookupNormalBuiltin', 'normal', f)
            GenBuiltinLookup('LookupAssignBuiltin', 'assign', f)
            GenBuiltinLookup('LookupSpecialBuiltin', 'special', f)

            GenStringMembership('IsControlFlow', consts._CONTROL_FLOW_NAMES, f)
            GenIntStrLookup('ControlFlowName', consts._CONTROL_FLOW_LOOKUP, f)

            GenStringMembership('IsKeyword', consts.OSH_KEYWORD_NAMES, f)

            GenCharLookup('LookupCharC', consts._ONE_CHAR_C, f, required=True)
            GenCharLookup('LookupCharPrompt', consts._ONE_CHAR_PROMPT, f)

            opt_int2str = {}
            for opt in option_def.All():
                opt_int2str[opt.index] = opt.name
            GenIntStrLookup('OptionName', opt_int2str, f)

            #
            # Generate a tightly packed 2D array for C, from a Python dict.
            #

            # Keys of 'edges' are (state, char_kind) pairs; the array is
            # sized by the largest value seen in each position.
            edges = consts._IFS_EDGES
            max_state = max(edge[0] for edge in edges)
            max_char_kind = max(edge[1] for edge in edges)

            edge_array = []
            for i in xrange(max_state + 1):
                # unused cells get -1
                edge_array.append(['-1'] * (max_char_kind + 1))

            for i in xrange(max_state + 1):
                for j in xrange(max_char_kind + 1):
                    entry = edges.get((i, j))
                    if entry is not None:
                        # pack (new_state, action) into 32 bits
                        edge_array[i][j] = '(%d<<16)|%d' % entry

            # One brace-delimited row of C initializers per state
            parts = []
            for i in xrange(max_state + 1):
                parts.append(' {')
                parts.append(', '.join('%10s' % cell
                                       for cell in edge_array[i]))
                parts.append(' },\n')

            out(
                """\
int _IFS_EDGE[%d][%d] = {
%s
};
""", max_state + 1, max_char_kind + 1, ''.join(parts))

            out("""\
// Note: all of these are integers, e.g. state_i, emit_i, char_kind_i
using runtime_asdl::state_t;
using runtime_asdl::emit_t;
using runtime_asdl::char_kind_t;

Tuple2<state_t, emit_t> IfsEdge(state_t state, runtime_asdl::char_kind_t ch) {
 int cell = _IFS_EDGE[state][ch];
 state_t new_state = cell >> 16;
 emit_t emit = cell & 0xFFFF;
 return Tuple2<state_t, emit_t>(new_state, emit);
}
""")

            GenStrList(consts.BUILTIN_NAMES, 'BUILTIN_NAMES', out)
            GenStrList(consts.OSH_KEYWORD_NAMES, 'OSH_KEYWORD_NAMES', out)
            GenStrList(consts.SET_OPTION_NAMES, 'SET_OPTION_NAMES', out)
            GenStrList(consts.SHOPT_OPTION_NAMES, 'SHOPT_OPTION_NAMES', out)

            def _CString(s):
                # Hack that does backslash escaping, e.g. \\
                # We could also use C++ strings
                import json
                return json.dumps(s)

            GLOBAL_STRINGS = [
                'ASSIGN_ARG_RE',
                'TEST_V_RE',
            ]
            for var_name in GLOBAL_STRINGS:
                out('GLOBAL_STR(%s, %s);', var_name,
                    _CString(getattr(consts, var_name)))

            out("""\
} // namespace consts
""")

    elif action == 'py-consts':
        # It's kind of weird to use the generated code to generate more code.
        # Can we do this instead with the parsed module for "id" and "types.asdl"?

        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

        print('')
        print('BOOL_ARG_TYPES = {')
        for id_ in sorted(BOOL_ARG_TYPES):
            v = BOOL_ARG_TYPES[id_]
            # HACK
            v = bool_arg_type_str(v).replace('.', '_e.')
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

        print('')
        print('TEST_UNARY_LOOKUP = {')
        for op_str in sorted(TEST_UNARY_LOOKUP):
            v = Id_str(TEST_UNARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_BINARY_LOOKUP = {')
        for op_str in sorted(TEST_BINARY_LOOKUP):
            v = Id_str(TEST_BINARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_OTHER_LOOKUP = {')
        for op_str in sorted(TEST_OTHER_LOOKUP):
            v = Id_str(TEST_OTHER_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('ID_TO_KIND = {')
        for id_ in sorted(ID_TO_KIND):
            v = Kind_str(ID_TO_KIND[id_])
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

    else:
        raise RuntimeError('Invalid action %r' % action)
637
638
if __name__ == '__main__':
    # main() signals usage errors with RuntimeError; report them on stderr
    # and exit with status 1.
    try:
        main(sys.argv)
    except RuntimeError as err:
        sys.stderr.write('FATAL: %s\n' % err)
        sys.exit(1)