OILS / frontend / consts_gen.py View on Github | oilshell.org

651 lines, 336 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9consts_gen.py - Code generation for consts.py, id_kind_def.py, etc.
10"""
11from __future__ import print_function
12
13import collections
14import os
15import sys
16
17from asdl import gen_cpp
18from mycpp.mylib import log
19from frontend import id_kind_def
20from frontend import builtin_def
21from frontend import option_def
22
23
def _CreateModule(id_spec, ids):
    """Build a synthetic ASDL module that declares the Id and Kind types.

    Args:
      id_spec: object whose kind_name_list attribute lists the Kind names.
      ids: iterable of (name, value) pairs; only the names are used here.

    Returns:
      An ast.Module named 'id_kind' containing the two type declarations.
    """
    from asdl import ast

    # Id = Word_Compound | Arith_Semi | Arith_Comma | ...
    id_constructors = []
    for id_name, _ in ids:
        id_constructors.append(ast.Constructor(id_name))
    # Pack it in 16 bits
    id_type = ast.SimpleSum(id_constructors,
                            generate=['uint16', 'no_namespace_suffix'])

    kind_constructors = []
    for kind_name in id_spec.kind_name_list:
        kind_constructors.append(ast.Constructor(kind_name))
    kind_type = ast.SimpleSum(kind_constructors,
                              generate=['no_namespace_suffix'])

    declarations = [
        ast.TypeDecl('Id', id_type),
        ast.TypeDecl('Kind', kind_type),
    ]
    return ast.Module('id_kind', [], declarations)
42
43
# Every builtin spec, as returned by builtin_def.All().  GenBuiltinLookup()
# reads the .name, .index and .kind attributes of each entry.
_BUILTINS = builtin_def.All()


def GenBuiltinLookup(func_name, kind, f):
    """Write a C++ name -> builtin_t lookup function for one kind of builtin.

    Args:
      func_name: name of the generated C++ function.
      kind: only entries of _BUILTINS whose .kind equals this are included.
      f: writable file-like object that receives the generated code.
    """
    selected = []
    for spec in _BUILTINS:
        if spec.kind == kind:
            selected.append((spec.name, spec.index))

    GenStringLookup('builtin_t', func_name, selected, f)
53
54
def GenStringLookup(type_name, func_name, pairs, f):
    """Write a C++ function that maps a BigStr* to an integer code.

    The generated function switches on the first byte of the string and then
    compares length and bytes against every candidate that starts with that
    byte.  It returns 0 (consts.NO_INDEX) when nothing matches.

    Args:
      type_name: C++ return type of the generated function.
      func_name: name of the generated function.
      pairs: iterable of (name, index) pairs; each name must be non-empty
        because it is grouped by name[0].
      f: writable file-like object that receives the generated code.
    """
    # first character -> [(name, index), ...] in input order
    by_first_char = {}
    for key, value in pairs:
        by_first_char.setdefault(key[0], []).append((key, value))

    # Note: we could optimize the length check, e.g. have a second level
    # switch.  But we would need to measure the difference.  Caching the id on
    # AST nodes is probably a bigger win, e.g. for loops.
    #
    # Size optimization: don't repeat constants literally?

    prologue = """\
%s %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return 0; // consts.NO_INDEX

 const char* data = s->data_;
 switch (data[0]) {
""" % (type_name, func_name)
    f.write(prologue)

    for ch in sorted(by_first_char):
        f.write(" case '%s':\n" % ch)
        for key, value in by_first_char[ch]:
            size = len(key)
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return %d;
''' % (size, key, size, value))
        f.write(' break;\n')

    epilogue = """\
 }

 return 0; // consts.NO_INDEX
}

"""
    f.write(epilogue)
100
101
def GenIntStrLookup(func_name, int2str, f):
    """Write a C++ function that maps an int to a global BigStr*.

    One GLOBAL_STR named k<func_name>_<int> is emitted per entry, followed by
    a function whose switch returns the matching global.  The default case
    of the switch is FAIL(kShouldNotGetHere).

    Args:
      func_name: name of the generated function, also used in global names.
      int2str: dict of int -> str.
      f: writable file-like object that receives the generated code.
    """
    # NOTE: quoting doesn't work, strings must be Identifier Names here
    ordered_keys = sorted(int2str)

    for num in ordered_keys:
        f.write('GLOBAL_STR(k%s_%d, "%s");\n' % (func_name, num, int2str[num]))

    f.write("""\

BigStr* %s(int i) {
 switch (i) {
""" % func_name)

    for num in ordered_keys:
        f.write(' case %d:\n' % num)
        f.write(' return k%s_%d;\n' % (func_name, num))
        f.write(' break;\n')

    f.write("""\
 default:
 FAIL(kShouldNotGetHere);
 }
}

""")
127
128
def GenStringMembership(func_name, strs, f):
    """Write a C++ predicate that tests whether a BigStr* is one of `strs`.

    The generated function switches on the first byte and then compares
    length and bytes against each candidate starting with that byte.

    Args:
      func_name: name of the generated function.
      strs: iterable of non-empty strings (each is grouped by s[0]).
      f: writable file-like object that receives the generated code.
    """
    # first character -> candidates in input order
    by_first_char = {}
    for candidate in strs:
        by_first_char.setdefault(candidate[0], []).append(candidate)

    f.write("""\
bool %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return false;

 const char* data = s->data_;
 switch (data[0]) {
""" % func_name)

    for ch in sorted(by_first_char):
        f.write(" case '%s':\n" % ch)
        for candidate in by_first_char[ch]:
            size = len(candidate)
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return true;
''' % (size, candidate, size))
        f.write(' break;\n')

    f.write("""\
 }

 return false;
}

""")
161
162
# Characters that need a backslash escape inside a C character or string
# literal, mapped to the escaped spelling.
C_CHAR = {
    # '\'' is a single quote in C
    "'": "\\'",
    '"': '\\"',
    '\\': "\\\\",
    '\t': '\\t',
    '\r': '\\r',
    '\n': '\\n',
    '\v': '\\v',
    '\0': '\\0',
    '\a': '\\a',
    '\b': '\\b',
    '\f': '\\f',
    '\x1b': '\\x1b',
}


def CChar(c):
    """Return the C escape for `c` from C_CHAR, or `c` itself if not listed."""
    try:
        return C_CHAR[c]
    except KeyError:
        return c
182
183
def GenCharLookup(func_name, lookup, f, required=False):
    """Write a C++ function that maps a one-character BigStr* to another.

    Args:
      func_name: name of the generated function.
      lookup: dict of character -> replacement; both sides are escaped with
        CChar() before being placed in the generated code.
      f: writable file-like object that receives the generated code.
      required: if true, the generated default case is assert(0); otherwise
        it returns nullptr.
    """
    write = f.write

    write("""\
BigStr* %s(BigStr* c) {
 assert(len(c) == 1);

 char ch = c->data_[0];

 // TODO-intern: return value
 switch (ch) {
""" % func_name)

    for key in sorted(lookup):
        case_label = CChar(key)
        replacement = CChar(lookup[key])
        write(" case '%s':\n" % case_label)
        write(' return StrFromC("%s", 1);\n' % replacement)
        write(" break;\n")

    default_body = " assert(0);\n" if required else " return nullptr;\n"
    write(" default:\n")
    write(default_body)

    write("""
 }
}
""")
210
211
def GenStrList(l, name, out):
    """Emit a GLOBAL_LIST of BigStr* along with one GLOBAL_STR per element.

    Args:
      l: sequence of strings (len() is taken of it).
      name: name of the generated list; element globals are k<name>_<index>.
      out: callable taking (fmt, *args), invoked once per emitted line.
    """
    global_names = []
    for index, text in enumerate(l):
        ident = 'k%s_%d' % (name, index)
        global_names.append(ident)
        out('GLOBAL_STR(%s, "%s");', ident, text)

    out('GLOBAL_LIST(%s, BigStr*, %d, {%s});\n', name, len(l),
        ' COMMA '.join(global_names))
221
222
def main(argv):
    """Run the code generation action selected by argv[1].

    Actions:
      c           Print '#define id__<name> <int>' for every Id to stdout.
      cpp         Write <argv[2]>.h and <argv[2]>.cc for the Id and Kind types.
      mypy        Write the Python definitions of Id and Kind to stdout.
      cpp-consts  Write <argv[2]>.h and <argv[2]>.cc for the C++ 'consts'
                  namespace.
      py-consts   Print Python lookup tables (BOOL_ARG_TYPES, TEST_*_LOOKUP,
                  ID_TO_KIND) to stdout.

    Args:
      argv: argument list.  argv[1] is the action; argv[2] is the output path
        prefix for the actions that write files.

    Raises:
      RuntimeError: if the action is missing or invalid, or if 'cpp' or
        'cpp-consts' is given without an output prefix.
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    # 'cpp' and 'cpp-consts' write <prefix>.h and <prefix>.cc.  Check for the
    # prefix up front so that a missing argument is reported as a RuntimeError,
    # like a missing action, instead of surfacing later as an IndexError.
    if action in ('cpp', 'cpp-consts') and len(argv) < 3:
        raise RuntimeError('Output prefix required for action %r' % action)

    # TODO: Remove duplication in core/meta.py
    ID_TO_KIND = {}
    BOOL_ARG_TYPES = {}
    TEST_UNARY_LOOKUP = {}
    TEST_BINARY_LOOKUP = {}
    TEST_OTHER_LOOKUP = {}

    # The dicts above are passed to id_kind_def and read back below.
    ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

    id_kind_def.AddKinds(ID_SPEC)
    id_kind_def.AddBoolKinds(ID_SPEC)  # must come second

    id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP,
                                 TEST_BINARY_LOOKUP, TEST_OTHER_LOOKUP)

    # (name, int) pairs, sorted by ID
    ids = sorted(ID_SPEC.id_str2int.items(), key=lambda pair: pair[1])

    if action == 'c':
        for name, id_int in ids:
            print('#define id__%s %s' % (name, id_int))

    elif action == 'cpp':
        schema_ast = _CreateModule(ID_SPEC, ids)

        out_prefix = argv[2]

        with open(out_prefix + '.h', 'w') as f:
            f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

#include <stdint.h> // uint16_t

class BigStr;

namespace id_kind_asdl {

#define ASDL_NAMES struct
""")

            v = gen_cpp.ClassDefVisitor(f)
            v.VisitModule(schema_ast)

            f.write("""
} // namespace id_kind_asdl

#endif // ID_KIND_ASDL_H
""")

        with open(out_prefix + '.cc', 'w') as f:
            f.write("""\
#include <assert.h>
#include "_gen/frontend/id_kind.asdl.h"
#include "mycpp/gc_alloc.h" // StrFromC()

namespace id_kind_asdl {

""")

            v = gen_cpp.MethodDefVisitor(f)

            v.VisitModule(schema_ast)

            f.write('} // namespace id_kind_asdl\n')

    elif action == 'mypy':
        from asdl import gen_python

        schema_ast = _CreateModule(ID_SPEC, ids)
        #print(schema_ast)

        f = sys.stdout

        f.write("""\
from asdl import pybase

""")
        # Minor style issue: we want Id and Kind, not Id_e and Kind_e
        v = gen_python.GenMyPyVisitor(f)
        v.VisitModule(schema_ast)

    elif action == 'cpp-consts':

        # Break circular deps

        from core import pyutil
        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        # Names of integer-list attributes of frontend.consts that are exported
        # as List<int>* globals.
        LIST_INT = [
            'STRICT_ALL',
            'YSH_UPGRADE',
            'YSH_ALL',
            'DEFAULT_TRUE',
            'PARSE_OPTION_NUMS',
            'SHOPT_OPTION_NUMS',
            'SET_OPTION_NUMS',
            'VISIBLE_SHOPT_NUMS',
        ]

        prefix = argv[2]

        with open(prefix + '.h', 'w') as f:

            def out(fmt, *args):
                # Always applies '%', so a literal template must not contain
                # a bare '%'.  print() appends a newline.
                print(fmt % args, file=f)

            out("""\
#ifndef CONSTS_H
#define CONSTS_H

#include "mycpp/runtime.h"

#include "_gen/frontend/id_kind.asdl.h"
#include "_gen/frontend/option.asdl.h"
#include "_gen/core/runtime.asdl.h"
#include "_gen/frontend/types.asdl.h"

namespace consts {
""")

            for name in LIST_INT:
                out('extern List<int>* %s;', name)

            out('extern List<BigStr*>* BUILTIN_NAMES;')
            out('extern List<BigStr*>* OSH_KEYWORD_NAMES;')
            out('extern List<BigStr*>* SET_OPTION_NAMES;')
            out('extern List<BigStr*>* SHOPT_OPTION_NAMES;')

            out("""\

extern int NO_INDEX;

extern BigStr* gVersion;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

types_asdl::opt_group_t OptionGroupNum(BigStr* s);
option_asdl::option_t OptionNum(BigStr* s);
option_asdl::option_t UnimplOptionNum(BigStr* s);
option_asdl::builtin_t LookupNormalBuiltin(BigStr* s);
option_asdl::builtin_t LookupAssignBuiltin(BigStr* s);
option_asdl::builtin_t LookupSpecialBuiltin(BigStr* s);
bool IsControlFlow(BigStr* s);
BigStr* ControlFlowName(int i);
bool IsKeyword(BigStr* s);
BigStr* LookupCharC(BigStr* c);
BigStr* LookupCharPrompt(BigStr* c);

BigStr* OptionName(option_asdl::option_t opt_num);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

extern BigStr* ASSIGN_ARG_RE;
extern BigStr* TEST_V_RE;

} // namespace consts

#endif // CONSTS_H
""")
        with open(prefix + '.cc', 'w') as f:

            def out(fmt, *args):
                # Same contract as the header writer above: fmt % args, then a
                # newline.  Values are passed as args (not pre-formatted) so
                # that a '%' inside a value is not interpreted a second time.
                print(fmt % args, file=f)

            out("""\
#include "_gen/frontend/consts.h"

using id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

namespace consts {

int NO_INDEX = 0; // duplicated from frontend/consts.py
""")

            # Generate gVersion, which is read by pyutil::GetVersion()
            this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
            root_dir = os.path.join(this_dir, '..')  # ~/git/oilshell/oil
            loader = pyutil._FileResourceLoader(root_dir)

            version_str = pyutil.GetVersion(loader)
            out('GLOBAL_STR(gVersion, "%s");', version_str)
            out('')

            # Note: could use opt_num:: instead of raw ints
            for name in LIST_INT:
                val = getattr(consts, name)
                val_str = ' COMMA '.join(str(i) for i in val)
                out('GLOBAL_LIST(%s, int, %d, {%s});', name, len(val), val_str)

            out("""\

int RedirDefaultFd(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_DEFAULT_FD):
                # Id_str() gives a dotted name; C++ needs '::'
                a = Id_str(id_).replace('.', '::')
                b = consts.REDIR_DEFAULT_FD[id_]
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # redir_arg_type_e::Path, etc.
                b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
                    '.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(BOOL_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # bool_arg_type_e::BigStr, etc.
                b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(ID_TO_KIND):
                a = Id_str(id_).replace('.', '::')
                b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            pairs = consts.OPTION_GROUPS.items()
            GenStringLookup('types_asdl::opt_group_t', 'OptionGroupNum', pairs,
                            f)

            # Implemented and unimplemented options get separate lookups.
            pairs = [(opt.name, opt.index) for opt in option_def.All()
                     if opt.implemented]
            #pairs = [(opt.name, opt.index) for opt in option_def.All()]
            GenStringLookup('option_asdl::option_t', 'OptionNum', pairs, f)
            pairs2 = [(opt.name, opt.index) for opt in option_def.All()
                      if not opt.implemented]
            GenStringLookup('option_asdl::option_t', 'UnimplOptionNum', pairs2,
                            f)

            GenBuiltinLookup('LookupNormalBuiltin', 'normal', f)
            GenBuiltinLookup('LookupAssignBuiltin', 'assign', f)
            GenBuiltinLookup('LookupSpecialBuiltin', 'special', f)

            GenStringMembership('IsControlFlow', consts._CONTROL_FLOW_NAMES, f)
            GenIntStrLookup('ControlFlowName', consts._CONTROL_FLOW_LOOKUP, f)

            GenStringMembership('IsKeyword', consts.OSH_KEYWORD_NAMES, f)

            GenCharLookup('LookupCharC', consts._ONE_CHAR_C, f, required=True)
            GenCharLookup('LookupCharPrompt', consts._ONE_CHAR_PROMPT, f)

            opt_int2str = {}
            for opt in option_def.All():
                opt_int2str[opt.index] = opt.name
            GenIntStrLookup('OptionName', opt_int2str, f)

            #
            # Generate a tightly packed 2D array for C, from a Python dict.
            #

            # Keys of edges are indexed as (state, char_kind); values are
            # (new_state, action) pairs.
            edges = consts._IFS_EDGES
            max_state = max(edge[0] for edge in edges)
            max_char_kind = max(edge[1] for edge in edges)

            edge_array = []
            for i in xrange(max_state + 1):
                # unused cells get -1
                edge_array.append(['-1'] * (max_char_kind + 1))

            for i in xrange(max_state + 1):
                for j in xrange(max_char_kind + 1):
                    entry = edges.get((i, j))
                    if entry is not None:
                        # pack (new_state, action) into 32 bits
                        edge_array[i][j] = '(%d<<16)|%d' % entry

            parts = []
            for i in xrange(max_state + 1):
                parts.append(' {')
                parts.append(', '.join('%10s' % cell
                                       for cell in edge_array[i]))
                parts.append(' },\n')

            out("""\
int _IFS_EDGE[%d][%d] = {
%s
};
""", max_state + 1, max_char_kind + 1, ''.join(parts))

            out("""\
// Note: all of these are integers, e.g. state_i, emit_i, char_kind_i
using runtime_asdl::state_t;
using runtime_asdl::emit_t;
using runtime_asdl::char_kind_t;

Tuple2<state_t, emit_t> IfsEdge(state_t state, runtime_asdl::char_kind_t ch) {
 int cell = _IFS_EDGE[state][ch];
 state_t new_state = cell >> 16;
 emit_t emit = cell & 0xFFFF;
 return Tuple2<state_t, emit_t>(new_state, emit);
}
""")

            GenStrList(consts.BUILTIN_NAMES, 'BUILTIN_NAMES', out)
            GenStrList(consts.OSH_KEYWORD_NAMES, 'OSH_KEYWORD_NAMES', out)
            GenStrList(consts.SET_OPTION_NAMES, 'SET_OPTION_NAMES', out)
            GenStrList(consts.SHOPT_OPTION_NAMES, 'SHOPT_OPTION_NAMES', out)

            def _CString(s):
                # Hack that does backslash escaping, e.g. \\
                # We could also use C++ strings
                import json
                return json.dumps(s)

            GLOBAL_STRINGS = [
                'ASSIGN_ARG_RE',
                'TEST_V_RE',
            ]
            for var_name in GLOBAL_STRINGS:
                out('GLOBAL_STR(%s, %s);', var_name,
                    _CString(getattr(consts, var_name)))

            out("""\
} // namespace consts
""")

    elif action == 'py-consts':
        # It's kind of weird to use the generated code to generate more code.
        # Can we do this instead with the parsed module for "id" and "types.asdl"?

        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

        print('')
        print('BOOL_ARG_TYPES = {')
        for id_ in sorted(BOOL_ARG_TYPES):
            v = BOOL_ARG_TYPES[id_]
            # HACK
            v = bool_arg_type_str(v).replace('.', '_e.')
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

        print('')
        print('TEST_UNARY_LOOKUP = {')
        for op_str in sorted(TEST_UNARY_LOOKUP):
            v = Id_str(TEST_UNARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_BINARY_LOOKUP = {')
        for op_str in sorted(TEST_BINARY_LOOKUP):
            v = Id_str(TEST_BINARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_OTHER_LOOKUP = {')
        for op_str in sorted(TEST_OTHER_LOOKUP):
            v = Id_str(TEST_OTHER_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('ID_TO_KIND = {')
        for id_ in sorted(ID_TO_KIND):
            v = Kind_str(ID_TO_KIND[id_])
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

    else:
        raise RuntimeError('Invalid action %r' % action)
644
645
if __name__ == '__main__':
    # A RuntimeError raised by main() becomes a one-line message on stderr
    # and exit status 1; any other exception propagates.
    try:
        main(sys.argv)
    except RuntimeError as err:
        sys.stderr.write('FATAL: %s\n' % err)
        sys.exit(1)