OILS / frontend / consts_gen.py View on Github | oils.pub

649 lines, 336 significant
#!/usr/bin/env python2
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
"""
consts_gen.py - Code generation for consts.py, id_kind_def.py, etc.
"""
from __future__ import print_function

import collections
import os
import sys

from asdl import gen_cpp
from mycpp.mylib import log
from frontend import id_kind_def
from frontend import builtin_def
from frontend import option_def
22
23
def _CreateModule(id_spec, ids):
    """Create a SYNTHETIC ASDL module to generate code from.

    Args:
      id_spec: object whose kind_name_list attribute lists the Kind names
      ids: iterable of (name, value) pairs; only the names are used, in the
        order given

    Returns:
      An ast.Module named 'id_kind' declaring the 'Id' and 'Kind' types.
    """
    from asdl import ast

    id_variants = [ast.MakeSimpleVariant(name) for name, _ in ids]
    # Pack it in 16 bits
    id_sum = ast.SimpleSum(id_variants,
                           generate=['uint16', 'no_namespace_suffix'])

    kind_variants = [
        ast.MakeSimpleVariant(name) for name in id_spec.kind_name_list
    ]
    kind_sum = ast.SimpleSum(kind_variants, generate=['no_namespace_suffix'])

    # Id = Word_Compound | Arith_Semi | Arith_Comma | ...
    id_ = ast.TypeDecl('Id', id_sum)
    kind_ = ast.TypeDecl('Kind', kind_sum)

    return ast.Module('id_kind', [], [], [id_, kind_])
44
45
# Builtin specs; GenBuiltinLookup() reads their .name, .index and .kind.
_BUILTINS = builtin_def.All()
47
48
def GenBuiltinLookup(func_name, kind, f):
    """Write a C++ name -> builtin_t lookup for the builtins of one kind.

    Args:
      func_name: name of the generated C++ function
      kind: only entries of _BUILTINS whose .kind equals this are included
      f: writable file-like object receiving the C++ source
    """
    pairs = [(b.name, b.index) for b in _BUILTINS if b.kind == kind]
    GenStringLookup('builtin_t', func_name, pairs, f)
55
56
def GenStringLookup(type_name, func_name, pairs, f):
    """Write a C++ function that maps a BigStr* to an integer index.

    The generated function switches on the first byte of the string, then
    compares the length and bytes against every candidate that starts with
    that byte.  It returns 0 (consts.NO_INDEX) for the empty string and when
    nothing matches.

    Args:
      type_name: C++ return type of the generated function
      func_name: name of the generated function
      pairs: iterable of (name, index).  Each name must be non-empty because
        name[0] selects the switch case.
      f: writable file-like object receiving the C++ source
    """
    # Group the candidates by first character; each group becomes one 'case'.
    groups = collections.defaultdict(list)
    for name, index in pairs:
        groups[name[0]].append((name, index))

    # Note: we could optimize the length check, e.g. have a second level
    # switch. But we would need to measure the difference. Caching the id on
    # AST nodes is probably a bigger win, e.g. for loops.
    #
    # Size optimization: don't repeat constants literally?

    f.write("""\
%s %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return 0; // consts.NO_INDEX

 const char* data = s->data_;
 switch (data[0]) {
""" % (type_name, func_name))

    for first_char in sorted(groups):
        # NOTE(review): first_char and name are written verbatim into C
        # literals, so they must not need escaping (quotes, backslashes).
        f.write(" case '%s':\n" % first_char)
        for name, index in groups[first_char]:
            # NOTE: we have to check the length because they're not NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return %d;
''' % (len(name), name, len(name), index))
        f.write(' break;\n')

    f.write("""\
 }

 return 0; // consts.NO_INDEX
}

""")
102
103
def GenIntStrLookup(func_name, int2str, f):
    """Write a C++ function that maps an int to a global BigStr*.

    One GLOBAL_STR constant is emitted per entry, followed by a function
    that switches over the integer keys and reaches
    FAIL(kShouldNotGetHere) for any other value.

    Args:
      func_name: name of the generated function, also used in the names of
        the string constants (k<func_name>_<int>)
      int2str: dict of {int: str}
      f: writable file-like object receiving the C++ source
    """
    # NOTE: quoting doesn't work, strings must be Identifier Names here

    indices = sorted(int2str)

    for i in indices:
        f.write('GLOBAL_STR(k%s_%d, "%s");\n' % (func_name, i, int2str[i]))

    f.write("""\

BigStr* %s(int i) {
 switch (i) {
""" % func_name)

    for i in indices:
        f.write(' case %d:\n' % i)
        f.write(' return k%s_%d;\n' % (func_name, i))
        f.write(' break;\n')

    f.write("""\
 default:
 FAIL(kShouldNotGetHere);
 }
}

""")
129
130
def GenStringMembership(func_name, strs, f):
    """Write a C++ predicate that tests whether a BigStr* is in a fixed set.

    The generated function switches on the first byte, then compares the
    length and bytes against each candidate starting with that byte.  It
    returns false for the empty string and when nothing matches.

    Args:
      func_name: name of the generated function
      strs: iterable of non-empty strings (s[0] selects the switch case)
      f: writable file-like object receiving the C++ source
    """
    # Group the candidates by first character; each group becomes one 'case'.
    groups = collections.defaultdict(list)
    for s in strs:
        groups[s[0]].append(s)

    f.write("""\
bool %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return false;

 const char* data = s->data_;
 switch (data[0]) {
""" % func_name)

    for first_char in sorted(groups):
        f.write(" case '%s':\n" % first_char)
        for candidate in groups[first_char]:
            # NOTE: we have to check the length because they're not NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return true;
''' % (len(candidate), candidate, len(candidate)))
        f.write(' break;\n')

    f.write("""\
 }

 return false;
}

""")
163
164
# Characters that need a backslash escape when written into a C literal,
# mapped to their escaped spelling.
C_CHAR = {
    # '\'' is a single quote in C
    "'": "\\'",
    '"': '\\"',
    '\\': "\\\\",
    '\t': '\\t',
    '\r': '\\r',
    '\n': '\\n',
    '\v': '\\v',
    '\0': '\\0',
    '\a': '\\a',
    '\b': '\\b',
    '\f': '\\f',
    '\x1b': '\\x1b',
}


def CChar(c):
    """Return the C escape for c, or c itself if it has no entry in C_CHAR."""
    return C_CHAR.get(c, c)
184
185
def GenCharLookup(func_name, lookup, f, required=False):
    """Write a C++ function that maps a one-char BigStr* to another one.

    Args:
      func_name: name of the generated function
      lookup: dict of {char: char}; keys and values are passed through
        CChar() before being written into C literals
      f: writable file-like object receiving the C++ source
      required: if True, the generated 'default' case is assert(0);
        otherwise it returns nullptr
    """
    f.write("""\
BigStr* %s(BigStr* c) {
 assert(len(c) == 1);

 char ch = c->data_[0];

 // TODO-intern: return value
 switch (ch) {
""" % func_name)

    for char_code in sorted(lookup):
        f.write(" case '%s':\n" % CChar(char_code))
        f.write(' return StrFromC("%s", 1);\n' % CChar(lookup[char_code]))
        f.write(" break;\n")

    f.write(" default:\n")
    if required:
        f.write(" assert(0);\n")
    else:
        f.write(" return nullptr;\n")

    f.write("""
 }
}
""")
212
213
def GenStrList(l, name, out):
    """Emit a global C++ list of strings plus one GLOBAL_STR per element.

    Args:
      l: sequence of strings; each is written verbatim inside a C string
        literal
      name: name of the generated list; elements are named k<name>_<index>
      out: callable out(fmt, *args) that formats and emits one line
    """
    element_globals = []
    for i, elem in enumerate(l):
        global_name = "k%s_%d" % (name, i)
        out('GLOBAL_STR(%s, "%s");', global_name, elem)
        element_globals.append(global_name)

    # COMMA is used instead of ',' between the elements of the list literal.
    lit = ' COMMA '.join(element_globals)
    out('GLOBAL_LIST(%s, BigStr*, %d, {%s});\n', name, len(l), lit)
223
224
def _OutPrefix(argv, action):
    """Return argv[2], the output path prefix needed by file-writing actions.

    Raises:
      RuntimeError: if argv has no third element, so the caller's handler
        reports it like other usage errors instead of an IndexError traceback.
    """
    try:
        return argv[2]
    except IndexError:
        raise RuntimeError('Action %r requires an output prefix' % action)


def main(argv):
    """Generate code for the action named by argv[1].

    Actions:
      c           print '#define id__NAME VALUE' lines to stdout
      cpp         write <argv[2]>.h and <argv[2]>.cc for the Id / Kind types
      mypy        print the Python definitions of Id / Kind to stdout
      cpp-consts  write <argv[2]>.h and <argv[2]>.cc with lookup functions
                  and constants derived from frontend/consts.py
      py-consts   print Python lookup tables to stdout

    Raises:
      RuntimeError: if the action is missing or invalid, or if an action that
        writes files has no output prefix.
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    # TODO: Remove duplication in core/meta.py
    ID_TO_KIND = {}
    BOOL_ARG_TYPES = {}
    TEST_UNARY_LOOKUP = {}
    TEST_BINARY_LOOKUP = {}
    TEST_OTHER_LOOKUP = {}

    ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

    id_kind_def.AddKinds(ID_SPEC)
    id_kind_def.AddBoolKinds(ID_SPEC)  # must come second

    id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP,
                                 TEST_BINARY_LOOKUP, TEST_OTHER_LOOKUP)

    # (name, int) pairs, sorted by ID
    ids = sorted(ID_SPEC.id_str2int.items(), key=lambda pair: pair[1])

    if action == 'c':
        for name, id_int in ids:
            print('#define id__%s %s' % (name, id_int))

    elif action == 'cpp':
        schema_ast = _CreateModule(ID_SPEC, ids)

        out_prefix = _OutPrefix(argv, action)

        with open(out_prefix + '.h', 'w') as f:
            f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

#include <stdint.h> // uint16_t

class BigStr;

namespace id_kind_asdl {

#define ASDL_NAMES struct
""")

            v = gen_cpp.ClassDefVisitor(f)
            v.VisitModule(schema_ast)

            f.write("""
} // namespace id_kind_asdl

#endif // ID_KIND_ASDL_H
""")

        with open(out_prefix + '.cc', 'w') as f:
            f.write("""\
#include <assert.h>
#include "_gen/frontend/id_kind.asdl.h"
#include "mycpp/gc_alloc.h" // StrFromC()

namespace id_kind_asdl {

""")

            v = gen_cpp.MethodDefVisitor(f)

            v.VisitModule(schema_ast)

            f.write('} // namespace id_kind_asdl\n')

    elif action == 'mypy':
        from asdl import gen_python

        schema_ast = _CreateModule(ID_SPEC, ids)
        #print(schema_ast)

        # Minor style issue: we want Id and Kind, not Id_e and Kind_e
        v = gen_python.GenMyPyVisitor(sys.stdout)
        v.VisitModule(schema_ast)

    elif action == 'cpp-consts':

        # Break circular deps

        from core import pyutil
        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        LIST_INT = [
            'STRICT_ALL',
            'YSH_UPGRADE',
            'YSH_ALL',
            'DEFAULT_TRUE',
            'PARSE_OPTION_NUMS',
            'SHOPT_OPTION_NUMS',
            'SET_OPTION_NUMS',
            'VISIBLE_SHOPT_NUMS',
        ]

        prefix = _OutPrefix(argv, action)

        with open(prefix + '.h', 'w') as f:

            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#ifndef CONSTS_H
#define CONSTS_H

#include "mycpp/runtime.h"

#include "_gen/frontend/id_kind.asdl.h"
#include "_gen/frontend/option.asdl.h"
#include "_gen/core/runtime.asdl.h"
#include "_gen/frontend/types.asdl.h"

namespace consts {
""")

            for name in LIST_INT:
                out('extern List<int>* %s;', name)

            out('extern List<BigStr*>* BUILTIN_NAMES;')
            out('extern List<BigStr*>* OSH_KEYWORD_NAMES;')
            out('extern List<BigStr*>* SET_OPTION_NAMES;')
            out('extern List<BigStr*>* SHOPT_OPTION_NAMES;')

            out("""\

extern int NO_INDEX;

extern BigStr* gVersion;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

types_asdl::opt_group_t OptionGroupNum(BigStr* s);
option_asdl::option_t OptionNum(BigStr* s);
option_asdl::option_t UnimplOptionNum(BigStr* s);
option_asdl::builtin_t LookupNormalBuiltin(BigStr* s);
option_asdl::builtin_t LookupAssignBuiltin(BigStr* s);
option_asdl::builtin_t LookupSpecialBuiltin(BigStr* s);
option_asdl::builtin_t LookupPrivateBuiltin(BigStr* s);
bool IsControlFlow(BigStr* s);
BigStr* ControlFlowName(int i);
bool IsKeyword(BigStr* s);
BigStr* LookupCharC(BigStr* c);
BigStr* LookupCharPrompt(BigStr* c);

BigStr* OptionName(option_asdl::option_t opt_num);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

extern BigStr* ASSIGN_ARG_RE;
extern BigStr* TEST_V_RE;

} // namespace consts

#endif // CONSTS_H
""")
        with open(prefix + '.cc', 'w') as f:

            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#include "_gen/frontend/consts.h"

using id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

namespace consts {

int NO_INDEX = 0; // duplicated from frontend/consts.py
""")

            # Generate gVersion, which is read by pyutil::GetVersion()
            this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
            root_dir = os.path.join(this_dir, '..')  # ~/git/oilshell/oil
            loader = pyutil._FileResourceLoader(root_dir)

            version_str = pyutil.GetVersion(loader)
            # Values are passed as arguments so out() formats them only once.
            out('GLOBAL_STR(gVersion, "%s");', version_str)
            out('')

            # Note: could use opt_num:: instead of raw ints
            for name in LIST_INT:
                val = getattr(consts, name)
                val_str = ' COMMA '.join(str(i) for i in val)
                out('GLOBAL_LIST(%s, int, %d, {%s});', name, len(val), val_str)

            out("""\

int RedirDefaultFd(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_DEFAULT_FD):
                a = Id_str(id_).replace('.', '::')
                b = consts.REDIR_DEFAULT_FD[id_]
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # redir_arg_type_e::Path, etc.
                b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
                    '.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(BOOL_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # bool_arg_type_e::BigStr, etc.
                b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(ID_TO_KIND):
                a = Id_str(id_).replace('.', '::')
                b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
                out(' case %s: return %s;', a, b)
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            pairs = consts.OPTION_GROUPS.items()
            GenStringLookup('types_asdl::opt_group_t', 'OptionGroupNum', pairs,
                            f)

            pairs = [(opt.name, opt.index) for opt in option_def.All()
                     if opt.implemented]
            #pairs = [(opt.name, opt.index) for opt in option_def.All()]
            GenStringLookup('option_asdl::option_t', 'OptionNum', pairs, f)
            pairs2 = [(opt.name, opt.index) for opt in option_def.All()
                      if not opt.implemented]
            GenStringLookup('option_asdl::option_t', 'UnimplOptionNum', pairs2,
                            f)

            GenBuiltinLookup('LookupNormalBuiltin', 'normal', f)
            GenBuiltinLookup('LookupAssignBuiltin', 'assign', f)
            GenBuiltinLookup('LookupSpecialBuiltin', 'special', f)
            GenBuiltinLookup('LookupPrivateBuiltin', 'private', f)

            GenStringMembership('IsControlFlow', consts._CONTROL_FLOW_NAMES, f)
            GenIntStrLookup('ControlFlowName', consts._CONTROL_FLOW_LOOKUP, f)

            GenStringMembership('IsKeyword', consts.OSH_KEYWORD_NAMES, f)

            GenCharLookup('LookupCharC', consts._ONE_CHAR_C, f, required=True)
            GenCharLookup('LookupCharPrompt', consts._ONE_CHAR_PROMPT, f)

            opt_int2str = {}
            for opt in option_def.All():
                opt_int2str[opt.index] = opt.name
            GenIntStrLookup('OptionName', opt_int2str, f)

            #
            # Generate a tightly packed 2D array for C, from a Python dict.
            #

            edges = consts._IFS_EDGES
            max_state = max(edge[0] for edge in edges)
            max_char_kind = max(edge[1] for edge in edges)

            # unused cells get -1
            edge_array = [['-1'] * (max_char_kind + 1)
                          for _ in range(max_state + 1)]

            for i in range(max_state + 1):
                for j in range(max_char_kind + 1):
                    entry = edges.get((i, j))
                    if entry is not None:
                        # pack (new_state, action) into 32 bits
                        edge_array[i][j] = '(%d<<16)|%d' % entry

            parts = []
            for row in edge_array:
                parts.append(' {')
                parts.append(', '.join('%10s' % cell for cell in row))
                parts.append(' },\n')

            out("""\
int _IFS_EDGE[%d][%d] = {
%s
};
""", max_state + 1, max_char_kind + 1, ''.join(parts))

            out("""\
// Note: all of these are integers, e.g. state_i, emit_i, char_kind_i
using runtime_asdl::state_t;
using runtime_asdl::emit_t;
using runtime_asdl::char_kind_t;

Tuple2<state_t, emit_t> IfsEdge(state_t state, runtime_asdl::char_kind_t ch) {
 int cell = _IFS_EDGE[state][ch];
 state_t new_state = cell >> 16;
 emit_t emit = cell & 0xFFFF;
 return Tuple2<state_t, emit_t>(new_state, emit);
}
""")

            GenStrList(consts.BUILTIN_NAMES, 'BUILTIN_NAMES', out)
            GenStrList(consts.OSH_KEYWORD_NAMES, 'OSH_KEYWORD_NAMES', out)
            GenStrList(consts.SET_OPTION_NAMES, 'SET_OPTION_NAMES', out)
            GenStrList(consts.SHOPT_OPTION_NAMES, 'SHOPT_OPTION_NAMES', out)

            def _CString(s):
                # Hack that does backslash escaping, e.g. \\
                # We could also use C++ strings
                import json
                return json.dumps(s)

            GLOBAL_STRINGS = [
                'ASSIGN_ARG_RE',
                'TEST_V_RE',
            ]
            for var_name in GLOBAL_STRINGS:
                out('GLOBAL_STR(%s, %s);', var_name,
                    _CString(getattr(consts, var_name)))

            out("""\
} // namespace consts
""")

    elif action == 'py-consts':
        # It's kind of weird to use the generated code to generate more code.
        # Can we do this instead with the parsed module for "id" and "types.asdl"?

        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

        print('')
        print('BOOL_ARG_TYPES = {')
        for id_ in sorted(BOOL_ARG_TYPES):
            v = BOOL_ARG_TYPES[id_]
            # HACK
            v = bool_arg_type_str(v).replace('.', '_e.')
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

        # The three 'test' builtin tables share one layout: {op string: Id}
        test_tables = [
            ('TEST_UNARY_LOOKUP', TEST_UNARY_LOOKUP),
            ('TEST_BINARY_LOOKUP', TEST_BINARY_LOOKUP),
            ('TEST_OTHER_LOOKUP', TEST_OTHER_LOOKUP),
        ]
        for table_name, table in test_tables:
            print('')
            print('%s = {' % table_name)
            for op_str in sorted(table):
                v = Id_str(table[op_str])
                print(' %r: %s,' % (op_str, v))
            print('}')

        print('')
        print('ID_TO_KIND = {')
        for id_ in sorted(ID_TO_KIND):
            v = Kind_str(ID_TO_KIND[id_])
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

    else:
        raise RuntimeError('Invalid action %r' % action)
642
643
if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        # Usage errors from main() become a one-line message and exit code 1.
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)