OILS / asdl / front_end.py

656 lines, 372 significant
1"""front_end.py: Lexer and parser for the ASDL schema language."""
2from __future__ import print_function
3
4import re
5
6from asdl import ast
7from asdl.ast import (Use, Module, TypeDecl, SubTypeDecl, Constructor, Field,
8 Sum, SimpleSum, Product, Extern)
9from asdl.util import log
10
11# type checking not turned on yet
12from typing import List, Dict, Optional, cast, TYPE_CHECKING
13
14_ = log
15
16_KEYWORDS = ['use', 'module', 'generate', 'extern']
17
18_TOKENS = [
19 ('Keyword', ''),
20 ('Name', ''),
21
22 # For operators, the string matters
23 ('Equals', '='),
24 ('Comma', ','),
25 ('Question', '?'),
26 ('Pipe', '|'),
27 ('Asterisk', '*'),
28 ('LParen', '('),
29 ('RParen', ')'),
30 ('LBrace', '{'),
31 ('RBrace', '}'),
32 ('Percent', '%'),
33 ('LessThan', '<'), # for subtyping CompoundWord < List[word_part]
34
35 # Oils addition for parameterized types.
36 ('LBracket', '['),
37 ('RBracket', ']'),
38
39 # - Start with Dict[string, bool].
40 # - List[string] is an alias for string*
41 #
42 # statically typed: Dict and List
43 # dynamically typed: dict and list
44]
45
46_TOKEN_STR = [name for name, _ in _TOKENS] # integer -> string like LParen
47_TOKEN_INT = {} # string like '(' -> integer
48
49
class TokenKind(object):
    """ASDL tokens.

    TokenKind.LBrace == 9, etc.  The integer values are assigned below, in
    the order the names appear in _TOKENS.
    """
    pass


for i, (name, val) in enumerate(_TOKENS):
    setattr(TokenKind, name, i)
    _TOKEN_INT[val] = i

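# A sketch of the constants the loop above produces (not in the original
# source), following the order of _TOKENS:
#
#   TokenKind.Keyword == 0, TokenKind.Name == 1, TokenKind.Equals == 2, ...
#   _TOKEN_INT['='] == TokenKind.Equals
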

class Token(object):

    def __init__(self, kind, value, lineno):
        self.kind = kind
        self.value = value
        self.lineno = lineno


class ASDLSyntaxError(Exception):

    def __init__(self, msg, lineno=None):
        self.msg = msg
        self.lineno = lineno or '<unknown>'

    def __str__(self):
        return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)


TOKEN_RE = r'\s*(\w+|--.*|#.*|.)'


def _Tokenize(f):
    """Tokenize the given buffer.

    Yield Token objects.
    """
    for lineno, line in enumerate(f, 1):
        for m in re.finditer(TOKEN_RE, line.strip()):
            c = m.group(1)
            if c in _KEYWORDS:
                yield Token(TokenKind.Keyword, c, lineno)

            elif c[0].isalpha() or c[0] == '_':
                yield Token(TokenKind.Name, c, lineno)

            elif c.startswith('--') or c.startswith('#'):
                # ASDL comments start with --
                # Added # comments like Python and shell
                break

            else:
                # Operators
                try:
                    op_kind = _TOKEN_INT[c]
                except KeyError:
                    raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
                yield Token(op_kind, c, lineno)

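# A usage sketch for _Tokenize (hypothetical, not in the original file):
#
#   import io
#   for tok in _Tokenize(io.StringIO('point = (int x, int y)  # trailing')):
#       print(tok.kind, tok.value, tok.lineno)
#
# This yields Name/Equals/LParen/Name/Name/Comma/... tokens, all with
# lineno 1; the trailing '#' comment is dropped by the 'break' above.
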

def _SumIsSimple(variant_list):
    """Return True if a sum is simple.

    A sum is simple if its variants have no fields, e.g.
        unaryop = Invert | Not | UAdd | USub
    """
    for t in variant_list:
        if t.fields or t.shared_type:
            return False
    return True


_CODE_GEN_OPTIONS = [
    'no_namespace_suffix',  # Id.Foo instead of Id_e.Foo
    'integers',  # integer builtin_i instead of strongly typed builtin_e
    'uint16',  # like integers, but use uint16_t instead
    'bit_set',  # not implemented: 1 << n instead of n

    # probably don't need this
    # 'common_synthetic_field:left_tok',

    # Put this type, and the transitive closure of types it references, in the
    # unique "first class variant" namespace, and generate type reflection.
    'reflect_all_types',

    # Squeeze and Freeze, with the number of bits as an option.  Hm, the
    # headers here still need type reflection.  Probably OK.
    'mirror_all_types:16',
]


class ASDLParser(object):
    """Parser for ASDL files.

    Create, then call the parse method on a buffer containing ASDL. This
    is a simple recursive descent parser that uses _Tokenize for the
    lexing.
    """

    def __init__(self):
        self._tokenizer = None
        self.cur_token = None

    def parse(self, f):
        """Parse the ASDL in the file and return an AST with a Module root."""
        self._tokenizer = _Tokenize(f)
        self._advance()
        return self._parse_module()

    def _parse_module(self):
        """
        type_decl : NAME '=' compound_type
        module : 'module' NAME '{'
                 use*
                 extern*
                 type_decl*
                 '}'

        We added:
        - use for imports
        - generate on sum types
        """
        if not self._at_keyword('module'):
            raise ASDLSyntaxError(
                'Expected "module" (found {})'.format(self.cur_token.value),
                self.cur_token.lineno)
        self._advance()
        name = self._match(TokenKind.Name)
        self._match(TokenKind.LBrace)

        uses = []
        while self._at_keyword('use'):
            uses.append(self._parse_use())

        externs = []
        while self._at_keyword('extern'):
            externs.append(self._parse_extern())

        defs = []
        while self.cur_token.kind == TokenKind.Name:
            typename = self._advance()
            if self.cur_token.kind == TokenKind.Equals:
                self._advance()
                type_ = self._parse_compound_type()
                defs.append(TypeDecl(typename, type_))
            elif self.cur_token.kind == TokenKind.LessThan:
                self._advance()
                type_ = self._parse_type_expr()
                defs.append(SubTypeDecl(typename, type_))
            else:
                raise ASDLSyntaxError(
                    'Expected = or < after type name (found {})'.format(
                        self.cur_token.value), self.cur_token.lineno)

        self._match(TokenKind.RBrace)
        return Module(name, uses, externs, defs)

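    # A sketch of the input shape _parse_module accepts (hypothetical schema,
    # not from the repo):
    #
    #   module demo {
    #     use frontend syntax { Token }     -- zero or more 'use' lines
    #     extern [ _Callable ]              -- zero or more 'extern' lines
    #     point = (int x, int y)            -- type_decl (product)
    #     color = Red | Green               -- type_decl (sum)
    #     CompoundWord < List[Token]        -- subtype declaration
    #   }
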
    def _parse_use(self):
        """
        use: 'use' NAME+ '{' NAME+ '}'

        example: use frontend syntax { Token }

        This means frontend/syntax.asdl.h :: Token
        """
        self._advance()  # past 'use'
        module_parts = []
        while self.cur_token.kind == TokenKind.Name:
            part = self._advance()
            module_parts.append(part)

        self._match(TokenKind.LBrace)

        type_names = []
        while self.cur_token.kind == TokenKind.Name:
            t = self._advance()
            type_names.append(t)
            if self.cur_token.kind == TokenKind.RBrace:  # end of { } list
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RBrace)
        #print('MOD %s' % module_parts)
        return Use(module_parts, type_names)

    def _parse_extern(self):
        """
        extern: 'extern' '[' NAME+ ']'

        Examples:
            extern [ _Builtin ]
            extern [ _Callable ]
        """
        self._advance()  # past 'extern'

        self._match(TokenKind.LBracket)

        # At least one name
        names = [self._match(TokenKind.Name)]

        while self.cur_token.kind == TokenKind.Name:
            names.append(self._advance())

        self._match(TokenKind.RBracket)

        return Extern(names)

    def _parse_compound_type(self):
        """
        constructor : NAME fields?
                    | NAME '%' NAME  # shared variant

        sum : constructor ('|' constructor)* generate?

        compound_type : product
                      | sum

        Examples:
            alloc_members =
              List
            | Dict
            | Struct
            generate [bit_set]

            -- color::Red, not color_e::Red or color_i::Red
            color = Red | Green
            generate [integers, no_namespace_suffix]
        """
        if self.cur_token.kind == TokenKind.LParen:
            # If we see a (, it's a product
            return self._parse_product()
        else:
            # Otherwise it's a sum. Look for ConstructorId
            sumlist = []
            while True:
                cons_name = self._match(TokenKind.Name)

                shared_type = None
                fields = None
                if self.cur_token.kind == TokenKind.LParen:
                    fields = self._parse_fields()
                elif self.cur_token.kind == TokenKind.Percent:
                    self._advance()
                    shared_type = self._match(TokenKind.Name)
                else:
                    pass

                cons = Constructor(cons_name, shared_type, fields)
                sumlist.append(cons)

                if self.cur_token.kind != TokenKind.Pipe:
                    break
                self._advance()
            generate = self._parse_optional_generate()

            # Additional validation
            if generate is not None:
                for g in generate:
                    if g not in _CODE_GEN_OPTIONS:
                        raise ASDLSyntaxError('Invalid code gen option %r' % g,
                                              self.cur_token.lineno)

            if _SumIsSimple(sumlist):
                return SimpleSum(sumlist, generate)
            else:
                return Sum(sumlist, generate)

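    # Example (sketch): 'color = Red | Green' comes back as roughly
    # SimpleSum([Constructor('Red'), Constructor('Green')], None); as soon as
    # any variant has fields or a '%' shared type, the result is a Sum.
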
    def _parse_type_expr(self):
        """One or two params:

        type_params : '[' type_expr ( ',' type_expr )* ']'

        type_expr : NAME type_params? ('?' | '*')?  # allow one suffix

        NAME is validated against Optional, List, Dict afterward
        """
        type_name = self._match(TokenKind.Name)

        # Accept Python-like naming!
        if type_name == 'str':
            type_name = 'string'

        children = []
        if self.cur_token.kind == TokenKind.LBracket:
            self._advance()
            children.append(self._parse_type_expr())
            if self.cur_token.kind == TokenKind.Comma:
                self._advance()
                children.append(self._parse_type_expr())

            self._match(TokenKind.RBracket)

        if type_name in ('List', 'Optional'):
            if len(children) != 1:
                raise ASDLSyntaxError(
                    'Expected 1 type param to {}'.format(type_name),
                    self.cur_token.lineno)
        elif type_name == 'Dict':
            if len(children) != 2:
                raise ASDLSyntaxError(
                    'Expected 2 type params to {}'.format(type_name),
                    self.cur_token.lineno)
        else:
            if len(children) != 0:
                raise ASDLSyntaxError(
                    'Expected zero type params to {}'.format(type_name),
                    self.cur_token.lineno)

        if len(children):
            typ = ast.ParameterizedType(type_name, children)
        else:
            typ = ast.NamedType(type_name)

        if self.cur_token.kind == TokenKind.Asterisk:
            # string* is equivalent to List[string]
            typ = ast.ParameterizedType('List', [typ])
            self._advance()

        elif self.cur_token.kind == TokenKind.Question:
            # string? is equivalent to Optional[string]
            typ = ast.ParameterizedType('Optional', [typ])
            self._advance()

        return typ

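    # Example (sketch): the input 'Dict[str, bool]?' comes back as
    #
    #   ParameterizedType('Optional',
    #       [ParameterizedType('Dict', [NamedType('string'), NamedType('bool')])])
    #
    # 'str' is canonicalized to 'string' above, and the trailing '?' wraps the
    # whole expression in Optional.
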
    def _parse_fields(self):
        """
        fields_inner: type_expr NAME ( ',' type_expr NAME )* ','?

        fields : '(' fields_inner? ')'

        Name Quantifier? should be changed to typename.
        """
        fields = []
        self._match(TokenKind.LParen)
        while self.cur_token.kind == TokenKind.Name:
            typ = self._parse_type_expr()
            field_name = self._match(TokenKind.Name)

            fields.append(Field(typ, field_name))

            if self.cur_token.kind == TokenKind.RParen:
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RParen)
        return fields

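    # Example (sketch): '(int x, string* y)' parses to
    #
    #   [Field(NamedType('int'), 'x'),
    #    Field(ParameterizedType('List', [NamedType('string')]), 'y')]
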
    def _parse_list(self):
        """
        list_inner: NAME ( ',' NAME )* ','?

        list : '[' list_inner? ']'
        """
        generate = []
        self._match(TokenKind.LBracket)
        while self.cur_token.kind == TokenKind.Name:
            name = self._match(TokenKind.Name)

            generate.append(name)

            if self.cur_token.kind == TokenKind.RBracket:
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RBracket)
        return generate

    def _parse_optional_generate(self):
        """
        generate : 'generate' list
        """
        if self._at_keyword('generate'):
            self._advance()
            return self._parse_list()
        else:
            return None

    def _parse_product(self):
        """Product: fields attributes?"""
        return Product(self._parse_fields())

    def _advance(self):
        """Return current token; read next token into self.cur_token."""
        cur_val = None if self.cur_token is None else self.cur_token.value
        try:
            self.cur_token = next(self._tokenizer)
        except StopIteration:
            self.cur_token = None
        return cur_val

    def _match(self, kind):
        """The 'match' primitive of RD parsers.

        * Verifies that the current token is of the given kind (kind can
          be a tuple, in which case the kind must match one of its members).
        * Returns the value of the current token
        * Reads in the next token

        Args:
            kind: A TokenKind, or a tuple of TokenKind
        """
        if self.cur_token.kind == kind:
            value = self.cur_token.value
            self._advance()
            return value
        else:
            raise ASDLSyntaxError(
                'Expected token {}, got {}'.format(_TOKEN_STR[kind],
                                                   self.cur_token.value),
                self.cur_token.lineno)

    def _at_keyword(self, keyword):
        return (self.cur_token.kind == TokenKind.Keyword and
                self.cur_token.value == keyword)


_PRIMITIVE_TYPES = [
    'string',
    'int',
    'uint16',  # used for Token length - should we generalize this?
    'BigInt',
    'float',
    'bool',

    # 'any' is used for value.{BuiltinProc,BuiltinFunc}, to cast from class
    # type
    'any',
]

if TYPE_CHECKING:
    TypeLookup = Dict[str, ast.asdl_type_t]


def _ResolveType(typ, type_lookup):
    # type: (ast.type_expr_t, TypeLookup) -> None
    """Recursively attach a 'resolved' field to AST nodes."""
    if isinstance(typ, ast.NamedType):
        if typ.name not in _PRIMITIVE_TYPES:
            ast_node = type_lookup.get(typ.name)
            if ast_node is None:
                raise ASDLSyntaxError("Couldn't find type %r" % typ.name)
            typ.resolved = ast_node

    elif isinstance(typ, ast.ParameterizedType):
        for child in typ.children:
            _ResolveType(child, type_lookup)

        if typ.type_name == 'Optional':
            child = typ.children[0]
            if isinstance(child, ast.NamedType):
                if child.name in _PRIMITIVE_TYPES and child.name != 'string':
                    raise ASDLSyntaxError(
                        'Optional primitive type {} not allowed'.format(
                            child.name))

                if child.resolved and isinstance(child.resolved,
                                                 ast.SimpleSum):
                    raise ASDLSyntaxError(
                        'Optional simple sum type {} not allowed'.format(
                            child.name))

    else:
        raise AssertionError()

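# Example (sketch): given a declaration 'Token = (str a)' and a field written
# as 'Token tok', the field's NamedType('Token') gets .resolved pointing at
# the Product node for Token.  Primitive names like 'int' and 'string' are
# skipped entirely.
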

def _ResolveFields(field_ast_nodes, type_lookup):
    # type: (List[Field], TypeLookup) -> None
    """
    Args:
        type_lookup: Populated by name resolution
    """
    for field in field_ast_nodes:
        _ResolveType(field.typ, type_lookup)


def _ResolveModule(module, app_types, do_count=None):
    # type: (Module, TypeLookup, Optional[str]) -> None
    """Name resolution for NamedType."""
    # Types that fields are declared with: int, id, word_part, etc.
    # Fields are NOT declared with Constructor names.
    type_lookup = dict(app_types)

    # Note: we don't actually load the type, and instead leave that to MyPy /
    # C++.  A consequence of this is TypeNameHeuristic().
    for u in module.uses:
        for type_name in u.type_names:
            type_lookup[type_name] = u

    # NOTE: We need two passes because types can be mutually recursive, e.g.
    # asdl/arith.asdl.

    # First pass: collect declared types and make entries for them.
    for ex in module.externs:
        last = ex.names[-1]  # e.g. _Callable
        if last in type_lookup:
            raise ASDLSyntaxError('Type %r was already defined' % last)
        type_lookup[last] = ex

    for d in module.dfns:
        if d.name in type_lookup:
            raise ASDLSyntaxError('Type %r was already defined' % d.name)

        if isinstance(d, SubTypeDecl):
            # e.g. CompoundWord < List[str]
            type_lookup[d.name] = d.base_class
        elif isinstance(d, TypeDecl):
            # e.g. Token = (str a)
            type_lookup[d.name] = d.value
        else:
            raise AssertionError()

    # Second pass: add NamedType.resolved field
    for d in module.dfns:
        if isinstance(d, SubTypeDecl):  # no fields
            _ResolveType(d.base_class, type_lookup)
            continue

        d = cast(ast.TypeDecl, d)

        ast_node = d.value
        if isinstance(ast_node, ast.Product):
            #log('fields %s', ast_node.fields)
            _ResolveFields(ast_node.fields, type_lookup)
            continue

        if isinstance(ast_node, ast.Sum):
            for cons in ast_node.types:
                _ResolveFields(cons.fields, type_lookup)
            continue

        raise AssertionError(ast_node)

    # OPTIONAL Third pass: print metrics about a type, like command_t
    if do_count:
        seen = {}  # type: Dict[str, bool]
        for d in module.dfns:
            if d.name != do_count:  # start with command_t type
                continue

            # TODO: Fix this algorithm !!!
            #
            # 1. Walk the fields in each _CompoundAST
            # 2. For each field, look at foo.resolved
            #
            # Is that it?

            if isinstance(d, SubTypeDecl):  # no fields
                _CountType(d.base_class, type_lookup, seen)
                continue

            ast_node = d.value
            if isinstance(ast_node, ast.Product):
                #log('fields %s', ast_node.fields)
                _CountFields(ast_node.fields, type_lookup, seen)
                continue

            if isinstance(ast_node, ast.Sum):
                for cons in ast_node.types:
                    key = '%s.%s' % (d.name, cons.name)
                    seen[key] = True
                    _CountFields(cons.fields, type_lookup, seen)
                continue

            raise AssertionError(ast_node)

        for name in sorted(seen):
            print(name)


def _CountType(typ, type_lookup, seen):
    # type: (ast.type_expr_t, TypeLookup, Dict[str, bool]) -> None
    """Given an AST node, add the types it references to seen."""
    if isinstance(typ, ast.NamedType):
        if typ.name not in _PRIMITIVE_TYPES:
            seen[typ.name] = True

    elif isinstance(typ, ast.ParameterizedType):
        for child in typ.children:
            _CountType(child, type_lookup, seen)

    else:
        raise AssertionError()


def _CountFields(field_ast_nodes, type_lookup, seen):
    # type: (List[Field], TypeLookup, Dict[str, bool]) -> None
    for field in field_ast_nodes:
        _CountType(field.typ, type_lookup, seen)


def LoadSchema(f, app_types, verbose=False, do_count=None):
    """Returns an AST for the schema."""
    p = ASDLParser()
    schema_ast = p.parse(f)
    if verbose:
        import sys
        schema_ast.Print(sys.stdout, 0)

    # Make sure all the names are valid
    _ResolveModule(schema_ast, app_types, do_count=do_count)
    return schema_ast
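

# A hypothetical smoke test, not part of the original module: parse a tiny
# schema from a string and list the declared type names.  An empty dict is
# passed for app_types; real callers supply application-specific types.
if __name__ == '__main__':
    import io

    _DEMO_SCHEMA = u'''
module demo {
  color = Red | Green | Blue
  point = (int x, int y)
}
'''
    demo_module = LoadSchema(io.StringIO(_DEMO_SCHEMA), {})
    for defn in demo_module.dfns:
        print(defn.name)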