asdl/front_end.py
"""front_end.py: Lexer and parser for the ASDL schema language."""
from __future__ import print_function

import re

from asdl import ast
from asdl.ast import (Use, Module, TypeDecl, SubTypeDecl, Constructor, Field,
                      Sum, SimpleSum, Product, Extern)
from asdl.util import log

from typing import List, Dict, cast, TYPE_CHECKING

if TYPE_CHECKING:
    TypeLookup = Dict[str, ast.asdl_type_t]

_ = log

_KEYWORDS = ['use', 'module', 'generate', 'extern']

_TOKENS = [
    ('Keyword', ''),
    ('Name', ''),

    # For operators, the string matters
    ('Equals', '='),
    ('Comma', ','),
    ('Question', '?'),
    ('Pipe', '|'),
    ('Asterisk', '*'),
    ('LParen', '('),
    ('RParen', ')'),
    ('LBrace', '{'),
    ('RBrace', '}'),
    ('Percent', '%'),
    ('LessThan', '<'),  # for subtyping CompoundWord < List[word_part]

    # Oils addition for parameterized types.
    ('LBracket', '['),
    ('RBracket', ']'),

    # - Start with Dict[string, bool].
    # - List[string] is an alias for string*
    #
    # statically typed: Dict and List
    # dynamically typed: dict and list
]

_TOKEN_STR = [name for name, _ in _TOKENS]  # integer -> string like LParen
_TOKEN_INT = {}  # string like '(' -> integer

class TokenKind(object):
    """ASDL tokens.

    TokenKind.LBrace = 9, etc.
    """
    pass


for i, (name, val) in enumerate(_TOKENS):
    setattr(TokenKind, name, i)
    _TOKEN_INT[val] = i

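# A small sanity sketch (illustrative; these asserts are not part of the
# original module): the loop above turns each _TOKENS entry into an integer
# class attribute and fills the operator lookup table.
assert TokenKind.Keyword == 0 and TokenKind.Name == 1
assert _TOKEN_INT['('] == TokenKind.LParen
assert _TOKEN_INT['{'] == TokenKind.LBrace
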
class Token(object):

    def __init__(self, kind, value, lineno):
        self.kind = kind
        self.value = value
        self.lineno = lineno


class ASDLSyntaxError(Exception):

    def __init__(self, msg, lineno=None):
        self.msg = msg
        self.lineno = lineno or '<unknown>'

    def __str__(self):
        return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)


TOKEN_RE = r'\s*(\w+|--.*|#.*|.)'

def _Tokenize(f):
    """Tokenize the given buffer.

    Yield Token objects.
    """
    for lineno, line in enumerate(f, 1):
        for m in re.finditer(TOKEN_RE, line.strip()):
            c = m.group(1)
            if c in _KEYWORDS:
                yield Token(TokenKind.Keyword, c, lineno)

            elif c[0].isalpha() or c[0] == '_':
                yield Token(TokenKind.Name, c, lineno)

            elif c.startswith('--') or c.startswith('#'):
                # ASDL comments start with --
                # Added # comments like Python and shell
                break

            else:
                # Operators
                try:
                    op_kind = _TOKEN_INT[c]
                except KeyError:
                    raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
                yield Token(op_kind, c, lineno)

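# A minimal usage sketch for the tokenizer (illustrative; _DemoTokenize is not
# part of the original module).  _Tokenize accepts any iterable of lines, so a
# plain list of strings works here.
def _DemoTokenize():
    for tok in _Tokenize(['point = (int x, int y)']):
        # Prints, in order: Name 'point', Equals '=', LParen '(', Name 'int',
        # Name 'x', Comma ',', Name 'int', Name 'y', RParen ')', all on line 1
        print(_TOKEN_STR[tok.kind], repr(tok.value), tok.lineno)
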
def _SumIsSimple(variant_list):
    """Return True if a sum is simple.

    A sum is simple if its types have no fields, e.g.
      unaryop = Invert | Not | UAdd | USub
    """
    for t in variant_list:
        if t.fields or t.shared_type:
            return False
    return True

_CODE_GEN_OPTIONS = [
    'no_namespace_suffix',  # Id.Foo instead of Id_e.Foo
    'integers',  # integer builtin_i instead of strongly typed builtin_e
    'uint16',  # like integers, but use uint16_t instead
    'bit_set',  # not implemented: 1 << n instead of n

    # probably don't need this
    # 'common_synthetic_field:left_tok',

    # Put this type, and the transitive closure of types it references, in the
    # unique "first class variant" namespace, and generate type reflection.
    'reflect_all_types',

    # Squeeze and Freeze, with the number of bits as an option.  Hmm, the
    # headers here still need type reflection.  Probably OK.
    'mirror_all_types:16',
]

class ASDLParser(object):
    """Parser for ASDL files.

    Create, then call the parse method on a buffer containing ASDL. This
    is a simple recursive descent parser that uses _Tokenize for the
    lexing.
    """

    def __init__(self):
        self._tokenizer = None
        self.cur_token = None

    def parse(self, f):
        """Parse the ASDL in the file and return an AST with a Module root."""
        self._tokenizer = _Tokenize(f)
        self._advance()
        return self._parse_module()

    def _parse_module(self):
        """
        type_decl    : NAME '=' compound_type
        subtype_decl : NAME '<' type_expr
        module       : 'module' NAME '{'
                       use*
                       extern*
                       (type_decl | subtype_decl)*
                       '}'

        We added:
        - use for imports
        - subtyping with <
        - generate on sum types
        """
        if not self._at_keyword('module'):
            raise ASDLSyntaxError(
                'Expected "module" (found {})'.format(self.cur_token.value),
                self.cur_token.lineno)
        self._advance()
        name = self._match(TokenKind.Name)
        self._match(TokenKind.LBrace)

        uses = []
        while self._at_keyword('use'):
            uses.append(self._parse_use())

        externs = []
        while self._at_keyword('extern'):
            externs.append(self._parse_extern())

        defs = []
        while self.cur_token.kind == TokenKind.Name:
            typename = self._advance()
            if self.cur_token.kind == TokenKind.Equals:
                self._advance()
                type_ = self._parse_compound_type()
                defs.append(TypeDecl(typename, type_))
            elif self.cur_token.kind == TokenKind.LessThan:
                self._advance()
                type_ = self._parse_type_expr()
                defs.append(SubTypeDecl(typename, type_))
            else:
                raise ASDLSyntaxError(
                    'Expected = or < after type name (found {})'.format(
                        self.cur_token.value), self.cur_token.lineno)

        self._match(TokenKind.RBrace)
        return Module(name, uses, externs, defs)

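    # For orientation, a small schema that _parse_module accepts (illustrative
    # example, not taken from the repo's .asdl files):
    #
    #     module demo {
    #       use frontend syntax { Token }
    #       color = Red | Green
    #       point = (int x, int y)
    #       CompoundWord < List[Token]
    #     }
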
    def _parse_use(self):
        """
        use: 'use' NAME+ '{' NAME+ '}'

        example: use frontend syntax { Token }

        This means frontend/syntax.asdl.h :: Token
        """
        self._advance()  # past 'use'
        module_parts = []
        while self.cur_token.kind == TokenKind.Name:
            part = self._advance()
            module_parts.append(part)

        self._match(TokenKind.LBrace)

        type_names = []
        while self.cur_token.kind == TokenKind.Name:
            t = self._advance()
            type_names.append(t)
            if self.cur_token.kind == TokenKind.RBrace:
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RBrace)
        #print('MOD %s' % module_parts)
        return Use(module_parts, type_names)

    def _parse_extern(self):
        """
        extern: 'extern' '[' NAME+ ']'

        Examples:
          extern [ _Builtin ]
          extern [ _Callable ]
        """
        self._advance()  # past 'extern'

        self._match(TokenKind.LBracket)

        # At least one name
        names = [self._match(TokenKind.Name)]

        while self.cur_token.kind == TokenKind.Name:
            names.append(self._advance())

        self._match(TokenKind.RBracket)

        return Extern(names)

    def _parse_compound_type(self):
        """
        constructor : NAME fields?
                    | NAME '%' NAME  # shared variant

        sum : constructor ('|' constructor)* generate?

        compound_type : product
                      | sum

        Examples:
          alloc_members =
            List
          | Dict
          | Struct
            generate [bit_set]

          -- color::Red, not color_e::Red or color_i::Red
          color = Red | Green
            generate [integers, no_namespace_suffix]
        """
        if self.cur_token.kind == TokenKind.LParen:
            # If we see a (, it's a product
            return self._parse_product()
        else:
            # Otherwise it's a sum. Look for ConstructorId
            sumlist = []
            while True:
                cons_name = self._match(TokenKind.Name)

                shared_type = None
                fields = None
                if self.cur_token.kind == TokenKind.LParen:
                    fields = self._parse_fields()
                elif self.cur_token.kind == TokenKind.Percent:
                    self._advance()
                    shared_type = self._match(TokenKind.Name)
                else:
                    pass

                cons = Constructor(cons_name, shared_type, fields)
                sumlist.append(cons)

                if self.cur_token.kind != TokenKind.Pipe:
                    break
                self._advance()
            generate = self._parse_optional_generate()

            # Additional validation
            if generate is not None:
                for g in generate:
                    if g not in _CODE_GEN_OPTIONS:
                        raise ASDLSyntaxError('Invalid code gen option %r' % g,
                                              self.cur_token.lineno)

            if _SumIsSimple(sumlist):
                return SimpleSum(sumlist, generate)
            else:
                return Sum(sumlist, generate)

    def _parse_type_expr(self):
        """One or two params:

        type_params : '[' type_expr ( ',' type_expr )* ']'

        type_expr : NAME type_params? ('?' | '*')?  # allow one suffix

        NAME is validated against Optional, List, Dict afterward
        """
        type_name = self._match(TokenKind.Name)

        # Accept Python-like naming!
        if type_name == 'str':
            type_name = 'string'

        children = []
        if self.cur_token.kind == TokenKind.LBracket:
            self._advance()
            children.append(self._parse_type_expr())
            if self.cur_token.kind == TokenKind.Comma:
                self._advance()
                children.append(self._parse_type_expr())

            self._match(TokenKind.RBracket)

            if type_name in ('List', 'Optional'):
                if len(children) != 1:
                    raise ASDLSyntaxError(
                        'Expected 1 type param to {}'.format(type_name),
                        self.cur_token.lineno)
            elif type_name == 'Dict':
                if len(children) != 2:
                    raise ASDLSyntaxError(
                        'Expected 2 type params to {}'.format(type_name),
                        self.cur_token.lineno)
            else:
                if len(children) != 0:
                    raise ASDLSyntaxError(
                        'Expected zero type params to {}'.format(type_name),
                        self.cur_token.lineno)

        if len(children):
            typ = ast.ParameterizedType(type_name, children)
        else:
            typ = ast.NamedType(type_name)

        if self.cur_token.kind == TokenKind.Asterisk:
            # string* is equivalent to List[string]
            typ = ast.ParameterizedType('List', [typ])
            self._advance()

        elif self.cur_token.kind == TokenKind.Question:
            # string? is equivalent to Optional[string]
            typ = ast.ParameterizedType('Optional', [typ])
            self._advance()

        return typ

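    # For example (illustrative, not in the original source), the field type
    # 'Dict[string, int]?' parses as
    #     ParameterizedType('Optional', [
    #         ParameterizedType('Dict', [NamedType('string'), NamedType('int')])])
    # and 'Token*' parses as ParameterizedType('List', [NamedType('Token')]).
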
    def _parse_fields(self):
        """
        fields_inner: type_expr NAME ( ',' type_expr NAME )* ','?

        fields      : '(' fields_inner? ')'

        Name Quantifier? should be changed to typename.
        """
        fields = []
        self._match(TokenKind.LParen)
        while self.cur_token.kind == TokenKind.Name:
            typ = self._parse_type_expr()
            field_name = self._match(TokenKind.Name)

            fields.append(Field(typ, field_name))

            if self.cur_token.kind == TokenKind.RParen:
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RParen)
        return fields

    def _parse_list(self):
        """
        list_inner: NAME ( ',' NAME )* ','?

        list      : '[' list_inner? ']'
        """
        generate = []
        self._match(TokenKind.LBracket)
        while self.cur_token.kind == TokenKind.Name:
            name = self._match(TokenKind.Name)

            generate.append(name)

            if self.cur_token.kind == TokenKind.RBracket:
                break
            elif self.cur_token.kind == TokenKind.Comma:
                self._advance()

        self._match(TokenKind.RBracket)
        return generate

    def _parse_optional_generate(self):
        """
        generate : 'generate' list
        """
        if self._at_keyword('generate'):
            self._advance()
            return self._parse_list()
        else:
            return None

    def _parse_product(self):
        """Product: fields attributes?"""
        return Product(self._parse_fields())

    def _advance(self):
        """Return current token; read next token into self.cur_token."""
        cur_val = None if self.cur_token is None else self.cur_token.value
        try:
            self.cur_token = next(self._tokenizer)
        except StopIteration:
            self.cur_token = None
        return cur_val

    def _match(self, kind):
        """The 'match' primitive of RD parsers.

        * Verifies that the current token is of the given kind (kind can
          be a tuple, in which case the kind must match one of its members).
        * Returns the value of the current token
        * Reads in the next token

        Args:
          kind: A TokenKind, or a tuple of TokenKind
        """
        if self.cur_token.kind == kind:
            value = self.cur_token.value
            self._advance()
            return value
        else:
            raise ASDLSyntaxError(
                'Expected token {}, got {}'.format(_TOKEN_STR[kind],
                                                   self.cur_token.value),
                self.cur_token.lineno)

    def _at_keyword(self, keyword):
        return (self.cur_token.kind == TokenKind.Keyword and
                self.cur_token.value == keyword)

_PRIMITIVE_TYPES = [
    'string',
    'int',
    'uint16',  # used for Token length - should we generalize this?
    'BigInt',
    'float',
    'bool',

    # 'any' is used for value.{BuiltinProc,BuiltinFunc}, to cast from class
    # type
    'any',
]

def _ResolveType(typ, type_lookup):
    # type: (ast.type_expr_t, TypeLookup) -> None
    """Recursively attach a 'resolved' field to AST nodes."""
    if isinstance(typ, ast.NamedType):
        if typ.name not in _PRIMITIVE_TYPES:
            ast_node = type_lookup.get(typ.name)
            if ast_node is None:
                raise ASDLSyntaxError("Couldn't find type %r" % typ.name)
            typ.resolved = ast_node

    elif isinstance(typ, ast.ParameterizedType):
        for child in typ.children:
            _ResolveType(child, type_lookup)

        if typ.type_name == 'Optional':
            child = typ.children[0]
            if isinstance(child, ast.NamedType):
                if child.name in _PRIMITIVE_TYPES and child.name != 'string':
                    raise ASDLSyntaxError(
                        'Optional primitive type {} not allowed'.format(
                            child.name))

                if child.resolved and isinstance(child.resolved,
                                                 ast.SimpleSum):
                    raise ASDLSyntaxError(
                        'Optional simple sum type {} not allowed'.format(
                            child.name))

    else:
        raise AssertionError()

def _ResolveFields(field_ast_nodes, type_lookup):
    # type: (List[Field], TypeLookup) -> None
    """
    Args:
      type_lookup: Populated by name resolution
    """
    for field in field_ast_nodes:
        _ResolveType(field.typ, type_lookup)

def _ResolveModule(module, type_lookup):
    # type: (Module, TypeLookup) -> None
    """Name resolution for NamedType."""

    # Note: we don't actually load the type, and instead leave that to MyPy /
    # C++.  A consequence of this is TypeNameHeuristic().
    for u in module.uses:
        for type_name in u.type_names:
            type_lookup[type_name] = u

    # NOTE: We need two passes because types can be mutually recursive, e.g.
    # asdl/arith.asdl.

    # First pass: collect declared types and make entries for them.
    for ex in module.externs:
        last = ex.names[-1]  # e.g. _Callable
        if last in type_lookup:
            raise ASDLSyntaxError('Type %r was already defined' % last)
        type_lookup[last] = ex

    for d in module.dfns:

        if isinstance(d, SubTypeDecl):
            if d.name in type_lookup:
                raise ASDLSyntaxError('Type %r was already defined' % d.name)

            # e.g. CompoundWord < List[str]
            type_lookup[d.name] = ast.DummyType()  # this value isn't used?

        elif isinstance(d, TypeDecl):
            if d.name in type_lookup:
                raise ASDLSyntaxError('Type %r was already defined' % d.name)

            # e.g. Token = (str a)
            type_lookup[d.name] = d.value

        else:
            raise AssertionError()

    # Second pass: add NamedType.resolved field
    for d in module.dfns:
        if isinstance(d, SubTypeDecl):  # no fields
            _ResolveType(d.base_class, type_lookup)
            continue

        d = cast(ast.TypeDecl, d)

        ast_node = d.value
        if isinstance(ast_node, ast.Product):
            #log('fields %s', ast_node.fields)
            _ResolveFields(ast_node.fields, type_lookup)
            continue

        if isinstance(ast_node, ast.Sum):
            for cons in ast_node.types:
                _ResolveFields(cons.fields, type_lookup)
            continue

        raise AssertionError(ast_node)

def LoadSchema(f, verbose=False):
    """Returns an AST for the schema."""
    p = ASDLParser()
    schema_ast = p.parse(f)
    if verbose:
        import sys
        schema_ast.Print(sys.stdout, 0)

    # Make sure all the names are valid
    type_lookup = {}
    _ResolveModule(schema_ast, type_lookup)
    return schema_ast, type_lookup
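

# A minimal usage sketch (illustrative; this __main__ block is not part of the
# original module, and 'demo.asdl' names a hypothetical schema file):
#
#     python front_end.py demo.asdl
if __name__ == '__main__':
    import sys
    with open(sys.argv[1]) as f:
        schema_ast, type_lookup = LoadSchema(f, verbose=True)
    print('Resolved %d type names' % len(type_lookup))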