OILS / asdl / front_end.py View on Github | oilshell.org

548 lines, 316 significant
1"""front_end.py: Lexer and parser for the ASDL schema language."""
2from __future__ import print_function
3
4import re
5
6from asdl import ast
7from asdl.ast import (AST, Use, Module, TypeDecl, SubTypeDecl, Constructor,
8 Field, Sum, SimpleSum, Product)
9from asdl.util import log
10
11_ = log
12
13_KEYWORDS = ['use', 'module', 'generate']
14
15_TOKENS = [
16 ('Keyword', ''),
17 ('Name', ''),
18
19 # For operators, the string matters
20 ('Equals', '='),
21 ('Comma', ','),
22 ('Question', '?'),
23 ('Pipe', '|'),
24 ('Asterisk', '*'),
25 ('LParen', '('),
26 ('RParen', ')'),
27 ('LBrace', '{'),
28 ('RBrace', '}'),
29 ('Percent', '%'),
30 ('LessThan', '<'), # for subtyping CompoundWord < List[word_part]
31
32 # Oils addition for parameterized types.
33 ('LBracket', '['),
34 ('RBracket', ']'),
35
36 # - Start with Dict[string, bool].
37 # - List[string] is an alias for string*
38 #
39 # statically typed: Dict and List
40 # dynamically typed: dict and list
41]
42
43_TOKEN_STR = [name for name, _ in _TOKENS] # integer -> string like LParen
44_TOKEN_INT = {} # string like '(' -> integer
45
46
47class TokenKind(object):
48 """ASDL tokens.
49
50 TokenKind.LBrace = 5, etc.
51 """
52 pass
53
54
55for i, (name, val) in enumerate(_TOKENS):
56 setattr(TokenKind, name, i)
57 _TOKEN_INT[val] = i
58
59
60class Token(object):
61
62 def __init__(self, kind, value, lineno):
63 self.kind = kind
64 self.value = value
65 self.lineno = lineno
66
67
68class ASDLSyntaxError(Exception):
69
70 def __init__(self, msg, lineno=None):
71 self.msg = msg
72 self.lineno = lineno or '<unknown>'
73
74 def __str__(self):
75 return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
76
77
78def _Tokenize(f):
79 """Tokenize the given buffer.
80
81 Yield Token objects.
82 """
83 for lineno, line in enumerate(f, 1):
84 for m in re.finditer(r'\s*(\w+|--.*|#.*|.)', line.strip()):
85 c = m.group(1)
86 if c in _KEYWORDS:
87 yield Token(TokenKind.Keyword, c, lineno)
88
89 elif c[0].isalpha():
90 yield Token(TokenKind.Name, c, lineno)
91
92 elif c.startswith('--') or c.startswith('#'):
93 # ASDL comments start with --
94 # Added # comments like Python and shell
95 break
96
97 else:
98 # Operators
99 try:
100 op_kind = _TOKEN_INT[c]
101 except KeyError:
102 raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
103 yield Token(op_kind, c, lineno)
104
105
106def _SumIsSimple(variant_list):
107 """Return True if a sum is a simple.
108
109 A sum is simple if its types have no fields, e.g. unaryop = Invert |
110 Not | UAdd | USub
111 """
112 for t in variant_list:
113 if t.fields or t.shared_type:
114 return False
115 return True
116
117
118_CODE_GEN_OPTIONS = [
119 'no_namespace_suffix', # Id.Foo instead of Id_e.Foo
120 'integers', # integer builtin_i instead of strongly typed builtin_e
121 'uint16', # like integers, but use uint16_t instead
122 'bit_set', # not implemented: 1 << n instead of n
123
124 # probably don't need this
125 # 'common_synthetic_field:left_tok',
126
127 # Put this type, and transitive closure of types it references, in the
128 # unique "first class variant" namespace, and generate type reflection.
129 'reflect_all_types',
130
131 # Squeeze and Freeze, with the number of bits as a option Hm the headers
132 # here still need type reflection. Probably OK.
133 'mirror_all_types:16',
134]
135
136
137class ASDLParser(object):
138 """Parser for ASDL files.
139
140 Create, then call the parse method on a buffer containing ASDL. This
141 is a simple recursive descent parser that uses _Tokenize for the
142 lexing.
143 """
144
145 def __init__(self):
146 self._tokenizer = None
147 self.cur_token = None
148
149 def parse(self, f):
150 """Parse the ASDL in the file and return an AST with a Module root."""
151 self._tokenizer = _Tokenize(f)
152 self._advance()
153 return self._parse_module()
154
155 def _parse_module(self):
156 """
157 type_decl : NAME '=' compound_type
158 module : 'module' NAME '{' use* type_decl* '}'
159
160 We added:
161 - use for imports
162 - generate on sum types
163 """
164 if not self._at_keyword('module'):
165 raise ASDLSyntaxError(
166 'Expected "module" (found {})'.format(self.cur_token.value),
167 self.cur_token.lineno)
168 self._advance()
169 name = self._match(TokenKind.Name)
170 self._match(TokenKind.LBrace)
171
172 uses = []
173 while self._at_keyword('use'):
174 uses.append(self._parse_use())
175
176 defs = []
177 while self.cur_token.kind == TokenKind.Name:
178 typename = self._advance()
179 if self.cur_token.kind == TokenKind.Equals:
180 self._advance()
181 type_ = self._parse_compound_type()
182 defs.append(TypeDecl(typename, type_))
183 elif self.cur_token.kind == TokenKind.LessThan:
184 self._advance()
185 type_ = self._parse_type_expr()
186 defs.append(SubTypeDecl(typename, type_))
187 else:
188 raise ASDLSyntaxError(
189 'Expected = or < after type name (found {})'.format(
190 self.cur_token.value), self.cur_token.lineno)
191
192 self._match(TokenKind.RBrace)
193 return Module(name, uses, defs)
194
195 def _parse_use(self):
196 """
197 use: 'use' NAME+ '{' NAME+ '}'
198
199 example: use frontend syntax { Token }
200
201 This means frontend/syntax.asdl.h :: Token
202 """
203 self._advance() # past 'use'
204 module_parts = []
205 while self.cur_token.kind == TokenKind.Name:
206 part = self._advance()
207 module_parts.append(part)
208
209 self._match(TokenKind.LBrace)
210
211 type_names = []
212 while self.cur_token.kind == TokenKind.Name:
213 t = self._advance()
214 type_names.append(t)
215 if self.cur_token.kind == TokenKind.RParen:
216 break
217 elif self.cur_token.kind == TokenKind.Comma:
218 self._advance()
219
220 self._match(TokenKind.RBrace)
221 #print('MOD %s' % module_parts)
222 return Use(module_parts, type_names)
223
224 def _parse_compound_type(self):
225 """
226 constructor : NAME fields?
227 | NAME '%' NAME # shared variant
228
229 sum : constructor ('|' constructor)* generate?
230
231 compound_type : product
232 | sum
233
234 Examples:
235 alloc_members =
236 List
237 | Dict
238 | Struct
239 generate [bit_set]
240
241 -- color::Red, not color_e::Red or color_i::Red
242 color = Red | Green
243 generate [integers, no_sum_suffix]
244 """
245 if self.cur_token.kind == TokenKind.LParen:
246 # If we see a (, it's a product
247 return self._parse_product()
248 else:
249 # Otherwise it's a sum. Look for ConstructorId
250 sumlist = []
251 while True:
252 cons_name = self._match(TokenKind.Name)
253
254 shared_type = None
255 fields = None
256 if self.cur_token.kind == TokenKind.LParen:
257 fields = self._parse_fields()
258 elif self.cur_token.kind == TokenKind.Percent:
259 self._advance()
260 shared_type = self._match(TokenKind.Name)
261 else:
262 pass
263
264 cons = Constructor(cons_name, shared_type, fields)
265 sumlist.append(cons)
266
267 if self.cur_token.kind != TokenKind.Pipe:
268 break
269 self._advance()
270 generate = self._parse_optional_generate()
271
272 # Additional validation
273 if generate is not None:
274 for g in generate:
275 if g not in _CODE_GEN_OPTIONS:
276 raise ASDLSyntaxError('Invalid code gen option %r' % g,
277 self.cur_token.lineno)
278
279 if _SumIsSimple(sumlist):
280 return SimpleSum(sumlist, generate)
281 else:
282 return Sum(sumlist, generate)
283
284 def _parse_type_expr(self):
285 """One or two params:
286
287 type_params : '[' type_expr ( ',' type_expr )* ']'
288
289 type_expr : NAME type_params? (''?' | '*')? # allow one suffix
290
291 NAME is validated against Optional, List, Dict afterward
292 """
293 type_name = self._match(TokenKind.Name)
294
295 # Accept Python-like naming!
296 if type_name == 'str':
297 type_name = 'string'
298
299 children = []
300 if self.cur_token.kind == TokenKind.LBracket:
301 self._advance()
302 children.append(self._parse_type_expr())
303 if self.cur_token.kind == TokenKind.Comma:
304 self._advance()
305 children.append(self._parse_type_expr())
306
307 self._match(TokenKind.RBracket)
308
309 if type_name in ('List', 'Optional'):
310 if len(children) != 1:
311 raise ASDLSyntaxError(
312 'Expected 1 type param to {}'.format(type_name),
313 self.cur_token.lineno)
314 elif type_name == 'Dict':
315 if len(children) != 2:
316 raise ASDLSyntaxError(
317 'Expected 2 type params to {}'.format(type_name),
318 self.cur_token.lineno)
319 else:
320 if len(children) != 0:
321 raise ASDLSyntaxError(
322 'Expected zero type params to {}'.format(type_name),
323 self.cur_token.lineno)
324
325 if len(children):
326 typ = ast.ParameterizedType(type_name, children)
327 else:
328 typ = ast.NamedType(type_name)
329
330 if self.cur_token.kind == TokenKind.Asterisk:
331 # string* is equivalent to List[string]
332 typ = ast.ParameterizedType('List', [typ])
333 self._advance()
334
335 elif self.cur_token.kind == TokenKind.Question:
336 # string* is equivalent to Optional[string]
337 typ = ast.ParameterizedType('Optional', [typ])
338 self._advance()
339
340 return typ
341
342 def _parse_fields(self):
343 """
344 fields_inner: type_expr NAME ( ',' type_expr NAME )* ','?
345
346 fields : '(' fields_inner? ')'
347
348 Name Quantifier? should be changed to typename.
349 """
350 fields = []
351 self._match(TokenKind.LParen)
352 while self.cur_token.kind == TokenKind.Name:
353 typ = self._parse_type_expr()
354 field_name = self._match(TokenKind.Name)
355
356 fields.append(Field(typ, field_name))
357
358 if self.cur_token.kind == TokenKind.RParen:
359 break
360 elif self.cur_token.kind == TokenKind.Comma:
361 self._advance()
362
363 self._match(TokenKind.RParen)
364 return fields
365
366 def _parse_list(self):
367 """
368 list_inner: NAME ( ',' NAME )* ','?
369
370 list : '[' list_inner? ']'
371 """
372 generate = []
373 self._match(TokenKind.LBracket)
374 while self.cur_token.kind == TokenKind.Name:
375 name = self._match(TokenKind.Name)
376
377 generate.append(name)
378
379 if self.cur_token.kind == TokenKind.RBracket:
380 break
381 elif self.cur_token.kind == TokenKind.Comma:
382 self._advance()
383
384 self._match(TokenKind.RBracket)
385 return generate
386
387 def _parse_optional_generate(self):
388 """
389 generate : 'generate' list
390 """
391 if self._at_keyword('generate'):
392 self._advance()
393 return self._parse_list()
394 else:
395 return None
396
397 def _parse_product(self):
398 """Product: fields attributes?"""
399 return Product(self._parse_fields())
400
401 def _advance(self):
402 """Return current token; read next token into self.cur_token."""
403 cur_val = None if self.cur_token is None else self.cur_token.value
404 try:
405 self.cur_token = next(self._tokenizer)
406 except StopIteration:
407 self.cur_token = None
408 return cur_val
409
410 def _match(self, kind):
411 """The 'match' primitive of RD parsers.
412
413 * Verifies that the current token is of the given kind (kind can
414 be a tuple, in which the kind must match one of its members).
415 * Returns the value of the current token
416 * Reads in the next token
417
418 Args:
419 kind: A TokenKind, or a tuple of TokenKind
420 """
421 if self.cur_token.kind == kind:
422 value = self.cur_token.value
423 self._advance()
424 return value
425 else:
426 raise ASDLSyntaxError(
427 'Expected token {}, got {}'.format(_TOKEN_STR[kind],
428 self.cur_token.value),
429 self.cur_token.lineno)
430
431 def _at_keyword(self, keyword):
432 return (self.cur_token.kind == TokenKind.Keyword and
433 self.cur_token.value == keyword)
434
435
436_PRIMITIVE_TYPES = [
437 'string',
438 'int',
439 'uint16', # used for Token length - should we generalize this?
440 'BigInt',
441 'float',
442 'bool',
443
444 # 'any' is used for value.{BuiltinProc,BuiltinFunc}, to cast from class
445 # type
446 'any',
447]
448
449
450def _ResolveType(typ, type_lookup):
451 # type: (AST, Dict[str, Any]) -> None
452 """Recursively attach a 'resolved' field to AST nodes."""
453 if isinstance(typ, ast.NamedType):
454 if typ.name not in _PRIMITIVE_TYPES:
455 ast_node = type_lookup.get(typ.name)
456 if ast_node is None:
457 raise ASDLSyntaxError("Couldn't find type %r" % typ.name)
458 typ.resolved = ast_node
459
460 elif isinstance(typ, ast.ParameterizedType):
461 for child in typ.children:
462 _ResolveType(child, type_lookup)
463
464 if typ.type_name == 'Optional':
465 child = typ.children[0]
466 if isinstance(child, ast.NamedType):
467 if child.name in _PRIMITIVE_TYPES and child.name != 'string':
468 raise ASDLSyntaxError(
469 'Optional primitive type {} not allowed'.format(
470 child.name))
471
472 if child.resolved and isinstance(child.resolved,
473 ast.SimpleSum):
474 raise ASDLSyntaxError(
475 'Optional simple sum type {} not allowed'.format(
476 child.name))
477
478 else:
479 raise AssertionError()
480
481
482def _ResolveFields(field_ast_nodes, type_lookup):
483 """
484 Args:
485 type_lookup: Populated by name resolution
486 """
487 for field in field_ast_nodes:
488 _ResolveType(field.typ, type_lookup)
489
490
491def _ResolveModule(module, app_types):
492 """Name resolution for NamedType."""
493 # Types that fields are declared with: int, id, word_part, etc.
494 # Fields are NOT declared with Constructor names.
495 type_lookup = dict(app_types)
496
497 # Note: we don't actually load the type, and instead leave that to MyPy /
498 # C++. A consequence of this is TypeNameHeuristic().
499 for u in module.uses:
500 for type_name in u.type_names:
501 type_lookup[type_name] = u # type: ast.Use()
502
503 # NOTE: We need two passes because types can be mutually recursive, e.g.
504 # asdl/arith.asdl.
505
506 # First pass: collect declared types and make entries for them.
507 for d in module.dfns:
508 if d.name in type_lookup:
509 raise ASDLSyntaxError('Type %r was already defined' % d.name)
510
511 if isinstance(d, SubTypeDecl):
512 # e.g. CompoundWord < List[str]
513 type_lookup[d.name] = d.base_class
514 else:
515 # e.g. Token = (str a)
516 type_lookup[d.name] = d.value
517
518 # Second pass: add NamedType.resolved field
519 for d in module.dfns:
520 if isinstance(d, SubTypeDecl): # no fields
521 _ResolveType(d.base_class, type_lookup)
522 continue
523
524 ast_node = d.value
525 if isinstance(ast_node, ast.Product):
526 #log('fields %s', ast_node.fields)
527 _ResolveFields(ast_node.fields, type_lookup)
528 continue
529
530 if isinstance(ast_node, ast.Sum):
531 for cons in ast_node.types:
532 _ResolveFields(cons.fields, type_lookup)
533 continue
534
535 raise AssertionError(ast_node)
536
537
538def LoadSchema(f, app_types, verbose=False):
539 """Returns an AST for the schema."""
540 p = ASDLParser()
541 schema_ast = p.parse(f)
542 if verbose:
543 import sys
544 schema_ast.Print(sys.stdout, 0)
545
546 # Make sure all the names are valid
547 _ResolveModule(schema_ast, app_types)
548 return schema_ast