OILS / asdl / examples / tdop.py View on Github | oilshell.org

205 lines, 93 significant
1"""
2tdop.py
3"""
4
5from _devbuild.gen.typed_arith_asdl import arith_expr_t
6from typing import (Dict, List, Callable, Optional, Iterator, Tuple, NoReturn)
7from typing import TYPE_CHECKING
8
9
10class ParseError(Exception):
11 pass
12
13
14#
15# Default parsing functions give errors
16#
17
18
19def NullError(p, token, bp):
20 # type: (Parser, Token, int) -> NoReturn
21 raise ParseError("%s can't be used in prefix position" % token)
22
23
24def LeftError(p, token, left, rbp):
25 # type: (Parser, Token, arith_expr_t, int) -> NoReturn
26 # Hm is this not called because of binding power?
27 raise ParseError("%s can't be used in infix position" % token)
28
29
30#
31# Input
32#
33
34
35class Token(object):
36
37 def __init__(self, type, val, loc=None):
38 # type: (str, str, Optional[Tuple[int, int]]) -> None
39 self.type = type
40 self.val = val
41
42 def __repr__(self):
43 # type: () -> str
44 return '<Token %s %s>' % (self.type, self.val)
45
46
47#
48# Parser definition
49#
50
51
52class LeftInfo(object):
53 """Row for operator.
54
55 In C++ this should be a big array.
56 """
57
58 def __init__(self, led=None, lbp=0, rbp=0):
59 # type: (Optional[LeftFunc], int, int) -> None
60 self.led = led or LeftError
61 self.lbp = lbp
62 self.rbp = rbp
63
64
65class NullInfo(object):
66 """Row for operator.
67
68 In C++ this should be a big array.
69 """
70
71 def __init__(self, nud=None, bp=0):
72 # type: (Optional[NullFunc], int) -> None
73 self.nud = nud or NullError
74 self.bp = bp
75
76
77class ParserSpec(object):
78 """Specification for a TDOP parser."""
79
80 def __init__(self):
81 # type: () -> None
82 self.null_lookup = {} # type: Dict[str, NullInfo]
83 self.left_lookup = {} # type: Dict[str, LeftInfo]
84
85 def Null(self, bp, nud, tokens):
86 # type: (int, NullFunc, List[str]) -> None
87 """Register a token that doesn't take anything on the left.
88
89 Examples: constant, prefix operator, error.
90 """
91 for token in tokens:
92 self.null_lookup[token] = NullInfo(nud=nud, bp=bp)
93 if token not in self.left_lookup:
94 self.left_lookup[token] = LeftInfo() # error
95
96 def _RegisterLed(self, lbp, rbp, led, tokens):
97 # type: (int, int, LeftFunc, List[str]) -> None
98 for token in tokens:
99 if token not in self.null_lookup:
100 self.null_lookup[token] = NullInfo(NullError)
101 self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
102
103 def Left(self, bp, led, tokens):
104 # type: (int, LeftFunc, List[str]) -> None
105 """Register a token that takes an expression on the left."""
106 self._RegisterLed(bp, bp, led, tokens)
107
108 def LeftRightAssoc(self, bp, led, tokens):
109 # type: (int, LeftFunc, List[str]) -> None
110 """Register a right associative operator."""
111 self._RegisterLed(bp, bp - 1, led, tokens)
112
113 def LookupNull(self, token):
114 # type: (str) -> NullInfo
115 """Get the parsing function and precedence for a null position token."""
116 try:
117 nud = self.null_lookup[token]
118 except KeyError:
119 raise ParseError('Unexpected token %r' % token)
120 return nud
121
122 def LookupLeft(self, token):
123 # type: (str) -> LeftInfo
124 """Get the parsing function and precedence for a left position token."""
125 try:
126 led = self.left_lookup[token]
127 except KeyError:
128 raise ParseError('Unexpected token %r' % token)
129 return led
130
131
132EOF_TOKEN = Token('eof', 'eof')
133
134
135class Parser(object):
136 """Recursive TDOP parser."""
137
138 def __init__(self, spec, lexer):
139 # type: (ParserSpec, Iterator[Token]) -> None
140 self.spec = spec
141 self.lexer = lexer # iterable
142 self.token = Token('undefined', '') # current token
143
144 def AtToken(self, token_type):
145 # type: (str) -> bool
146 """Test if we are looking at a token."""
147 return self.token.type == token_type
148
149 def Next(self):
150 # type: () -> None
151 """Move to the next token."""
152 try:
153 t = self.lexer.next()
154 except StopIteration:
155 t = EOF_TOKEN
156 self.token = t
157
158 def Eat(self, val):
159 # type: (str) -> None
160 """Assert the value of the current token, then move to the next token."""
161 if val and not self.AtToken(val):
162 raise ParseError('expected %s, got %s' % (val, self.token))
163 self.Next()
164
165 def ParseUntil(self, rbp):
166 # type: (int) -> arith_expr_t
167 """
168 Parse to the right, eating tokens until we encounter a token with binding
169 power LESS THAN OR EQUAL TO rbp.
170 """
171 if self.AtToken('eof'):
172 raise ParseError('Unexpected end of input')
173
174 t = self.token
175 self.Next() # skip over the token, e.g. ! ~ + -
176
177 null_info = self.spec.LookupNull(t.type)
178 node = null_info.nud(self, t, null_info.bp)
179
180 while True:
181 t = self.token
182 left_info = self.spec.LookupLeft(t.type)
183
184 # Examples:
185 # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
186 # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
187 # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
188 if rbp >= left_info.lbp:
189 break
190 self.Next() # skip over the token, e.g. / *
191
192 node = left_info.led(self, t, node, left_info.rbp)
193
194 return node
195
196 def Parse(self):
197 # type: () -> arith_expr_t
198 self.Next()
199 return self.ParseUntil(0)
200
201
202# Must define these aliases AFTER Parser and Token are defined.
203if TYPE_CHECKING:
204 NullFunc = Callable[[Parser, Token, int], arith_expr_t]
205 LeftFunc = Callable[[Parser, Token, arith_expr_t, int], arith_expr_t]