1 | """
|
2 | tdop.py
|
3 | """
|
4 |
|
5 | from _devbuild.gen.typed_arith_asdl import arith_expr_t
|
6 | from typing import (Dict, List, Callable, Optional, Iterator, Tuple, NoReturn)
|
7 | from typing import TYPE_CHECKING
|
8 |
|
9 |
|
10 | class ParseError(Exception):
|
11 | pass
|
12 |
|
13 |
|
14 | #
|
15 | # Default parsing functions give errors
|
16 | #
|
17 |
|
18 |
|
19 | def NullError(p, token, bp):
|
20 | # type: (Parser, Token, int) -> NoReturn
|
21 | raise ParseError("%s can't be used in prefix position" % token)
|
22 |
|
23 |
|
24 | def LeftError(p, token, left, rbp):
|
25 | # type: (Parser, Token, arith_expr_t, int) -> NoReturn
|
26 | # Hm is this not called because of binding power?
|
27 | raise ParseError("%s can't be used in infix position" % token)
|
28 |
|
29 |
|
30 | #
|
31 | # Input
|
32 | #
|
33 |
|
34 |
|
35 | class Token(object):
|
36 |
|
37 | def __init__(self, type, val, loc=None):
|
38 | # type: (str, str, Optional[Tuple[int, int]]) -> None
|
39 | self.type = type
|
40 | self.val = val
|
41 |
|
42 | def __repr__(self):
|
43 | # type: () -> str
|
44 | return '<Token %s %s>' % (self.type, self.val)
|
45 |
|
46 |
|
47 | #
|
48 | # Parser definition
|
49 | #
|
50 |
|
51 |
|
52 | class LeftInfo(object):
|
53 | """Row for operator.
|
54 |
|
55 | In C++ this should be a big array.
|
56 | """
|
57 |
|
58 | def __init__(self, led=None, lbp=0, rbp=0):
|
59 | # type: (Optional[LeftFunc], int, int) -> None
|
60 | self.led = led or LeftError
|
61 | self.lbp = lbp
|
62 | self.rbp = rbp
|
63 |
|
64 |
|
65 | class NullInfo(object):
|
66 | """Row for operator.
|
67 |
|
68 | In C++ this should be a big array.
|
69 | """
|
70 |
|
71 | def __init__(self, nud=None, bp=0):
|
72 | # type: (Optional[NullFunc], int) -> None
|
73 | self.nud = nud or NullError
|
74 | self.bp = bp
|
75 |
|
76 |
|
77 | class ParserSpec(object):
|
78 | """Specification for a TDOP parser."""
|
79 |
|
80 | def __init__(self):
|
81 | # type: () -> None
|
82 | self.null_lookup = {} # type: Dict[str, NullInfo]
|
83 | self.left_lookup = {} # type: Dict[str, LeftInfo]
|
84 |
|
85 | def Null(self, bp, nud, tokens):
|
86 | # type: (int, NullFunc, List[str]) -> None
|
87 | """Register a token that doesn't take anything on the left.
|
88 |
|
89 | Examples: constant, prefix operator, error.
|
90 | """
|
91 | for token in tokens:
|
92 | self.null_lookup[token] = NullInfo(nud=nud, bp=bp)
|
93 | if token not in self.left_lookup:
|
94 | self.left_lookup[token] = LeftInfo() # error
|
95 |
|
96 | def _RegisterLed(self, lbp, rbp, led, tokens):
|
97 | # type: (int, int, LeftFunc, List[str]) -> None
|
98 | for token in tokens:
|
99 | if token not in self.null_lookup:
|
100 | self.null_lookup[token] = NullInfo(NullError)
|
101 | self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
|
102 |
|
103 | def Left(self, bp, led, tokens):
|
104 | # type: (int, LeftFunc, List[str]) -> None
|
105 | """Register a token that takes an expression on the left."""
|
106 | self._RegisterLed(bp, bp, led, tokens)
|
107 |
|
108 | def LeftRightAssoc(self, bp, led, tokens):
|
109 | # type: (int, LeftFunc, List[str]) -> None
|
110 | """Register a right associative operator."""
|
111 | self._RegisterLed(bp, bp - 1, led, tokens)
|
112 |
|
113 | def LookupNull(self, token):
|
114 | # type: (str) -> NullInfo
|
115 | """Get the parsing function and precedence for a null position token."""
|
116 | try:
|
117 | nud = self.null_lookup[token]
|
118 | except KeyError:
|
119 | raise ParseError('Unexpected token %r' % token)
|
120 | return nud
|
121 |
|
122 | def LookupLeft(self, token):
|
123 | # type: (str) -> LeftInfo
|
124 | """Get the parsing function and precedence for a left position token."""
|
125 | try:
|
126 | led = self.left_lookup[token]
|
127 | except KeyError:
|
128 | raise ParseError('Unexpected token %r' % token)
|
129 | return led
|
130 |
|
131 |
|
132 | EOF_TOKEN = Token('eof', 'eof')
|
133 |
|
134 |
|
135 | class Parser(object):
|
136 | """Recursive TDOP parser."""
|
137 |
|
138 | def __init__(self, spec, lexer):
|
139 | # type: (ParserSpec, Iterator[Token]) -> None
|
140 | self.spec = spec
|
141 | self.lexer = lexer # iterable
|
142 | self.token = Token('undefined', '') # current token
|
143 |
|
144 | def AtToken(self, token_type):
|
145 | # type: (str) -> bool
|
146 | """Test if we are looking at a token."""
|
147 | return self.token.type == token_type
|
148 |
|
149 | def Next(self):
|
150 | # type: () -> None
|
151 | """Move to the next token."""
|
152 | try:
|
153 | t = self.lexer.next()
|
154 | except StopIteration:
|
155 | t = EOF_TOKEN
|
156 | self.token = t
|
157 |
|
158 | def Eat(self, val):
|
159 | # type: (str) -> None
|
160 | """Assert the value of the current token, then move to the next token."""
|
161 | if val and not self.AtToken(val):
|
162 | raise ParseError('expected %s, got %s' % (val, self.token))
|
163 | self.Next()
|
164 |
|
165 | def ParseUntil(self, rbp):
|
166 | # type: (int) -> arith_expr_t
|
167 | """
|
168 | Parse to the right, eating tokens until we encounter a token with binding
|
169 | power LESS THAN OR EQUAL TO rbp.
|
170 | """
|
171 | if self.AtToken('eof'):
|
172 | raise ParseError('Unexpected end of input')
|
173 |
|
174 | t = self.token
|
175 | self.Next() # skip over the token, e.g. ! ~ + -
|
176 |
|
177 | null_info = self.spec.LookupNull(t.type)
|
178 | node = null_info.nud(self, t, null_info.bp)
|
179 |
|
180 | while True:
|
181 | t = self.token
|
182 | left_info = self.spec.LookupLeft(t.type)
|
183 |
|
184 | # Examples:
|
185 | # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
|
186 | # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
|
187 | # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
|
188 | if rbp >= left_info.lbp:
|
189 | break
|
190 | self.Next() # skip over the token, e.g. / *
|
191 |
|
192 | node = left_info.led(self, t, node, left_info.rbp)
|
193 |
|
194 | return node
|
195 |
|
196 | def Parse(self):
|
197 | # type: () -> arith_expr_t
|
198 | self.Next()
|
199 | return self.ParseUntil(0)
|
200 |
|
201 |
|
202 | # Must define these aliases AFTER Parser and Token are defined.
|
203 | if TYPE_CHECKING:
|
204 | NullFunc = Callable[[Parser, Token, int], arith_expr_t]
|
205 | LeftFunc = Callable[[Parser, Token, arith_expr_t, int], arith_expr_t]
|