OILS / opy / _regtest / src / asdl / tdop.py View on Github | oils.pub

230 lines, 117 significant
1#!/usr/bin/env python
2"""
3tdop.py
4"""
5
6import re
7
8
9class ParseError(Exception):
10 pass
11
12
13#
14# Default parsing functions give errors
15#
16
17def NullError(p, token, bp):
18 raise ParseError("%s can't be used in prefix position" % token)
19
20
21def LeftError(p, token, left, rbp):
22 # Hm is this not called because of binding power?
23 raise ParseError("%s can't be used in infix position" % token)
24
25
26#
27# Input
28#
29
30class Token:
31 def __init__(self, type, val, loc=None):
32 self.type = type
33 self.val = val
34
35 def __repr__(self):
36 return '<Token %s %s>' % (self.type, self.val)
37
38
39#
40# Using the pattern here: http://effbot.org/zone/xml-scanner.htm
41#
42
43# NOTE: () and [] need to be on their own so (-1+2) works
44TOKEN_RE = re.compile("""
45\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
46""", re.VERBOSE)
47
48def Tokenize(s):
49 for item in TOKEN_RE.findall(s):
50 if item[0]:
51 typ = 'number'
52 val = int(item[0])
53 elif item[1]:
54 typ = 'name'
55 val = item[1]
56 elif item[2]:
57 typ = item[2]
58 val = item[2]
59 elif item[3]:
60 typ = item[3]
61 val = item[3]
62 yield Token(typ, val, loc=(0, 0))
63
64
65#
66# Simple and Composite AST nodes
67#
68
69class Node(object):
70 def __init__(self, token):
71 """
72 Args:
73 type: token type (operator, etc.)
74 val: token val, only important for number and string
75 """
76 self.token = token
77
78 def __repr__(self):
79 return str(self.token.val)
80
81
82class CompositeNode(Node):
83 def __init__(self, token, children):
84 """
85 Args:
86 type: token type (operator, etc.)
87 """
88 Node.__init__(self, token)
89 self.children = children
90
91 def __repr__(self):
92 args = ''.join([" " + repr(c) for c in self.children])
93 return "(" + self.token.type + args + ")"
94
95
96#
97# Parser definition
98#
99
100class LeftInfo(object):
101 """Row for operator.
102
103 In C++ this should be a big array.
104 """
105 def __init__(self, led=None, lbp=0, rbp=0):
106 self.led = led or LeftError
107 self.lbp = lbp
108 self.rbp = rbp
109
110
111class NullInfo(object):
112 """Row for operator.
113
114 In C++ this should be a big array.
115 """
116 def __init__(self, nud=None, bp=0):
117 self.nud = nud or NullError
118 self.bp = bp
119
120
121class ParserSpec(object):
122 """Specification for a TDOP parser."""
123
124 def __init__(self):
125 self.null_lookup = {}
126 self.left_lookup = {}
127
128 def Null(self, bp, nud, tokens):
129 """Register a token that doesn't take anything on the left.
130
131 Examples: constant, prefix operator, error.
132 """
133 for token in tokens:
134 self.null_lookup[token] = NullInfo(nud=nud, bp=bp)
135 if token not in self.left_lookup:
136 self.left_lookup[token] = LeftInfo() # error
137
138 def _RegisterLed(self, lbp, rbp, led, tokens):
139 for token in tokens:
140 if token not in self.null_lookup:
141 self.null_lookup[token] = NullInfo(NullError)
142 self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
143
144 def Left(self, bp, led, tokens):
145 """Register a token that takes an expression on the left."""
146 self._RegisterLed(bp, bp, led, tokens)
147
148 def LeftRightAssoc(self, bp, led, tokens):
149 """Register a right associative operator."""
150 self._RegisterLed(bp, bp-1, led, tokens)
151
152 def LookupNull(self, token):
153 """Get the parsing function and precedence for a null position token."""
154 try:
155 nud = self.null_lookup[token]
156 except KeyError:
157 raise ParseError('Unexpected token %r' % token)
158 return nud
159
160 def LookupLeft(self, token):
161 """Get the parsing function and precedence for a left position token."""
162 try:
163 led = self.left_lookup[token]
164 except KeyError:
165 raise ParseError('Unexpected token %r' % token)
166 return led
167
168
169EOF_TOKEN = Token('eof', 'eof')
170
171
172class Parser(object):
173 """Recursive TDOP parser."""
174
175 def __init__(self, spec, lexer):
176 self.spec = spec
177 self.lexer = lexer # iterable
178 self.token = None # current token
179
180 def AtToken(self, token_type):
181 """Test if we are looking at a token."""
182 return self.token.type == token_type
183
184 def Next(self):
185 """Move to the next token."""
186 try:
187 t = self.lexer.next()
188 except StopIteration:
189 t = EOF_TOKEN
190 self.token = t
191
192 def Eat(self, val):
193 """Assert the value of the current token, then move to the next token."""
194 if val and not self.AtToken(val):
195 raise ParseError('expected %s, got %s' % (val, self.token))
196 self.Next()
197
198 def ParseUntil(self, rbp):
199 """
200 Parse to the right, eating tokens until we encounter a token with binding
201 power LESS THAN OR EQUAL TO rbp.
202 """
203 if self.AtToken('eof'):
204 raise ParseError('Unexpected end of input')
205
206 t = self.token
207 self.Next() # skip over the token, e.g. ! ~ + -
208
209 null_info = self.spec.LookupNull(t.type)
210 node = null_info.nud(self, t, null_info.bp)
211
212 while True:
213 t = self.token
214 left_info = self.spec.LookupLeft(t.type)
215
216 # Examples:
217 # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
218 # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
219 # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
220 if rbp >= left_info.lbp:
221 break
222 self.Next() # skip over the token, e.g. / *
223
224 node = left_info.led(self, t, node, left_info.rbp)
225
226 return node
227
228 def Parse(self):
229 self.Next()
230 return self.ParseUntil(0)