OILS / asdl / examples / tdop_lexer.py View on Github | oilshell.org

44 lines, 26 significant
1"""
2tdop_lexer.py
3"""
4from __future__ import print_function
5
6import re
7from typing import Iterator, Tuple, cast, TYPE_CHECKING
8
9from asdl.examples.tdop import Token
10
# TupleStr4 is only used as the target type of a cast() in Tokenize().
# The type checker sees the real 4-tuple-of-str alias; at runtime the name
# merely has to exist, so it is bound to a placeholder.
if TYPE_CHECKING:
    TupleStr4 = Tuple[str, str, str, str]
else:
    TupleStr4 = None  # Using runtime stub
15
16#
17# Using the pattern here: http://effbot.org/zone/xml-scanner.htm
18#
19
20# NOTE: () and [] need to be on their own so (-1+2) works
# Each match skips leading whitespace and then fills exactly one of four
# capture groups:
#   1. a run of digits
#   2. a run of word characters
#   3. a run of operator characters
#   4. a single bracket: ( ) [ ]
TOKEN_RE = re.compile(
    r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""",
    flags=re.VERBOSE)
25
26
def Tokenize(s):
    # type: (str) -> Iterator[Token]
    """Yield a Token for each lexeme that TOKEN_RE finds in `s`.

    Digit runs produce type 'number' and word runs produce type 'name'.
    Operators and brackets use their own text as both type and value.
    """
    for groups in TOKEN_RE.findall(s):
        # findall() returns one 4-tuple of strings per match, which the type
        # checker cannot infer on its own.
        number, name, operator, bracket = cast(TupleStr4, groups)
        if number:
            typ, val = 'number', number
        elif name:
            typ, val = 'name', name
        else:
            # The pattern guarantees exactly one group is non-empty, so here
            # it is either the operator run or the single bracket.
            val = operator or bracket
            typ = val
        yield Token(typ, val)