OILS / tools / find / tokenizer.py View on Github | oils.pub

136 lines, 91 significant
1#!/usr/bin/env python2
2# Copyright 2019 Wilke Schwiedop. All rights reserved.
3# Copyright 2019 Andy Chu. All rights reserved.
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9"""
10tokenizer.py: Tokenizer for find.
11"""
12
import sys

# Table of (argv string, token name) pairs.  The position of a pair in this
# list determines its token number (see the numbering loop below), so the
# order of the entries matters.
_ops = [
    ('!', 'BANG'),
    ('(', 'LPAR'),
    (')', 'RPAR'),
    ('-o', 'OR'),
    ('-a', 'AND'),
    (',', 'COMMA'),
    (';', 'SEMI'),
    ('+', 'PLUS'),

    ('-true', 'TRUE'),
    ('-false', 'FALSE'),

    ('-name', 'NAME'),
    ('-iname', 'INAME'),

    ('-lname', 'LNAME'),
    ('-ilname', 'ILNAME'),

    ('-path', 'PATH'),
    ('-ipath', 'IPATH'),

    ('-regex', 'REGEX'),
    ('-iregex', 'IREGEX'),

    ('-executable', 'EXECUTABLE'),
    ('-readable', 'READABLE'),
    ('-writable', 'WRITABLE'),

    ('-empty', 'EMPTY'),

    ('-size', 'SIZE'),
    ('-type', 'TYPE'),
    ('-xtype', 'XTYPE'),
    ('-perm', 'PERM'),

    ('-group', 'GROUP'),
    ('-user', 'USER'),
    ('-gid', 'GID'),
    ('-uid', 'UID'),
    ('-nogroup', 'NOGROUP'),
    ('-nouser', 'NOUSER'),

    ('-amin', 'AMIN'),
    ('-anewer', 'ANEWER'),
    ('-atime', 'ATIME'),
    ('-cmin', 'CMIN'),
    ('-cnewer', 'CNEWER'),
    ('-ctime', 'CTIME'),
    ('-mmin', 'MMIN'),
    # NOTE: the option is spelled -newer (not -mnewer), but the token is
    # named MNEWER.
    ('-newer', 'MNEWER'),
    ('-mtime', 'MTIME'),
    ('-newerXY', 'NEWERXY'),

    ('-delete', 'DELETE'),
    ('-prune', 'PRUNE'),
    ('-quit', 'QUIT'),

    ('-print', 'PRINT'),
    ('-print0', 'PRINT0'),
    ('-printf', 'PRINTF'),
    ('-ls', 'LS'),
    ('-fprint', 'FPRINT'),
    ('-fprint0', 'FPRINT0'),
    ('-fprintf', 'FPRINTF'),
    ('-fls', 'FLS'),

    ('-exec', 'EXEC'),
    ('-execdir', 'EXECDIR'),
    ('-ok', 'OK'),
    ('-okdir', 'OKDIR'),
]

# opmap:    argv string  -> token number
# tok_name: token number -> token name
opmap = {}
tok_name = {}
# start=100 is pgen voodoo, don't touch
for _num, (_op, _name) in enumerate(_ops, start=100):
    opmap[_op] = _num
    tok_name[_num] = _name

# Token numbers that do not come from the _ops table.
tok_name[0] = 'ENDMARKER'
tok_name[1] = 'STRING'
#tok_name[len(tok_name)] = 'N_TOKENS'
tok_name[256] = 'NT_OFFSET'

# Publish every token name as a module-level integer constant, so that e.g.
# STRING, ENDMARKER and NT_OFFSET can be referenced directly in this module.
this_module = sys.modules[__name__]
for i, name in tok_name.items():
    setattr(this_module, name, i)
99
class TokenDef(object):
    """Maps labels and operator strings to the token numbers of this module."""

    def GetTerminalNum(self, label):
        """Return the token number published on this module under `label`.

        e.g. STRING -> 1

        Asserts that the module attribute named `label` exists, is an int,
        and is a key of tok_name; the assertion message is the label.
        """
        num = getattr(this_module, label, None)
        # The isinstance check runs first and short-circuits, so tok_name is
        # only consulted for values that are ints.
        assert isinstance(num, int) and num in tok_name, label
        return num

    def GetOpNum(self, value):
        """Return the token number for an operator string.

        e.g. '(' -> the number of LPAR

        Raises KeyError if `value` is not a key of opmap.
        """
        return opmap[value]

    def GetKeywordNum(self, value):
        """Always return None, whatever `value` is."""
        return None
114
115
def tokenize(argv):
    """Yield one token tuple per element of argv, then an ENDMARKER token.

    Each token is a 5-tuple (type, string, start, end, line_text).  The type
    is the opmap entry for the argument, or STRING when the argument is not
    a key of opmap.  The start, end and line_text fields carry dummy values.
    """
    dummy_pos = (1, 0)  # dummy location data, used for both start and end
    no_text = ''
    for arg in argv:
        yield (opmap.get(arg, STRING), arg, dummy_pos, dummy_pos, no_text)
    yield (ENDMARKER, '', dummy_pos, dummy_pos, no_text)
125
def is_terminal(type):
    """Return True if the integer `type` is below NT_OFFSET.

    type (int) -> bool
    """
    return NT_OFFSET > type
129
def is_nonterminal(type):
    """Return True if the integer `type` is NT_OFFSET or above.

    type (int) -> bool
    """
    return NT_OFFSET <= type
133
def is_eof(type):
    """Return True if the integer `type` equals ENDMARKER.

    type (int) -> bool
    """
    return ENDMARKER == type