1 | /*
|
2 | * Fast lexer using re2c.
|
3 | */
|
4 |
|
5 | #include <stdarg.h> // va_list, etc.
|
6 | #include <stdio.h> // printf
|
7 |
|
8 | #include <Python.h>
|
9 |
|
10 | #include "_gen/frontend/id_kind.asdl_c.h"
|
11 | #include "_gen/frontend/types.asdl_c.h" // for lex_mode_e
|
12 | #include "_gen/frontend/match.re2c.h"
|
13 |
|
14 | // TODO: Should this be shared among all extensions?
|
15 | // Log messages to stderr.
|
16 | #if 0
|
// Write a printf-style formatted message to stderr, then a newline.
//
// fmt: printf-style format string; the remaining arguments are the values it
//      consumes.
static void debug(const char* fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  (void)vfprintf(stderr, fmt, ap);
  va_end(ap);

  // Terminate the message line.
  fputs("\n", stderr);
}
|
24 | #endif
|
25 |
|
26 | static PyObject *
|
27 | fastlex_MatchOshToken(PyObject *self, PyObject *args) {
|
28 | int lex_mode;
|
29 |
|
30 | unsigned char* line;
|
31 | int line_len;
|
32 |
|
33 | int start_pos;
|
34 | if (!PyArg_ParseTuple(args, "is#i",
|
35 | &lex_mode, &line, &line_len, &start_pos)) {
|
36 | return NULL;
|
37 | }
|
38 |
|
39 | // Bounds checking. It's OK to be called with a start_pos looking at \0.
|
40 | // Eol_Tok is inserted everywhere.
|
41 | if (start_pos > line_len) {
|
42 | PyErr_Format(PyExc_ValueError,
|
43 | "Invalid MatchOshToken call (start_pos = %d, line_len = %d)",
|
44 | start_pos, line_len);
|
45 | return NULL;
|
46 | }
|
47 |
|
48 | int id;
|
49 | int end_pos;
|
50 | MatchOshToken(lex_mode, line, line_len, start_pos, &id, &end_pos);
|
51 | return Py_BuildValue("(ii)", id, end_pos);
|
52 | }
|
53 |
|
54 | static PyObject *
|
55 | fastlex_MatchEchoToken(PyObject *self, PyObject *args) {
|
56 | unsigned char* line;
|
57 | int line_len;
|
58 |
|
59 | int start_pos;
|
60 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
61 | return NULL;
|
62 | }
|
63 |
|
64 | // Bounds checking.
|
65 | if (start_pos > line_len) {
|
66 | PyErr_Format(PyExc_ValueError,
|
67 | "Invalid MatchEchoToken call (start_pos = %d, line_len = %d)",
|
68 | start_pos, line_len);
|
69 | return NULL;
|
70 | }
|
71 |
|
72 | int id;
|
73 | int end_pos;
|
74 | MatchEchoToken(line, line_len, start_pos, &id, &end_pos);
|
75 | return Py_BuildValue("(ii)", id, end_pos);
|
76 | }
|
77 |
|
78 | static PyObject *
|
79 | fastlex_MatchGlobToken(PyObject *self, PyObject *args) {
|
80 | unsigned char* line;
|
81 | int line_len;
|
82 |
|
83 | int start_pos;
|
84 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
85 | return NULL;
|
86 | }
|
87 |
|
88 | // Bounds checking.
|
89 | if (start_pos > line_len) {
|
90 | PyErr_Format(PyExc_ValueError,
|
91 | "Invalid MatchGlobToken call (start_pos = %d, line_len = %d)",
|
92 | start_pos, line_len);
|
93 | return NULL;
|
94 | }
|
95 |
|
96 | int id;
|
97 | int end_pos;
|
98 | MatchGlobToken(line, line_len, start_pos, &id, &end_pos);
|
99 | return Py_BuildValue("(ii)", id, end_pos);
|
100 | }
|
101 |
|
102 | static PyObject *
|
103 | fastlex_MatchPS1Token(PyObject *self, PyObject *args) {
|
104 | unsigned char* line;
|
105 | int line_len;
|
106 |
|
107 | int start_pos;
|
108 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
109 | return NULL;
|
110 | }
|
111 |
|
112 | // Bounds checking.
|
113 | if (start_pos > line_len) {
|
114 | PyErr_Format(PyExc_ValueError,
|
115 | "Invalid MatchPS1Token call (start_pos = %d, line_len = %d)",
|
116 | start_pos, line_len);
|
117 | return NULL;
|
118 | }
|
119 |
|
120 | int id;
|
121 | int end_pos;
|
122 | MatchPS1Token(line, line_len, start_pos, &id, &end_pos);
|
123 | return Py_BuildValue("(ii)", id, end_pos);
|
124 | }
|
125 |
|
126 | static PyObject *
|
127 | fastlex_MatchHistoryToken(PyObject *self, PyObject *args) {
|
128 | unsigned char* line;
|
129 | int line_len;
|
130 |
|
131 | int start_pos;
|
132 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
133 | return NULL;
|
134 | }
|
135 |
|
136 | // Bounds checking.
|
137 | if (start_pos > line_len) {
|
138 | PyErr_Format(PyExc_ValueError,
|
139 | "Invalid MatchHistoryToken call (start_pos = %d, line_len = %d)",
|
140 | start_pos, line_len);
|
141 | return NULL;
|
142 | }
|
143 |
|
144 | int id;
|
145 | int end_pos;
|
146 | MatchHistoryToken(line, line_len, start_pos, &id, &end_pos);
|
147 | return Py_BuildValue("(ii)", id, end_pos);
|
148 | }
|
149 |
|
150 | static PyObject *
|
151 | fastlex_MatchBraceRangeToken(PyObject *self, PyObject *args) {
|
152 | unsigned char* line;
|
153 | int line_len;
|
154 |
|
155 | int start_pos;
|
156 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
157 | return NULL;
|
158 | }
|
159 |
|
160 | // Bounds checking.
|
161 | if (start_pos > line_len) {
|
162 | PyErr_Format(PyExc_ValueError,
|
163 | "Invalid MatchBraceRangeToken call (start_pos = %d, line_len = %d)",
|
164 | start_pos, line_len);
|
165 | return NULL;
|
166 | }
|
167 |
|
168 | int id;
|
169 | int end_pos;
|
170 | MatchBraceRangeToken(line, line_len, start_pos, &id, &end_pos);
|
171 | return Py_BuildValue("(ii)", id, end_pos);
|
172 | }
|
173 |
|
174 | static PyObject *
|
175 | fastlex_MatchJ8Token(PyObject *self, PyObject *args) {
|
176 | unsigned char* line;
|
177 | int line_len;
|
178 |
|
179 | int start_pos;
|
180 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
181 | return NULL;
|
182 | }
|
183 |
|
184 | // Bounds checking.
|
185 | if (start_pos > line_len) {
|
186 | PyErr_Format(PyExc_ValueError,
|
187 | "Invalid MatchJ8Token call (start_pos = %d, line_len = %d)",
|
188 | start_pos, line_len);
|
189 | return NULL;
|
190 | }
|
191 |
|
192 | int id;
|
193 | int end_pos;
|
194 | MatchJ8Token(line, line_len, start_pos, &id, &end_pos);
|
195 | return Py_BuildValue("(ii)", id, end_pos);
|
196 | }
|
197 |
|
198 | static PyObject *
|
199 | fastlex_MatchJ8LinesToken(PyObject *self, PyObject *args) {
|
200 | unsigned char* line;
|
201 | int line_len;
|
202 |
|
203 | int start_pos;
|
204 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
205 | return NULL;
|
206 | }
|
207 |
|
208 | // Bounds checking.
|
209 | if (start_pos > line_len) {
|
210 | PyErr_Format(PyExc_ValueError,
|
211 | "Invalid MatchJ8LinesToken call (start_pos = %d, line_len = %d)",
|
212 | start_pos, line_len);
|
213 | return NULL;
|
214 | }
|
215 |
|
216 | int id;
|
217 | int end_pos;
|
218 | MatchJ8LinesToken(line, line_len, start_pos, &id, &end_pos);
|
219 | return Py_BuildValue("(ii)", id, end_pos);
|
220 | }
|
221 |
|
222 |
|
223 | static PyObject *
|
224 | fastlex_MatchJ8StrToken(PyObject *self, PyObject *args) {
|
225 | unsigned char* line;
|
226 | int line_len;
|
227 |
|
228 | int start_pos;
|
229 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
230 | return NULL;
|
231 | }
|
232 |
|
233 | // Bounds checking.
|
234 | if (start_pos > line_len) {
|
235 | PyErr_Format(PyExc_ValueError,
|
236 | "Invalid MatchJ8StrToken call (start_pos = %d, line_len = %d)",
|
237 | start_pos, line_len);
|
238 | return NULL;
|
239 | }
|
240 |
|
241 | int id;
|
242 | int end_pos;
|
243 | MatchJ8StrToken(line, line_len, start_pos, &id, &end_pos);
|
244 | return Py_BuildValue("(ii)", id, end_pos);
|
245 | }
|
246 |
|
247 | static PyObject *
|
248 | fastlex_MatchJsonStrToken(PyObject *self, PyObject *args) {
|
249 | unsigned char* line;
|
250 | int line_len;
|
251 |
|
252 | int start_pos;
|
253 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
254 | return NULL;
|
255 | }
|
256 |
|
257 | // Bounds checking.
|
258 | if (start_pos > line_len) {
|
259 | PyErr_Format(PyExc_ValueError,
|
260 | "Invalid MatchJsonStrToken call (start_pos = %d, line_len = %d)",
|
261 | start_pos, line_len);
|
262 | return NULL;
|
263 | }
|
264 |
|
265 | int id;
|
266 | int end_pos;
|
267 | MatchJsonStrToken(line, line_len, start_pos, &id, &end_pos);
|
268 | return Py_BuildValue("(ii)", id, end_pos);
|
269 | }
|
270 |
|
271 | static PyObject *
|
272 | fastlex_MatchShNumberToken(PyObject *self, PyObject *args) {
|
273 | unsigned char* line;
|
274 | int line_len;
|
275 |
|
276 | int start_pos;
|
277 | if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
|
278 | return NULL;
|
279 | }
|
280 |
|
281 | // Bounds checking.
|
282 | if (start_pos > line_len) {
|
283 | PyErr_Format(PyExc_ValueError,
|
284 | "Invalid MatchShNumberToken call (start_pos = %d, line_len = %d)",
|
285 | start_pos, line_len);
|
286 | return NULL;
|
287 | }
|
288 |
|
289 | int id;
|
290 | int end_pos;
|
291 | MatchShNumberToken(line, line_len, start_pos, &id, &end_pos);
|
292 | return Py_BuildValue("(ii)", id, end_pos);
|
293 | }
|
294 |
|
295 | static PyObject *
|
296 | fastlex_IsValidVarName(PyObject *self, PyObject *args) {
|
297 | unsigned char *name;
|
298 | int len;
|
299 |
|
300 | if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
|
301 | return NULL;
|
302 | }
|
303 | return PyBool_FromLong(IsValidVarName(name, len));
|
304 | }
|
305 |
|
306 | static PyObject *
|
307 | fastlex_ShouldHijack(PyObject *self, PyObject *args) {
|
308 | unsigned char *name;
|
309 | int len;
|
310 |
|
311 | if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
|
312 | return NULL;
|
313 | }
|
314 | return PyBool_FromLong(ShouldHijack(name, len));
|
315 | }
|
316 |
|
317 | static PyObject *
|
318 | fastlex_LooksLikeInteger(PyObject *self, PyObject *args) {
|
319 | unsigned char *name;
|
320 | int len;
|
321 |
|
322 | if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
|
323 | return NULL;
|
324 | }
|
325 | return PyBool_FromLong(LooksLikeInteger(name, len));
|
326 | }
|
327 |
|
328 | static PyObject *
|
329 | fastlex_LooksLikeYshInt(PyObject *self, PyObject *args) {
|
330 | unsigned char *name;
|
331 | int len;
|
332 |
|
333 | if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
|
334 | return NULL;
|
335 | }
|
336 | return PyBool_FromLong(LooksLikeYshInt(name, len));
|
337 | }
|
338 |
|
339 | static PyObject *
|
340 | fastlex_LooksLikeYshFloat(PyObject *self, PyObject *args) {
|
341 | unsigned char *name;
|
342 | int len;
|
343 |
|
344 | if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
|
345 | return NULL;
|
346 | }
|
347 | return PyBool_FromLong(LooksLikeYshFloat(name, len));
|
348 | }
|
349 |
|
350 | #ifdef OVM_MAIN
|
351 | #include "pyext/fastlex.c/methods.def"
|
352 | #else
|
353 | static PyMethodDef methods[] = {
|
354 | {"MatchOshToken", fastlex_MatchOshToken, METH_VARARGS,
|
355 | "(lexer mode, line, start_pos) -> (id, end_pos)."},
|
356 | {"MatchEchoToken", fastlex_MatchEchoToken, METH_VARARGS,
|
357 | "(line, start_pos) -> (id, end_pos)."},
|
358 | {"MatchGlobToken", fastlex_MatchGlobToken, METH_VARARGS,
|
359 | "(line, start_pos) -> (id, end_pos)."},
|
360 | {"MatchPS1Token", fastlex_MatchPS1Token, METH_VARARGS,
|
361 | "(line, start_pos) -> (id, end_pos)."},
|
362 | {"MatchHistoryToken", fastlex_MatchHistoryToken, METH_VARARGS,
|
363 | "(line, start_pos) -> (id, end_pos)."},
|
364 | {"MatchBraceRangeToken", fastlex_MatchBraceRangeToken, METH_VARARGS,
|
365 | "(line, start_pos) -> (id, end_pos)."},
|
366 | {"MatchJ8Token", fastlex_MatchJ8Token, METH_VARARGS,
|
367 | "(line, start_pos) -> (id, end_pos)."},
|
368 | {"MatchJ8LinesToken", fastlex_MatchJ8LinesToken, METH_VARARGS,
|
369 | "(line, start_pos) -> (id, end_pos)."},
|
370 | {"MatchJ8StrToken", fastlex_MatchJ8StrToken, METH_VARARGS,
|
371 | "(line, start_pos) -> (id, end_pos)."},
|
372 | {"MatchJsonStrToken", fastlex_MatchJsonStrToken, METH_VARARGS,
|
373 | "(line, start_pos) -> (id, end_pos)."},
|
374 | {"MatchShNumberToken", fastlex_MatchShNumberToken, METH_VARARGS,
|
375 | "(line, start_pos) -> (id, end_pos)."},
|
376 | {"IsValidVarName", fastlex_IsValidVarName, METH_VARARGS,
|
377 | "Is it a valid var name?"},
|
378 | // Should we hijack this shebang line?
|
379 | {"ShouldHijack", fastlex_ShouldHijack, METH_VARARGS, ""},
|
380 | {"LooksLikeInteger", fastlex_LooksLikeInteger, METH_VARARGS, ""},
|
381 | {"LooksLikeYshInt", fastlex_LooksLikeYshInt, METH_VARARGS, ""},
|
382 | {"LooksLikeYshFloat", fastlex_LooksLikeYshFloat, METH_VARARGS, ""},
|
383 | {NULL, NULL},
|
384 | };
|
385 | #endif
|
386 |
|
387 | void initfastlex(void) {
|
388 | Py_InitModule("fastlex", methods);
|
389 | }
|