OILS / pyext / libc.c View on Github | oils.pub

422 lines, 246 significant
1/*
2 * Python interface to libc functions.
3 */
4
5// - Enable GNU extensions in fnmatch.h for extended glob.
6// - It's also apparently needed for wchar.h in combination with Python.
7// https://github.com/python-pillow/Pillow/issues/1850
8// - It's currently hard-coded in pyconfig.h.
9#define _GNU_SOURCE 1
10
#include <stdarg.h>  // va_list, etc.
#include <stdio.h>  // printf
#include <limits.h>
#include <string.h>  // strlen, strerror
#include <wchar.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <locale.h>
#include <fnmatch.h>
#include <glob.h>
#include <regex.h>
21
22#include <Python.h>
23
24#include "_build/detected-config.h"
25
// Emit a printf-style diagnostic line on stderr.
//
// The body is compiled in only when LIBC_VERBOSE is defined; otherwise the
// call is a no-op.
static void debug(const char* fmt, ...) {
#ifdef LIBC_VERBOSE
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  // Terminate the message so callers don't have to.
  fprintf(stderr, "\n");
#endif
}
36
37static PyObject *
38func_realpath(PyObject *self, PyObject *args) {
39 const char *symlink;
40
41 if (!PyArg_ParseTuple(args, "s", &symlink)) {
42 return NULL;
43 }
44 char target[PATH_MAX + 1];
45 char *status = realpath(symlink, target);
46
47 // TODO: Throw exception like IOError here
48 if (status == NULL) {
49 debug("error from realpath()");
50 Py_RETURN_NONE;
51 }
52
53 return PyString_FromString(target);
54}
55
56static PyObject *
57func_fnmatch(PyObject *self, PyObject *args) {
58 const char *pattern;
59 const char *str;
60 int flags = 0;
61
62 if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) {
63 return NULL;
64 }
65
66#ifdef FNM_EXTMATCH
67 flags |= FNM_EXTMATCH;
68#endif
69
70 int ret = fnmatch(pattern, str, flags);
71
72 switch (ret) {
73 case 0:
74 debug("matched: %s", str);
75 return PyLong_FromLong(1);
76 break;
77 case FNM_NOMATCH:
78 debug("no match: %s", str);
79 return PyLong_FromLong(0);
80 break;
81 default:
82 debug("other error: %s", str);
83 return PyLong_FromLong(-1);
84 break;
85 }
86}
87
88// error callback to glob()
89//
90// Disabled because of spurious errors. For example, sed -i s/.*// (without
91// quotes) is OK, but it would be treated as a glob, and prints an error if the
92// directory 's' doesn't exist.
93//
94// Bash does its own globbing -- it doesn't use libc. Likewise, I think dash
95// and mksh do their own globbing.
96
int globerr(const char *path, int errno_) {
  // Describe which path failed and why.
  const char *reason = strerror(errno_);
  fprintf(stderr, "globerr: %s: %s\n", path, reason);

  // A zero return tells glob() to keep going.
  return 0;
}
101
102static PyObject *
103func_glob(PyObject *self, PyObject *args) {
104 const char* pattern;
105 int flags = 0;
106 if (!PyArg_ParseTuple(args, "s|i", &pattern, &flags)) {
107 return NULL;
108 }
109
110 glob_t results;
111 // Hm, it's weird that the first one can't be called with GLOB_APPEND. You
112 // get a segfault.
113 // int flags = GLOB_APPEND;
114 //flags |= GLOB_NOMAGIC;
115 int ret = glob(pattern, flags, NULL, &results);
116
117 const char *err_str = NULL;
118 switch (ret) {
119 case 0: // no error
120 break;
121 case GLOB_ABORTED:
122 err_str = "read error";
123 break;
124 case GLOB_NOMATCH:
125 // No error, because not matching isn't necessarily a problem.
126 // NOTE: This can be turned on to log overaggressive calls to glob().
127 //err_str = "nothing matched";
128 break;
129 case GLOB_NOSPACE:
130 err_str = "no dynamic memory";
131 break;
132 default:
133 err_str = "unknown problem";
134 break;
135 }
136 if (err_str) {
137 //fprintf(stderr, "func_glob: %s: %s\n", pattern, err_str);
138 PyErr_SetString(PyExc_RuntimeError, err_str);
139 return NULL;
140 }
141
142 // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak
143 size_t n = results.gl_pathc;
144 PyObject* matches = PyList_New(n);
145
146 // Print array of results
147 size_t i;
148 for (i = 0; i < n; i++) {
149 //printf("%s\n", results.gl_pathv[i]);
150 PyObject* m = Py_BuildValue("s", results.gl_pathv[i]);
151 PyList_SetItem(matches, i, m);
152 }
153 globfree(&results);
154
155 return matches;
156}
157
158static PyObject *
159func_regex_search(PyObject *self, PyObject *args) {
160 const char* pattern;
161 const char* str;
162 int cflags = 0;
163 int eflags = 0;
164 int pos = 0;
165
166 if (!PyArg_ParseTuple(args, "sisi|i", &pattern, &cflags, &str, &eflags, &pos)) {
167 return NULL;
168 }
169
170 cflags |= REG_EXTENDED;
171 regex_t pat;
172 int status = regcomp(&pat, pattern, cflags);
173 if (status != 0) {
174 char error_desc[50];
175 regerror(status, &pat, error_desc, 50);
176
177 char error_message[80];
178 snprintf(error_message, 80, "Invalid regex %s (%s)", pattern, error_desc);
179
180 PyErr_SetString(PyExc_ValueError, error_message);
181 return NULL;
182 }
183
184 int num_groups = pat.re_nsub + 1;
185 PyObject *ret = PyList_New(num_groups * 2);
186
187 if (ret == NULL) {
188 regfree(&pat);
189 return NULL;
190 }
191
192 regmatch_t *pmatch = (regmatch_t*) malloc(sizeof(regmatch_t) * num_groups);
193 int match = regexec(&pat, str + pos, num_groups, pmatch, eflags);
194 if (match == 0) {
195 int i;
196 for (i = 0; i < num_groups; i++) {
197 int start = pmatch[i].rm_so;
198 if (start != -1) {
199 start += pos;
200 }
201 PyList_SetItem(ret, 2*i, PyInt_FromLong(start));
202
203 int end = pmatch[i].rm_eo;
204 if (end != -1) {
205 end += pos;
206 }
207 PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end));
208 }
209 }
210
211 free(pmatch);
212 regfree(&pat);
213
214 if (match != 0) {
215 Py_RETURN_NONE;
216 }
217
218 return ret;
219}
220
221// For ${//}, the number of groups is always 1, so we want 2 match position
222// results -- the whole regex (which we ignore), and then first group.
223//
224// For [[ =~ ]], do we need to count how many matches the user gave?
225
226#define NMATCH 2
227
228static PyObject *
229func_regex_first_group_match(PyObject *self, PyObject *args) {
230 const char* pattern;
231 const char* str;
232 int pos;
233 if (!PyArg_ParseTuple(args, "ssi", &pattern, &str, &pos)) {
234 return NULL;
235 }
236
237 regex_t pat;
238 regmatch_t m[NMATCH];
239
240 // Could have been checked by regex_parse for [[ =~ ]], but not for glob
241 // patterns like ${foo/x*/y}.
242
243 int status = regcomp(&pat, pattern, REG_EXTENDED);
244 if (status != 0) {
245 char error_string[80];
246 regerror(status, &pat, error_string, 80);
247 PyErr_SetString(PyExc_RuntimeError, error_string);
248 return NULL;
249 }
250
251 debug("first_group_match pat %s str %s pos %d", pattern, str, pos);
252
253 // Match at offset 'pos'
254 int result = regexec(&pat, str + pos, NMATCH, m, 0 /*flags*/);
255 regfree(&pat);
256
257 if (result != 0) {
258 Py_RETURN_NONE; // no match
259 }
260
261 // Assume there is a match
262 regoff_t start = m[1].rm_so;
263 regoff_t end = m[1].rm_eo;
264 return Py_BuildValue("(i,i)", pos + start, pos + end);
265}
266
267// We do this in C so we can remove '%f' % 0.1 from the CPython build. That
268// involves dtoa.c and pystrod.c, which are thousands of lines of code.
269static PyObject *
270func_print_time(PyObject *self, PyObject *args) {
271 double real, user, sys;
272 if (!PyArg_ParseTuple(args, "ddd", &real, &user, &sys)) {
273 return NULL;
274 }
275 fprintf(stderr, "real\t%.3f\n", real);
276 fprintf(stderr, "user\t%.3f\n", user);
277 fprintf(stderr, "sys\t%.3f\n", sys);
278 Py_RETURN_NONE;
279}
280
281// A copy of socket.gethostname() from socketmodule.c. That module brings in
282// too many dependencies.
283
284static PyObject *errno_error;
285
286static PyObject *
287socket_gethostname(PyObject *self, PyObject *unused)
288{
289 char buf[1024];
290 int res;
291 Py_BEGIN_ALLOW_THREADS
292 res = gethostname(buf, (int) sizeof buf - 1);
293 //res = gethostname(buf, 0); // For testing errors
294 Py_END_ALLOW_THREADS
295 if (res < 0)
296 return PyErr_SetFromErrno(errno_error);
297 buf[sizeof buf - 1] = '\0';
298 return PyString_FromString(buf);
299}
300
301static PyObject *
302func_get_terminal_width(PyObject *self, PyObject *unused) {
303 struct winsize w;
304 int res;
305 res = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
306 if (res < 0)
307 return PyErr_SetFromErrno(errno_error);
308 return PyLong_FromLong(w.ws_col);
309}
310
311static PyObject *
312func_wcswidth(PyObject *self, PyObject *args){
313 char *string;
314 if (!PyArg_ParseTuple(args, "s", &string)) {
315 return NULL;
316 }
317
318 int num_wide_chars = mbstowcs(NULL, string, 0);
319 if (num_wide_chars == -1) {
320 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 1");
321 return NULL;
322 }
323 int buf_size = (num_wide_chars + 1) * sizeof(wchar_t);
324 wchar_t* wide_chars = (wchar_t*)malloc(buf_size);
325 assert(wide_chars != NULL);
326
327 num_wide_chars = mbstowcs(wide_chars, string, num_wide_chars);
328 if (num_wide_chars == -1) {
329 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 2");
330 return NULL;
331 }
332
333 int width = wcswidth(wide_chars, num_wide_chars);
334 if (width == -1) {
335 PyErr_SetString(PyExc_UnicodeError, "wcswidth()");
336 return NULL;
337 }
338
339 return PyInt_FromLong(width);
340}
341
342static PyObject *
343func_cpython_reset_locale(PyObject *self, PyObject *unused)
344{
345 // From man setlocale:
346 // The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
347 // On startup of the main program, the portable "C" locale is selected as default.
348
349 // Python overrides this, so we set it back.
350 if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
351 // Our CI machines don't work with C.UTF-8, even though it's supposed
352 // to exist?
353 if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
354 PyErr_SetString(PyExc_SystemError, "Couldn't set locale to C.UTF-8 or en_US.UTF-8");
355 return NULL;
356 }
357 }
358 Py_RETURN_NONE;
359}
360
361#ifdef OVM_MAIN
362#include "pyext/libc.c/methods.def"
363#else
364static PyMethodDef methods[] = {
365 // Return the canonical version of a path with symlinks, or None if there is
366 // an error.
367 {"realpath", func_realpath, METH_VARARGS, ""},
368
369 // Return whether a string matches a pattern."
370 {"fnmatch", func_fnmatch, METH_VARARGS, ""},
371
372 // Return a list of files that match a pattern.
373 // We need this since Python's glob doesn't have char classes.
374 {"glob", func_glob, METH_VARARGS, ""},
375
376 // Search a string for regex. Returns a list of matches, None if no
377 // match. Raises RuntimeError if the regex is invalid.
378 {"regex_search", func_regex_search, METH_VARARGS, ""},
379
380 // If the regex matches the string, return the start and end position of the
381 // first group. Returns None if there is no match. Raises RuntimeError if
382 // the regex is invalid.
383 {"regex_first_group_match", func_regex_first_group_match, METH_VARARGS, ""},
384
385 // "Print three floating point values for the 'time' builtin.
386 {"print_time", func_print_time, METH_VARARGS, ""},
387
388 {"gethostname", socket_gethostname, METH_NOARGS, ""},
389
390 // ioctl() to get the terminal width.
391 {"get_terminal_width", func_get_terminal_width, METH_NOARGS, ""},
392
393 // Get the display width of a string. Throw an exception if the string is invalid UTF8.
394 {"wcswidth", func_wcswidth, METH_VARARGS, ""},
395
396 // Workaround for CPython's calling setlocale() in pythonrun.c. ONLY used
397 // by tests and bin/oil.py.
398 {"cpython_reset_locale", func_cpython_reset_locale, METH_NOARGS, ""},
399 {NULL, NULL},
400};
401#endif
402
403void initlibc(void) {
404 PyObject *module;
405
406 module = Py_InitModule("libc", methods);
407 if (module != NULL) {
408 // ./configure values
409 PyModule_AddIntConstant(module, "HAVE_GLOB_PERIOD", HAVE_GLOB_PERIOD);
410 PyModule_AddIntConstant(module, "HAVE_FNM_EXTMATCH", HAVE_FNM_EXTMATCH);
411
412 // Actual libc values
413 PyModule_AddIntConstant(module, "GLOB_PERIOD", GLOB_PERIOD);
414 PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD);
415 PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE);
416 PyModule_AddIntConstant(module, "REG_NEWLINE", REG_NEWLINE);
417 PyModule_AddIntConstant(module, "REG_NOTBOL", REG_NOTBOL);
418 }
419
420 errno_error = PyErr_NewException("libc.error",
421 PyExc_IOError, NULL);
422}