pyext/libc_test.py

OILS / pyext / libc_test.py View on Github | oils.pub

386 lines, 213 significant

#!/usr/bin/env python2
# coding=utf8
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
"""
libc_test.py: Tests for libc.py
"""
# The docstring has to come before the __future__ import; placed after it, the
# string is just an expression statement and never becomes the module __doc__.
from __future__ import print_function

import unittest
import sys
import signal

import libc  # module under test
18
# Guard for tests that fail on Darwin: True when sys.platform is 'darwin'.
IS_DARWIN = sys.platform == 'darwin'
21
class LibcTest(unittest.TestCase):
  """Tests for the functions and constants exposed by the libc module."""

  def testConstants(self):
    # Smoke test: only checks that the constants exist and format as ints.
    print('GLOB_PERIOD %d' % libc.GLOB_PERIOD)
    print('HAVE_GLOB_PERIOD %d' % libc.HAVE_GLOB_PERIOD)
    print('HAVE_FNM_EXTMATCH %d' % libc.HAVE_FNM_EXTMATCH)

  def testFnmatch(self):
    # Table-driven check of libc.fnmatch().  Entries that are None (the
    # Darwin-guarded ones) are dropped before the loop.
    cases = [
        # (pattern, string, result)

        ('', '', 1),  # no pattern is valid
        ('a', 'a', 1),
        ('?', 'a', 1),

        # Test escaping of glob operator chars
        ('\\?', '-', 0),
        ('\\?', '?', 1),

        ('\\*', '-', 0),
        ('\\*', '*', 1),

        ('\\[', '-', 0),
        ('\\[', '[', 1),

        ('\\!', '-', 0),
        ('\\!', '!', 1),

        # What if we also escape extended glob chars?
        # Extra escaping is OK, so we should ALWAYS escape them.
        ('\\(', '(', 1),
        ('\\(', 'x', 0),
        ('\\(', '\\', 0),
        ('\\(', '\\(', 0),

        ('\\|', '|', 1),
        ('\\|', 'x', 0),

        ('\\\\', '\\', 1),
        ('\\\\', 'x', 0),
        ('\\\\', '\\extra', 0),

        ('\\f', '\\', 0),  # no match

        # Hm this is weird, c is not a special character
        ('\\c', 'c', 1),
        ('\\c', '\\c', 0),
        ('\\\\c', '\\c', 1),  # the proper way to match

        ('c:\\foo', 'c:\\foo', 0),
        ('c:\\foo', 'c:foo', 1),

        ('strange]one', 'strange]one', 1),

        # What is another error?  Invalid escape is OK?
        # A pattern that is a lone backslash does not match a backslash.
        None if IS_DARWIN else ('\\', '\\', 0),

        ('[[:alpha:]]', 'a', 1),
        ('[^[:alpha:]]', 'a', 0),  # negate
        ('[[:alpha:]]', 'aa', 0),  # exact match fails

        # Combining char class and a literal character
        ('[[:alpha:]7]', '7', 1),
        ('[[:alpha:]][[:alpha:]]', 'az', 1),

        ('[a]', 'a', 1),
        # Hm [] is treated as a constant string, not an empty char class.
        # Should we change LooksLikeGlob?
        ('[]', '', 0),

        ('[a-z]', 'a', 1),
        ('[a-z]', '-', 0),

        # THIS IS INCONSISTENT WITH REGEX!
        # Somehow in regexes (at least ERE) GNU libc treats [a\-z] as [a-z].
        # See testSpecialCharsInCharClass.
        ('[a\\-z]', '-', 1),
        ('[a\\-z]', 'b', 0),

        # Need double backslash in character class
        ('[\\\\]', '\\', 1),

        # Can you escape ] with \?  Yes in fnmatch
        ('[\\]]', '\\', 0),
        ('[\\]]', ']', 1),

        None if IS_DARWIN else ('[]', 'a', 0),
        None if IS_DARWIN else ('[]', '[]', 1),

        ('?.c', 'a.c', 1),
        ('?.c', 'aa.c', 0),
        # mu character
        ('?.c', '\xce\xbc.c', 1),
    ]

    for pat, s, expected in filter(None, cases):
      actual = libc.fnmatch(pat, s)
      self.assertEqual(
          expected, actual, '%r %r -> got %d' % (pat, s, actual))

  def testFnmatchExtglob(self):
    # NOTE: We always use FNM_EXTMATCH when available

    # With GNU extension.  The '|' inside the groups must be unescaped: a
    # backslash before it would make it a literal pipe instead of alternation.
    cases = [
        # One of these
        ('--@(help|verbose)', '--verbose', 1),
        ('--@(help|verbose)', '--foo', 0),

        ('--*(help|verbose)', '--verbose', 1),
        ('--*(help|verbose)', '--', 1),
        ('--*(help|verbose)', '--helpverbose', 1),  # Not what we want

        ('--+(help|verbose)', '--verbose', 1),
        ('--+(help|verbose)', '--', 0),
        ('--+(help|verbose)', '--helpverbose', 1),  # Not what we want

        ('--?(help|verbose)', '--verbose', 1),
        ('--?(help|verbose)', '--helpverbose', 0),

        # Neither of these
        ('--!(help|verbose)', '--verbose', 0),

        # escaping *
        ('@(ab\\*)', 'ab*', 1),
        ('@(ab\\*)', 'abc', 0),
        # escaping ?
        ('@(ab\\?)', 'ab?', 1),
        ('@(ab\\?)', 'abc', 0),

        # escaping []
        ('@(ab\\[\\])', 'ab[]', 1),
        ('@(ab\\[\\])', 'abcd', 0),

        # escaping :
        ('@(ab\\:)', 'ab:', 1),
        ('@(ab\\:)', 'abc', 0),

        # escaping a is no-op
        (r'@(\ab)', 'ab', 1),
        (r'@(\ab)', r'\ab', 0),

        #('@(ab\\|)', 'ab|', 1),  # GNU libc bug?  THIS SHOULD WORK

        # There's no way to escape | in extended glob???  wtf.
        #('@(ab\\|)', 'ab', 1),
        #('@(ab\\|)', 'ab\\', 1),
        #('@(ab\\|)', 'ab\\|', 1),
    ]
    for pat, s, expected in cases:
      actual = libc.fnmatch(pat, s)
      self.assertEqual(expected, actual,
                       "Matching %s against %s: got %s but expected %s" %
                       (pat, s, actual, expected))

  def testGlob(self):
    # Smoke test: results are printed, not asserted.
    print(libc.glob('*.py', 0))

    # This will not match anything!
    print(libc.glob('\\', 0))
    # This one will match a file named \
    print(libc.glob('\\\\', 0))
    print(libc.glob('[[:punct:]]', 0))

  # core/util_test.py has more tests like this, for util.RegexSearch()

  def testRegexSearch(self):
    # Oh it's a PRECEDENCE problem?
    # leftMatch() is not implemented correctly
    #pat = '^([0-9])|([a-z])'

    # Both alternatives are anchored; '|' is unescaped so it is alternation.
    pat = '^([0-9])|^([a-z])'

    lines = 'one\n2\nthree\n'

    pos = 3
    indices = libc.regex_search(pat, 0, lines, 0, pos)

    self.assertEqual(None, indices)

  def testRegexSearchError(self):
    # A pattern that is just '*' must be rejected with ValueError.
    try:
      libc.regex_search(r'*', 0, 'abcd', 0)
    except ValueError as e:
      print(e)
    else:
      self.fail('Expected ValueError')

  def testRegexSearchUnicode(self):
    self.assertEqual([0, 1], libc.regex_search(r'.', 0, 'a', 0))
    self.assertEqual([0, 1], libc.regex_search(r'.', 0, '\x7f', 0))

    # dot matches both bytes of utf-8 encoded MU char - appears independent of
    # LANG=C LC_ALL=C
    self.assertEqual([0, 2], libc.regex_search(r'.', 0, '\xce\xbc', 0))

    # Literal
    self.assertEqual([0, 2], libc.regex_search('\xce\xbc', 0, '\xce\xbc', 0))

    # literal mu in char class allowed?
    self.assertEqual([0, 2], libc.regex_search('[\xce\xbc]', 0, '\xce\xbc', 0))
    # two bytes here
    self.assertEqual(None, libc.regex_search('[\xce\xbc]', 0, '\xce', 0))

    # dot doesn't match high byte?  not utf-8
    self.assertEqual(None, libc.regex_search(r'.', 0, '\xce', 0))

    # [a] matches a
    self.assertEqual([0, 1], libc.regex_search(r'[a]', 0, 'a', 0))

    # \x01 isn't valid syntax (raw string: the regex sees backslash, x, 0, 1)
    self.assertEqual(None, libc.regex_search(r'[\x01]', 0, '\x01', 0))

    # literal low byte matches
    self.assertEqual([0, 1], libc.regex_search('[\x01]', 0, '\x01', 0))

    # literal high byte does NOT match?  Why?
    if 0:  # deliberately disabled
      self.assertEqual([0, 1], libc.regex_search('[\xff]', 0, '\xff', 0))

  def testRegexFirstGroupMatch(self):
    s = 'oXooXoooXoX'
    self.assertEqual(
        (1, 3),
        libc.regex_first_group_match('(X.)', s, 0))

    # Match from position 3
    self.assertEqual(
        (4, 6),
        libc.regex_first_group_match('(X.)', s, 3))

    # Match from position 6
    self.assertEqual(
        (8, 10),
        libc.regex_first_group_match('(X.)', s, 6))

    # Syntax Error
    self.assertRaises(
        RuntimeError, libc.regex_first_group_match, r'*', 'abcd', 0)

  def testRegexFirstGroupMatchError(self):
    # Helping to debug issue #291
    s = ''
    if 0:  # deliberately disabled
      # Invalid regex syntax
      libc.regex_first_group_match("(['+-'])", s, 6)

  def testSpecialCharsInCharClass(self):
    # Table of (pattern, string, expected span of group 1, or None).
    CASES = [
        ("([a-z]+)", '123abc123', (3, 6)),

        # Uh what the heck, \- means the same thing as -?  It's just ignored.
        # At least in GNU libc.

        # https://stackoverflow.com/questions/28495913/how-do-you-escape-a-hyphen-as-character-range-in-a-posix-regex
        # The <hyphen> character shall be treated as itself if it occurs first
        # (after an initial '^', if any) or last in the list, or as an ending
        # range point in a range expression

        ("([a\\-z]+)", '123abc123', (3, 6)),

        # This is an inverted range.  TODO: Need to fix the error message.
        #("([a\\-.]+)", '123abc123', None),

        ("([\\\\]+)", 'a\\b', (1, 2)),

        # Can you escape ] with \?  Yes in fnmatch, but NO here!!!
        ('([\\]])', '\\', None),
        ('([\\]])', ']', None),

        # Weird parsing!!!
        ('([\\]])', '\\]', (0, 2)),
    ]

    for pat, s, expected in CASES:
      result = libc.regex_first_group_match(pat, s, 0)
      self.assertEqual(expected, result,
                       "FAILED: pat %r s %r result %s" % (pat, s, result))

  def testRealpathFailOnNonexistentDirectory(self):
    # This behaviour is actually inconsistent with GNU readlink,
    # but matches behaviour of busybox readlink
    # (https://github.com/jgunthorpe/busybox)
    self.assertEqual(None, libc.realpath('_tmp/nonexistent'))

    # Consistent with GNU
    self.assertEqual(None, libc.realpath('_tmp/nonexistent/supernonexistent'))

  def testPrintTime(self):
    # Smoke test: the call is bracketed by blank lines written to stderr.
    print('', file=sys.stderr)
    libc.print_time(0.1, 0.2, 0.3)
    print('', file=sys.stderr)

  def testGethostname(self):
    # Smoke test: result is printed, not asserted.
    print(libc.gethostname())

  def testGetTerminalWidth(self):
    # An IOError is tolerated and reported; any other outcome prints the width.
    try:
      width = libc.get_terminal_width()
    except IOError as e:
      print('error getting terminal width: %s' % e)
    else:
      print('width % d' % width)

  def testWcsWidth(self):
    if not IS_DARWIN:
      self.assertEqual(1, libc.wcswidth("▶️"))
      self.assertEqual(28, libc.wcswidth("(osh) ~/.../unchanged/oil ▶️ "))

    mu = u"\u03bc".encode('utf-8')
    print(repr(mu))
    print(mu)
    print(len(mu))
    self.assertEqual(1, libc.wcswidth(mu))

    self.assertEqual(2, libc.wcswidth("→ "))

    # mbstowcs fails on invalid utf-8
    try:
      # first byte of mu
      libc.wcswidth("\xce")
    except UnicodeError as e:
      self.assertEqual('mbstowcs() 1', e.message)
    else:
      self.fail('Expected failure')

    # wcswidth fails on unprintable character
    try:
      libc.wcswidth("\x01")
    except UnicodeError as e:
      self.assertEqual('wcswidth()', e.message)
    else:
      self.fail('Expected failure')

    self.assertRaises(UnicodeError, libc.wcswidth, "\xfe")

  def testSleepUntilError(self):
    # None is not an accepted argument: expect TypeError.
    try:
      libc.sleep_until_error(None)
    except TypeError:
      print('ok')
    else:
      self.fail('Expected TypeError')

    result = libc.sleep_until_error(0.001)
    self.assertEqual(0, result)

    # Not testing errno case

  def testStrsignal(self):
    # Named constants instead of raw numbers, matching the SIGTERM case.
    self.assertEqual('Segmentation fault', libc.strsignal(signal.SIGSEGV))
    self.assertEqual('Aborted', libc.strsignal(signal.SIGABRT))
    self.assertEqual('Illegal instruction', libc.strsignal(signal.SIGILL))
    self.assertEqual('Terminated', libc.strsignal(signal.SIGTERM))

    # An out-of-range signal number is rejected.
    with self.assertRaises(ValueError):
      libc.strsignal(999)
379
380
if __name__ == '__main__':
  # To simulate the OVM_MAIN patch in pythonrun.c
  libc.cpython_reset_locale()
  unittest.main()

# vim: ts=2 sw=2