mycpp/mylib.py

OILS / mycpp / mylib.py View on Github | oilshell.org

533 lines, 188 significant

1	"""
2	mylib.py: Python stubs/interfaces that are reimplemented in C++, not directly
3	translated.
4	"""
5	from __future__ import print_function
6
7	try:
8	import cStringIO
9	except ImportError:
10	# Python 3 doesn't have cStringIO. Our yaks/ demo currently uses
11	# mycpp/mylib.py with Python 3.
12	cStringIO = None
13	import io
14
15	import sys
16
17	from pylib import collections_
18	try:
19	import posix_ as posix
20	except ImportError:
21	# Hack for tangled dependencies.
22	import os
23	posix = os
24
25	from typing import (Tuple, List, Dict, Optional, Iterator, Any, TypeVar,
26	Generic, cast, TYPE_CHECKING)
27	if TYPE_CHECKING:
28	from mycpp import mops
29
# Flags for conditional translation.  In this Python module CPP is false and
# PYTHON is true.
CPP = False
PYTHON = True

# File descriptor number of standard input, under its POSIX name.
STDIN_FILENO = 0
36
37
def MaybeCollect():
    # type: () -> None
    """Do nothing in Python.

    NOTE(review): the name suggests a garbage-collection hook for the C++
    runtime -- confirm against the C++ implementation.
    """
    return None
41
42
def NewDict():
    # type: () -> Dict[str, Any]
    """Return a new, empty OrderedDict, so dictionaries are ordered in Python.

    Ordering matters e.g. for JSON.  In C++, our Dict implementation should
    be ordered.
    """
    ordered = collections_.OrderedDict()
    return ordered
50
51
def log(msg, *args):
    # type: (str, *Any) -> None
    """Print debug output to stderr.

    When extra arguments are given, msg is used as a %-format string and the
    arguments are interpolated into it; otherwise msg is printed verbatim, so
    a literal '%' is safe when there are no arguments.
    """
    text = (msg % args) if args else msg
    print(text, file=sys.stderr)
58
59
def print_stderr(s):
    # type: (str) -> None
    """Print a user-facing message, followed by a newline, to stderr.

    Use this sparingly: unlike ui.ErrorFormatter it carries no location info.
    It is meant for fatal I/O errors that were only caught at the top level.
    """
    message = s
    print(message, file=sys.stderr)
69
70
71	#
72	# Byte Operations avoid excessive allocations with string algorithms
73	#
74
75
def ByteAt(s, i):
    # type: (str, int) -> int
    """Return the integer value of the character at index i of s.

    i must be in bounds, i.e. 0 <= i < len(s).  Negative (from-the-end)
    indices are rejected rather than wrapped; this simplifies the C++
    implementation.

    Raises:
      AssertionError: if i is negative or not less than len(s).
    """
    assert 0 <= i, 'No negative indices'
    # Separate message for the upper bound, so a failure names the real cause.
    assert i < len(s), 'Index out of bounds'

    return ord(s[i])
85
86
def ByteEquals(byte, ch):
    # type: (int, str) -> bool
    """Return whether the integer byte is the code of the 1-char string ch.

    Asserts that ch has length 1 and that byte is in the range [0, 256).
    """
    assert len(ch) == 1, ch
    assert 0 <= byte < 256, byte

    expected = ord(ch)
    return expected == byte
93
94
def ByteInSet(byte, byte_set):
    # type: (int, str) -> bool
    """Return whether the character with code byte occurs in byte_set.

    Asserts that byte is in the range [0, 256).
    """
    assert 0 <= byte < 256, byte

    ch = chr(byte)
    return ch in byte_set
100
101
def JoinBytes(byte_list):
    # type: (List[int]) -> str
    """Build a string from a list of integer character codes."""
    chars = map(chr, byte_list)
    return ''.join(chars)
106
107
108	#
109	# For SparseArray
110	#
111
112
def BigIntSort(keys):
    # type: (List[mops.BigInt]) -> None
    """Sort keys in place, in ascending order of each element's .i attribute."""

    def _SortKey(big):
        # The comparison key is whatever value the element holds in .i
        return big.i

    keys.sort(key=_SortKey)
116
117
118	#
119	# Files
120	#
121
122
class File:
    """Placeholder for a future read/write file interface.

    TODO: Define the read/write interface here, so that LineReader() and
    Writer() can possibly inherit from it, with runtime assertions.

    That would allow downcasting from File -> LineReader, like we currently
    do in C++ in gc_mylib.h.

    Inheritance can't express the structural Reader/Writer pattern of Go,
    which would be better.  I suppose we could use File* everywhere, but
    having fine-grained types is nicer.  And there will be very few casts.
    """
136
137
class LineReader:
    """Interface for readers; every method here raises NotImplementedError."""

    def readline(self):
        # type: () -> str
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError

    def close(self):
        # type: () -> None
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError

    def isatty(self):
        # type: () -> bool
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError
151
152
if TYPE_CHECKING:
    # Declarations seen only by the type checker; the runtime bindings are in
    # the else branch below.

    class BufLineReader(LineReader):
        """Typed stand-in for the StringIO class bound at runtime."""

        def __init__(self, s):
            # type: (str) -> None
            raise NotImplementedError()

    def open(path):
        # type: (str) -> LineReader

        # TODO: should probably return mylib.File
        # mylib.open() is currently only used in yaks/yaks_main and
        # bin.osh_parse
        raise NotImplementedError()

else:
    # Actual runtime: cStringIO is None when its import failed (Python 3), in
    # which case io.StringIO is used instead.
    BufLineReader = cStringIO.StringIO if cStringIO else io.StringIO

    # Re-export the builtin under this module's namespace.
    open = open
177
178
class Writer:
    """Interface for writers; every method here raises NotImplementedError."""

    def write(self, s):
        # type: (str) -> None
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError

    def flush(self):
        # type: () -> None
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError

    def isatty(self):
        # type: () -> bool
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError

    def close(self):
        # type: () -> None
        """Abstract: implementations supply the behavior."""
        raise NotImplementedError
196
197
class BufWriter(Writer):
    """Mimic StringIO API, but add clear() so we can reuse objects.

    We can also add accelerators for directly writing numbers, to avoid
    allocations when encoding JSON.

    Written strings are accumulated in a list and only joined in getvalue().
    """

    def __init__(self):
        # type: () -> None
        self.parts = []  # type: List[str]

    def write(self, s):
        # type: (str) -> None
        """Append s to the buffer."""
        self.parts.append(s)

    def flush(self):
        # type: () -> None
        """No-op: everything is already in memory.

        Defined so that code holding a generic Writer can call flush() on a
        BufWriter without hitting the base class's NotImplementedError.
        """
        pass

    def isatty(self):
        # type: () -> bool
        """An in-memory buffer is never a terminal."""
        return False

    def write_spaces(self, n):
        # type: (int) -> None
        """For JSON indenting.  Avoid intermediate allocations in C++."""
        self.parts.append(' ' * n)

    def getvalue(self):
        # type: () -> str
        """Return everything written since construction or the last clear()."""
        return ''.join(self.parts)

    def clear(self):
        # type: () -> None
        """Empty the buffer in place, so the object can be reused."""
        del self.parts[:]

    def close(self):
        # type: () -> None

        # No-op for now - we could invalidate write()?
        pass
235
236
def Stdout():
    # type: () -> Writer
    """Return the current sys.stdout object, typed as a Writer."""
    stream = sys.stdout
    return stream
240
241
def Stderr():
    # type: () -> Writer
    """Return the current sys.stderr object, typed as a Writer."""
    stream = sys.stderr
    return stream
245
246
def Stdin():
    # type: () -> LineReader
    """Return the current sys.stdin object, typed as a LineReader."""
    stream = sys.stdin
    return stream
250
251
class switch(object):
    """Translates to C switch on int.

    with switch(i) as case:
        if case(42, 43):
            print('hi')
        elif case(99):
            print('two')
        else:
            print('neither')
    """

    def __init__(self, value):
        # type: (int) -> None
        # The value every case(...) call is tested against.
        self.value = value

    def __enter__(self):
        # type: () -> switch
        # The object bound by 'as case' is this instance; it is callable.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the switched-on value is one of the given cases."""
        return self.value in cases
279
280
class str_switch(object):
    """Translates to fast dispatch on string length, then memcmp().

    with str_switch(s) as case:
        if case('foo', 'bar'):
            print('hi')
        elif case('baz'):
            print('two')
        else:
            print('neither')
    """

    def __init__(self, value):
        # type: (str) -> None
        # The string every case(...) call is tested against.
        self.value = value

    def __enter__(self):
        # type: () -> str_switch
        # The object bound by 'as case' is this instance; it is callable.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the switched-on string is one of the given cases."""
        return self.value in cases
299
300
class tagswitch(object):
    """A ContextManager that translates to switch statement over ASDL types.

    The node's tag() is read once, at construction; each case(...) call then
    tests that stored tag against the given candidates.
    """

    def __init__(self, node):
        # type: (Any) -> None
        node_tag = node.tag()
        self.tag = node_tag

    def __enter__(self):
        # type: () -> tagswitch
        # The object bound by 'as case' is this instance; it is callable.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        # A false return value lets any exception propagate, with traceback.
        return False

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the node's tag is one of the given cases."""
        matched = self.tag in cases
        return matched
319
320
if TYPE_CHECKING:
    # Seen only by the type checker; nothing here exists at runtime.
    # Doesn't work
    T = TypeVar('T')

    class StackArray(Generic[T]):
        """Typed view of a list restricted to append() and pop()."""

        def __init__(self):
            # type: () -> None
            # The signature comment above makes this a typed function, so the
            # attribute declaration below is actually checked.
            self.items = []  # type: List[T]

        def append(self, item):
            # type: (T) -> None
            self.items.append(item)

        def pop(self):
            # type: () -> T
            return self.items.pop()

    # Doesn't work, this is only for primitive types
    #StackArray = NewType('StackArray', list)
340
341
def MakeStackArray(item_type):
    # type: (TypeVar) -> StackArray[item_type]
    """
    Convenience "constructor" used like this:

        myarray = MakeStackArray(int)

    The idiom could also be

        myarray = cast('StackArray[int]', [])

    But that's uglier.

    In Python the result is a new empty list; item_type is not used at
    runtime.
    """
    fresh = []
    return cast('StackArray[item_type]', fresh)
356
357
if TYPE_CHECKING:
    K = TypeVar('K')
    V = TypeVar('V')


def iteritems(d):
    # type: (Dict[K, V]) -> Iterator[Tuple[K, V]]
    """Return an iterator over the (key, value) pairs of d.

    Make translation a bit easier.

    Uses d.iteritems() where it exists (Python 2).  Python 3 dicts have no
    iteritems(), so fall back to an iterator over d.items(); this module is
    also loaded under Python 3.
    """
    try:
        py2_iteritems = d.iteritems
    except AttributeError:
        # Python 3: items() is a view, so wrap it to return a real iterator.
        return iter(d.items())
    return py2_iteritems()
367
368
def split_once(s, delim):
    # type: (str, str) -> Tuple[str, Optional[str]]
    """Split s at the first occurrence of delim.

    Easier to call than split(s, 1) because of tuple unpacking: the result
    is always a pair.  The second element is None when delim does not occur
    in s.
    """
    parts = s.split(delim, 1)
    if len(parts) != 1:
        return parts[0], parts[1]

    # Delimiter not found: the whole string is the first element.
    no_str = None  # type: Optional[str]
    return s, no_str
379
380
def hex_lower(i):
    # type: (int) -> str
    """Format i as lowercase hexadecimal, with no '0x' prefix."""
    digits = '%x' % i
    return digits
384
385
def dict_erase(d, key):
    # type: (Dict[Any, Any], Any) -> None
    """
    Ensure that a key isn't in the Dict d.  This makes C++ translation easier.

    Removing a key that is absent is not an error.
    """
    if key in d:
        del d[key]
395
396
def str_cmp(s1, s2):
    # type: (str, str) -> int
    """Three-way string comparison.

    Returns 0 if s1 == s2, -1 if s1 < s2, and 1 otherwise.
    """
    if s1 == s2:
        return 0
    return -1 if s1 < s2 else 1
405
406
class UniqueObjects(object):
    """A set of objects identified by their address in memory

    Python's id(obj) returns the address of any object.  But we don't simply
    implement it, because it requires a uint64_t on 64-bit systems, while
    mycpp only supports 'int'.

    So we have a whole class.

    Should be used for:

    - Cycle detection when pretty printing, as Python's repr() does
      - See CPython's Objects/object.c PyObject_Repr()
        /* These methods are used to control infinite recursion in repr,
           str, print, etc.  Container objects that may recursively contain
           themselves, e.g. builtin dictionaries and lists, should use
           Py_ReprEnter() and Py_ReprLeave() to avoid infinite recursion.
        */
      - e.g. dictobject.c dict_repr() calls Py_ReprEnter() to print {...}
      - In Python 2.7 a GLOBAL VAR is used

      - It also checks for STACK OVERFLOW

    - Packle serialization

    NOTE: only id(obj) is stored, not obj itself, so this set does not keep
    objects alive.  Python only guarantees that id() is unique among objects
    whose lifetimes overlap, so callers should keep added objects alive for
    as long as they query this set.
    """

    def __init__(self):
        # type: () -> None
        # 64-bit id() -> small integer ID
        self.addresses = {}  # type: Dict[int, int]

    def Contains(self, obj):
        # type: (Any) -> bool
        """Convenience: return whether obj has been assigned an ID."""
        return self.Get(obj) != -1

    def MaybeAdd(self, obj):
        # type: (Any) -> None
        """Convenience: Add() obj unless it is already present."""
        if not self.Contains(obj):
            self.Add(obj)

    # def AddNewObject(self, obj):
    def Add(self, obj):
        # type: (Any) -> None
        """
        Assert it isn't already there, and assign a new ID!

        IDs are assigned in insertion order, starting from 0.

        # Lib/pickle does:

            self.memo[id(obj)] = memo_len, obj

        I guess that's the object ID and a void*

        Then it does:

            x = self.memo.get(id(obj))

        and

            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))

        BUT It only uses the numeric ID!
        """
        addr = id(obj)
        assert addr not in self.addresses
        # Nothing is ever removed, so the current size is the next free ID.
        self.addresses[addr] = len(self.addresses)

    def Get(self, obj):
        # type: (Any) -> int
        """
        Returns unique ID assigned

        Returns -1 if it doesn't exist?
        """
        addr = id(obj)
        return self.addresses.get(addr, -1)

    # Note: self.memo.clear() doesn't appear to be used
487
488
def probe(provider, name, *args):
    # type: (str, str, *Any) -> None
    """Create a probe for use with profilers like linux perf and ebpf or dtrace.

    This is a no-op in Python: the call is just a marker for mycpp to emit a
    DTRACE_PROBE().  All arguments are ignored here.
    """
    return
494
495
if 0:
    # Disabled prototype of Unix file descriptor I/O, compared with FILE*
    # libc I/O.  Doesn't seem like we need this now.
    #
    # Note that enabling this would rebind the module-level name File, which
    # is already defined earlier in this file.

    # Short versions of STDOUT_FILENO and STDERR_FILENO
    kStdout = 1
    kStderr = 2

    def writeln(s, fd=kStdout):
        # type: (str, int) -> None
        """Write a line.  The name is consistent with JavaScript writeln() and Rust.

        e.g.
            writeln("x = %d" % x, kStderr)

        TODO: The Oil interpreter shouldn't use print() anywhere.  Instead it can use
        writeln(s) and writeln(s, kStderr)
        """
        # Two separate writes, in order: the text, then the newline.
        for chunk in (s, '\n'):
            posix.write(fd, chunk)

    class File(object):
        """Custom file wrapper for Unix I/O like write() read()

        Not C I/O like fwrite() fread().  There should be no flush().
        """

        def __init__(self, fd):
            # type: (int) -> None
            self.fd = fd

        def write(self, s):
            # type: (str) -> None
            """Write s to this object's file descriptor."""
            posix.write(self.fd, s)

        def writeln(self, s):
            # type: (str) -> None
            """Write s plus a newline, via the module-level writeln()."""
            writeln(s, self.fd)