OILS / mycpp / mylib.py View on Github | oilshell.org

533 lines, 188 significant
1"""
2mylib.py: Python stubs/interfaces that are reimplemented in C++, not directly
3translated.
4"""
5from __future__ import print_function
6
7try:
8 import cStringIO
9except ImportError:
10 # Python 3 doesn't have cStringIO. Our yaks/ demo currently uses
11 # mycpp/mylib.py with Python 3.
12 cStringIO = None
13 import io
14
15import sys
16
17from pylib import collections_
18try:
19 import posix_ as posix
20except ImportError:
21 # Hack for tangled dependencies.
22 import os
23 posix = os
24
25from typing import (Tuple, List, Dict, Optional, Iterator, Any, TypeVar,
26 Generic, cast, TYPE_CHECKING)
27if TYPE_CHECKING:
28 from mycpp import mops
29
# Flags for conditional translation.  In this Python module PYTHON is true
# and CPP is false.
PYTHON = True
CPP = False

# Use the POSIX name directly for the standard input file descriptor.
STDIN_FILENO = 0
36
37
def MaybeCollect():
    # type: () -> None
    """Do nothing and return None; this Python version is a no-op."""
    return None
41
42
def NewDict():
    # type: () -> Dict[str, Any]
    """Return a new, empty collections_.OrderedDict.

    Dictionaries are made ordered in Python (e.g. for JSON).  In C++, our
    Dict implementation should be ordered as well.
    """
    ordered = collections_.OrderedDict()
    return ordered
50
51
def log(msg, *args):
    # type: (str, *Any) -> None
    """Print debug output to stderr.

    When extra arguments are passed, msg is used as a %-format string and
    the arguments are interpolated into it.  With no extra arguments, msg is
    printed as-is, so a literal '%' in it is left alone.
    """
    text = (msg % args) if args else msg
    print(text, file=sys.stderr)
58
59
def print_stderr(s):
    # type: (str) -> None
    """Print a user-facing message, followed by a newline, to stderr.

    Use this sparingly: unlike ui.ErrorFormatter, it carries no location
    info.  We use it to print fatal I/O errors that were only caught at the
    top level.
    """
    stream = sys.stderr
    print(s, file=stream)
69
70
71#
72# Byte Operations avoid excessive allocations with string algorithms
73#
74
75
def ByteAt(s, i):
    # type: (str, int) -> int
    """Return ord() of the character at index i of s.

    i must be in bounds, i.e. 0 <= i < len(s).  Negative (from-the-end)
    indices are rejected even though Python would accept them; this
    simplifies the C++ implementation.

    Raises:
      AssertionError: if i is negative or not less than len(s).
    """
    assert 0 <= i, 'No negative indices'
    # Distinct message so an out-of-range index isn't reported as negative.
    assert i < len(s), 'Index out of bounds'

    return ord(s[i])
85
86
def ByteEquals(byte, ch):
    # type: (int, str) -> bool
    """Tell whether the integer byte equals ord() of the 1-character string ch.

    Asserts that ch has length 1 and that byte is in the range [0, 256).
    """
    assert len(ch) == 1, ch
    assert 0 <= byte < 256, byte

    expected = ord(ch)
    return expected == byte
93
94
def ByteInSet(byte, byte_set):
    # type: (int, str) -> bool
    """Tell whether chr(byte) occurs in the string byte_set.

    Asserts that byte is in the range [0, 256).
    """
    assert 0 <= byte < 256, byte

    as_char = chr(byte)
    return as_char in byte_set
100
101
def JoinBytes(byte_list):
    # type: (List[int]) -> str
    """Build a string by applying chr() to each integer, in order."""
    chars = [chr(code) for code in byte_list]
    return ''.join(chars)
106
107
108#
109# For SparseArray
110#
111
112
def BigIntSort(keys):
    # type: (List[mops.BigInt]) -> None
    """Sort keys in place, ordered by each element's .i attribute."""
    keys.sort(key=lambda item: item.i)
116
117
118#
119# Files
120#
121
122
class File:
    """Empty placeholder class; it defines no methods yet.

    TODO: This should define a read/write interface, and then LineReader()
    and Writer() can possibly inherit from it, with runtime assertions.

    Then we allow downcasting from File -> LineReader, like we currently do
    in C++ in gc_mylib.h.

    Inheritance can't express the structural Reader/Writer pattern of Go,
    which would be better.  I suppose we could use File* everywhere, but
    having fine-grained types is nicer.  And there will be very few casts.
    """
136
137
class LineReader:
    """Interface for reading lines.

    Each method here only raises NotImplementedError; the signatures are
    given by the type comments.
    """

    def readline(self):
        # type: () -> str
        """Not implemented in this base class."""
        raise NotImplementedError()

    def close(self):
        # type: () -> None
        """Not implemented in this base class."""
        raise NotImplementedError()

    def isatty(self):
        # type: () -> bool
        """Not implemented in this base class."""
        raise NotImplementedError()
151
152
if TYPE_CHECKING:
    # Declarations for the type checker only; this branch is not taken at
    # runtime, where TYPE_CHECKING is false.

    class BufLineReader(LineReader):
        """Typing stub: a LineReader constructed from a string."""

        def __init__(self, s):
            # type: (str) -> None
            raise NotImplementedError()

    def open(path):
        # type: (str) -> LineReader
        """Typing stub for open(): takes a path, returns a LineReader."""

        # TODO: should probably return mylib.File
        # mylib.open() is currently only used in yaks/yaks_main and
        # bin.osh_parse
        raise NotImplementedError()

else:
    # Actual runtime: use cStringIO when it was importable, otherwise
    # (Python 3) fall back to io.StringIO.
    BufLineReader = cStringIO.StringIO if cStringIO else io.StringIO

    # Bind the builtin open() as a name in this module.
    open = open
177
178
class Writer:
    """Interface for writing strings.

    Each method here only raises NotImplementedError; the signatures are
    given by the type comments.
    """

    def write(self, s):
        # type: (str) -> None
        """Not implemented in this base class."""
        raise NotImplementedError()

    def flush(self):
        # type: () -> None
        """Not implemented in this base class."""
        raise NotImplementedError()

    def isatty(self):
        # type: () -> bool
        """Not implemented in this base class."""
        raise NotImplementedError()

    def close(self):
        # type: () -> None
        """Not implemented in this base class."""
        raise NotImplementedError()
196
197
class BufWriter(Writer):
    """Mimic StringIO API, but add clear() so we can reuse objects.

    We can also add accelerators for directly writing numbers, to avoid
    allocations when encoding JSON.
    """

    def __init__(self):
        # type: () -> None
        # Chunks in the order they were written; getvalue() joins them.
        self.parts = []  # type: List[str]

    def write(self, s):
        # type: (str) -> None
        """Append s to the buffer."""
        self.parts.append(s)

    def flush(self):
        # type: () -> None
        """No-op: the data is only held in memory, so there's nothing to flush.

        Defined so that code flushing an arbitrary Writer doesn't hit the
        NotImplementedError raised by Writer.flush().
        """
        pass

    def isatty(self):
        # type: () -> bool
        """Always False."""
        return False

    def write_spaces(self, n):
        # type: (int) -> None
        """For JSON indenting.  Avoid intermediate allocations in C++."""
        self.parts.append(' ' * n)

    def getvalue(self):
        # type: () -> str
        """Return everything written since creation or the last clear()."""
        return ''.join(self.parts)

    def clear(self):
        # type: () -> None
        """Discard the buffered contents so the object can be reused."""
        del self.parts[:]

    def close(self):
        # type: () -> None
        """Do nothing."""

        # No-op for now - we could invalidate write()?
        pass
235
236
def Stdout():
    # type: () -> Writer
    """Return the current sys.stdout object."""
    stream = sys.stdout
    return stream
240
241
def Stderr():
    # type: () -> Writer
    """Return the current sys.stderr object."""
    stream = sys.stderr
    return stream
245
246
def Stdin():
    # type: () -> LineReader
    """Return the current sys.stdin object."""
    stream = sys.stdin
    return stream
250
251
class switch(object):
    """Translates to C switch on int.

    Usage:

        with switch(i) as case:
            if case(42, 43):
                print('hi')
            elif case(99):
                print('two')
            else:
                print('neither')
    """

    def __init__(self, value):
        # type: (int) -> None
        # The value that every case(...) call is tested against.
        self.value = value

    def __enter__(self):
        # type: () -> switch
        """Return this object, so 'as case' binds something callable."""
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        """Never suppress an exception raised inside the with block."""
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return True if the switched-on value is one of the given cases."""
        matched = self.value in cases
        return matched
279
280
class str_switch(object):
    """Translates to fast dispatch on string length, then memcmp().

    Used as a context manager whose 'as' target is called with candidate
    strings:

        with str_switch(s) as case:
            if case('foo', 'bar'):
                ...
    """

    def __init__(self, value):
        # type: (str) -> None
        # The string that every case(...) call is tested against.
        self.value = value

    def __enter__(self):
        # type: () -> str_switch
        """Return this object, so 'as case' binds something callable."""
        # The declared return type names this class, not the unrelated
        # 'switch' class: the object returned is self.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        """Never suppress an exception raised inside the with block."""
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return True if the switched-on string is one of the given cases."""
        return self.value in cases
299
300
class tagswitch(object):
    """A ContextManager that translates to switch statement over ASDL types."""

    def __init__(self, node):
        # type: (Any) -> None
        # Only the result of node.tag() is kept, not the node itself.
        self.tag = node.tag()

    def __enter__(self):
        # type: () -> tagswitch
        """Return this object, so 'as case' binds something callable."""
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        """Never suppress an exception raised inside the with block."""
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return True if the node's tag is one of the given cases."""
        matched = self.tag in cases
        return matched
319
320
if TYPE_CHECKING:
    # Doesn't work
    T = TypeVar('T')

    class StackArray(Generic[T]):
        """Type-checking-only declaration of a generic container with
        append() and pop(), backed by a list."""

        def __init__(self):
            self.items = []  # type: List[T]

        def append(self, item):
            # type: (T) -> None
            """Add item at the end of the underlying list."""
            self.items.append(item)

        def pop(self):
            # type: () -> T
            """Remove and return the last item of the underlying list."""
            return self.items.pop()

    # Doesn't work, this is only for primitive types
    #StackArray = NewType('StackArray', list)
340
341
def MakeStackArray(item_type):
    # type: (TypeVar) -> StackArray[item_type]
    """
    Convenience "constructor".  In Python it returns a new empty list that
    is cast to StackArray[item_type]; item_type is not otherwise used.

    Used like this:

        myarray = MakeStackArray(int)

    The idiom could also be

        myarray = cast('StackArray[int]', [])

    But that's uglier.
    """
    return cast(
        'StackArray[item_type]',
        [],
    )
356
357
if TYPE_CHECKING:
    # Key and value type variables, defined only for the type checker.
    K = TypeVar('K')
    V = TypeVar('V')
361
362
def iteritems(d):
    # type: (Dict[K, V]) -> Iterator[Tuple[K, V]]
    """Return an iterator over the (key, value) pairs of d.

    Make translation a bit easier.

    Works on both Python 2, where dicts have iteritems(), and Python 3,
    where that method no longer exists.
    """
    py2_iteritems = getattr(d, 'iteritems', None)
    if py2_iteritems is not None:
        return py2_iteritems()

    # Python 3: items() returns a view rather than an iterator, so wrap it
    # to keep returning an iterator as declared.
    return iter(d.items())
367
368
def split_once(s, delim):
    # type: (str, str) -> Tuple[str, Optional[str]]
    """Split s at the first occurrence of delim.

    Easier to call than split(s, 1) because the result always unpacks into
    two values: (before, after) when delim occurs in s, or (s, None) when it
    doesn't.
    """
    head, sep, tail = s.partition(delim)
    if not sep:
        # delim was not found
        return s, None
    return head, tail
379
380
def hex_lower(i):
    # type: (int) -> str
    """Format i as hexadecimal with lowercase digits and no '0x' prefix."""
    return format(i, 'x')
384
385
def dict_erase(d, key):
    # type: (Dict[Any, Any], Any) -> None
    """
    Ensure that a key isn't in the Dict d.  This makes C++ translation easier.

    A key that is already absent is not an error.
    """
    # The default argument keeps pop() from raising KeyError; the popped
    # value is discarded.
    d.pop(key, None)
395
396
def str_cmp(s1, s2):
    # type: (str, str) -> int
    """Three-way string comparison.

    Returns -1 if s1 < s2, 0 if they are equal, and 1 otherwise.
    """
    if s1 < s2:
        return -1
    return 0 if s1 == s2 else 1
405
406
class UniqueObjects(object):
    """A set of objects identified by their address in memory

    Python's id(obj) returns the address of any object.  But we don't simply
    implement it, because it requires a uint64_t on 64-bit systems, while
    mycpp only supports 'int'.

    So we have a whole class.

    Should be used for:

    - Cycle detection when pretty printing, as Python's repr() does
      - See CPython's Objects/object.c PyObject_Repr()
        /* These methods are used to control infinite recursion in repr, str,
           print, etc.  Container objects that may recursively contain
           themselves, e.g. builtin dictionaries and lists, should use
           Py_ReprEnter() and Py_ReprLeave() to avoid infinite recursion.
        */
      - e.g. dictobject.c dict_repr() calls Py_ReprEnter() to print {...}
      - In Python 2.7 a GLOBAL VAR is used

      - It also checks for STACK OVERFLOW

    - Packle serialization
    """

    def __init__(self):
        # 64-bit id() -> small integer ID
        self.addresses = {}  # type: Dict[int, int]

    def Contains(self, obj):
        # type: (Any) -> bool
        """ Convenience?  True if Get(obj) finds an ID for obj. """
        assigned = self.Get(obj)
        return assigned != -1

    def MaybeAdd(self, obj):
        # type: (Any) -> None
        """ Convenience? """
        # NOTE(review): there is no implementation here; calling this method
        # currently does nothing and returns None.

    # def AddNewObject(self, obj):
    def Add(self, obj):
        # type: (Any) -> None
        """
        Assert it isn't already there, and assign a new ID!

        The new ID is the number of objects added so far, so IDs count up
        from 0.

        # Lib/pickle does:

            self.memo[id(obj)] = memo_len, obj

        I guess that's the object ID and a void*

        Then it does:

            x = self.memo.get(id(obj))

        and

            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))

        BUT It only uses the numeric ID!
        """
        table = self.addresses
        key = id(obj)
        assert key not in table
        table[key] = len(table)

    def Get(self, obj):
        # type: (Any) -> int
        """
        Returns unique ID assigned

        Returns -1 if it doesn't exist?
        """
        return self.addresses.get(id(obj), -1)

    # Note: self.memo.clear() doesn't appear to be used
487
488
def probe(provider, name, *args):
    # type: (str, str, Any) -> None
    """Create a probe for use with profilers like linux perf and ebpf or dtrace.

    In Python this does nothing: every argument is ignored and None is
    returned.
    """
    # Noop.  Just a marker for mycpp to emit a DTRACE_PROBE()
    return None
494
495
if 0:
    # Disabled code, never executed: a prototype of Unix file descriptor
    # I/O, compared with FILE* libc I/O.
    # Doesn't seem like we need this now.

    # Short versions of STDOUT_FILENO and STDERR_FILENO
    kStdout = 1
    kStderr = 2

    def writeln(s, fd=kStdout):
        # type: (str, int) -> None
        """Write a line.  The name is consistent with JavaScript writeln() and Rust.

        e.g.
          writeln("x = %d" % x, kStderr)

        TODO: The Oil interpreter shouldn't use print() anywhere.  Instead it can use
        writeln(s) and writeln(s, kStderr)
        """
        # Two separate writes: the text, then the newline.
        for chunk in (s, '\n'):
            posix.write(fd, chunk)

    class File(object):
        """Custom file wrapper for Unix I/O like write() read()

        Not C I/O like fwrite() fread().  There should be no flush().
        """

        def __init__(self, fd):
            # type: (int) -> None
            # The descriptor that write() and writeln() send output to.
            self.fd = fd

        def write(self, s):
            # type: (str) -> None
            """Write s to this object's file descriptor."""
            posix.write(self.fd, s)

        def writeln(self, s):
            # type: (str) -> None
            """Write s and then a newline to this object's file descriptor."""
            writeln(s, self.fd)