OILS / mycpp / format_strings.py View on Github | oilshell.org

120 lines, 41 significant
1"""
2format_strings.py
3
4Parse a printf format string so we can compile it to function calls.
5"""
6from __future__ import print_function
7
8import re
9
10from typing import List
11
12
def DecodeMyPyString(s: str) -> str:
    """Turn backslash escape sequences in a MyPy string into real characters.

    This is a workaround for MyPy's weird escaping.  Per the original note it
    is also used by cppgen_pass.py.

    The string is encoded as UTF-8 and then decoded with Python's
    'unicode_escape' codec.  That codec reads non-escape bytes as Latin-1, so
    a non-ASCII character in s comes back as one character per UTF-8 byte.

    Args:
      s: Text that may contain escape sequences spelled out with backslashes.

    Returns:
      The text with those escape sequences interpreted.
    """
    # In Python 3:
    #   >>> b'\\t'.decode('unicode_escape')
    #   '\t'
    return s.encode('utf-8').decode('unicode_escape')
27
28
class _Part:
    """Empty base class used as the element type of Parse()'s result list."""
31
32
class LiteralPart(_Part):
    """A piece of a format string that is copied through verbatim."""

    def __init__(self, s: str) -> None:
        """Store the literal text and its length.

        Args:
          s: The literal text.
        """
        # Length of s, computed once at construction time.
        self.strlen = len(s)
        self.s = s

    def __repr__(self) -> str:
        """Return a debug form such as (Literal 'abc')."""
        return '(Literal %r)' % (self.s,)
41
42
class SubstPart(_Part):
    """A '%' directive in a format string that is filled in from an argument."""

    def __init__(self, width: str, char_code: str, arg_num: int) -> None:
        """Record the pieces of one directive.

        Args:
          width: The digits between '%' and the character code, as a string.
          char_code: The single character that follows the optional digits.
          arg_num: Index of this directive among the substitutions.
        """
        self.width, self.char_code, self.arg_num = width, char_code, arg_num

    def __repr__(self) -> str:
        """Return a debug form such as (Subst '5' d 0)."""
        fields = (self.width, self.char_code, self.arg_num)
        return '(Subst %r %s %d)' % fields
52
53
# One match of PAT consumes, in order:
#   group 1: a possibly empty run of characters other than '%'
#   and then, optionally, a single '%' directive made of
#   group 2: a possibly empty run of decimal digits
#   group 3: exactly one character ('.' does not match a newline here)
# Groups 2 and 3 are None when no directive follows the literal run.
# The pattern is compiled in verbose mode, so the whitespace and the
# '#' comment inside the pattern text are ignored by the regex engine.
PAT = re.compile(
    '''
([^%]*)
(?:
 %([0-9]*)(.) # optional number, and then character code
)?
''',
    flags=re.VERBOSE)
61
62
def Parse(fmt: str) -> List[_Part]:
    """Split a printf-style format string into literal and substitution parts.

    PAT is applied repeatedly over fmt.  Each match contributes an optional
    run of literal text followed by an optional '%' directive:

    - '%%' becomes LiteralPart('%') and does not consume an argument.
    - Any other directive becomes a SubstPart.  SubstParts are numbered
      0, 1, 2, ... in order of appearance.

    Args:
      fmt: The format string to parse.

    Returns:
      The parts, in the order they appear in fmt.

    Raises:
      ValueError: If fmt contains a '%' that PAT cannot turn into a
        directive, i.e. the '%' is the last character of fmt or is followed
        by a newline.  Such a '%' used to be dropped from the result without
        any diagnostic.
    """
    arg_num = 0
    parts: List[_Part] = []
    pos = 0  # Index in fmt where the previous match ended.
    for m in PAT.finditer(fmt):
        # Successive matches are contiguous unless the scanner had to step
        # over a character that PAT could not consume.  With this pattern the
        # only such character is a '%' with no directive character after it.
        if m.start() != pos:
            raise ValueError(
                'Invalid format string %r: incomplete %% directive at index %d'
                % (fmt, pos))
        pos = m.end()

        lit = m.group(1)
        width = m.group(2)
        char_code = m.group(3)

        if lit:
            parts.append(LiteralPart(lit))
        if char_code:
            if char_code == '%':
                part: _Part = LiteralPart('%')
            else:
                part = SubstPart(width, char_code, arg_num)
                arg_num += 1
            parts.append(part)

    return parts
85
86
# Note: This would be a lot easier in Oil!  The string literal below is a
# non-executable sketch of the same parser written in Oil.
# TODO: Should there be a char type?
89"""
90enum format_part {
91 case Literal(s BigStr)
92 case Subst(char_code BigStr, arg_num Int)
93}
94
95let PAT = ///
96 < ~['%']* : lit > # anything except %
97 < '%' dot : subst > # % and then any char
98///
99
100func Parse(fmt BigStr) {
101 var arg_num = 0
102 let parts = []
103
104 for (m in find(fmt, PAT)) {
105 if (m.lit) {
106 do parts.append(format_part.Literal(m.lit))
107 }
108 if (m.subst) {
109 if (char_code == '%') {
110 part = format_part.Literal('%')
111 } else {
112 part = format_part.Subst(char_code, arg_num)
113 }
114 do parts.append(part)
115 set arg_num += 1
116 }
117 }
118 return parts
119}
120"""