OILS / core / alloc.py View on Github | oils.pub

211 lines, 95 significant
1from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine
2from asdl import runtime
3from mycpp.mylib import log
4
5from typing import List, Dict, Any
6
7_ = log
8
9
class ctx_SourceCode(object):
    """Scoped registration of a source on an Arena.

    The source is pushed when the object is CONSTRUCTED (not in __enter__),
    and popped again when the `with` block exits.
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        # Push right away; __enter__ below has nothing left to do.
        self.arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        # Intentionally empty: all setup already happened in __init__.
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Undo the PushSource() from __init__.  Nothing is returned, so an
        # in-flight exception keeps propagating.
        self.arena.PopSource()
24
25
class Arena(object):
    """Manages source_t, SourceLine, Token.

    - A stack of source_t values; the top of the stack is attached to each
      SourceLine created by AddLine().
    - A list of SourceLine objects, which SnipCodeString() searches.
    - When save_tokens is set, every Token from NewToken() is recorded and
      assigned a sequential span ID.
    """

    def __init__(self, save_tokens=False):
        # type: (bool) -> None
        """
        Args:
          save_tokens: if True, NewToken() records tokens in self.tokens and
            self.span_id_lookup.
        """
        self.save_tokens = save_tokens

        # indexed by span_id
        self.tokens = []  # type: List[Token]
        self.num_tokens = 0

        # Only used in tools
        self.span_id_lookup = {}  # type: Dict[Token, int]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # reuse these instances in many line_span instances
        self.source_instances = []  # type: List[source_t]

    def SaveTokens(self):
        # type: () -> None
        """Turn on token recording for subsequent NewToken() calls.

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make src the current source; AddLine() attaches it to new lines."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the current source, restoring the previously pushed one."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return the SourceLine created for it.

        The line number is 1-based.

        A source must have been pushed with PushSource() first, because the
        new SourceLine is tagged with the top of the source stack.
        """
        src_line = SourceLine(line_num, line, self.source_instances[-1])
        self.lines_list.append(src_line)
        return src_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        #log("discarding %d lines", len(self.lines_list))
        # Clear in place, so the same list object stays bound to the attribute.
        del self.lines_list[:]

    def SnipCodeString(self, left, right, inclusive=True):
        # type: (Token, Token, bool) -> str
        """Return the code string between the left and right tokens.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'

        Args:
          left: token where the snippet starts
          right: token where the snippet ends
          inclusive: if True, the text of both tokens is part of the result.
            If False, the result starts after `left` and stops before
            `right`, and is prefixed with as many spaces as there are
            columns before the snipped text (presumably so that column
            numbers in the snippet still line up -- confirm with callers).

        Returns:
          The snipped text.  When both tokens are on the same line and that
          line is no longer in lines_list, only the prefix (possibly empty)
          is returned.

        When the tokens are on different lines, both lines must still be in
        lines_list; this is checked with assertions.
        """
        if inclusive:
            ileft = left.col
            iright = right.col + right.length
        else:
            ileft = left.col + left.length
            iright = right.col

        pieces = []  # type: List[str]
        if not inclusive:
            pieces.append(' ' * ileft)

        if left.line == right.line:
            for li in self.lines_list:
                if li == left.line:
                    pieces.append(li.content[ileft:iright])
                    # Found the only line we need; don't scan the rest.
                    break
            return ''.join(pieces)

        saving = False
        found_left = False
        found_right = False
        for li in self.lines_list:
            if li == left.line:
                found_left = True
                saving = True

                # Save everything after the left token
                piece = li.content[ileft:]
                pieces.append(piece)
                #log('   %r', piece)
                continue

            if li == right.line:
                found_right = True

                # Save everything up to the right token, then stop
                piece = li.content[:iright]
                pieces.append(piece)
                #log('   %r', piece)

                saving = False
                break

            if saving:
                # A whole line strictly between the two tokens
                pieces.append(li.content)
                #log('   %r', li.content)

        assert found_left, "Couldn't find left token"
        assert found_right, "Couldn't find right token"
        return ''.join(pieces)

    def NewToken(self, id_, col, length, src_line):
        # type: (int, int, int, SourceLine) -> Token
        """Create a Token; also record it when save_tokens is set.

        A recorded token gets the next sequential span ID, which is its index
        in self.tokens and its value in self.span_id_lookup.
        """
        tok = Token(id_, length, col, src_line, None)
        if self.save_tokens:
            span_id = self.num_tokens
            self.num_tokens += 1

            self.tokens.append(tok)
            self.span_id_lookup[tok] = span_id
        return tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID.

        Does nothing unless save_tokens is set.  The span_id_lookup entry of
        the removed token is left in place.
        """
        if self.save_tokens:
            self.tokens.pop()
            self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the recorded token with the given span ID.

        Asserts that span_id is not runtime.NO_SPID and that it is below the
        number of recorded tokens.
        """
        assert span_id != runtime.NO_SPID, span_id
        assert span_id < len(self.tokens), \
            'Span ID out of range: %d is greater than %d' % (span_id, len(self.tokens))
        return self.tokens[span_id]

    def GetSpanId(self, tok):
        # type: (Token) -> int
        """Given a Token, return its sequence number (span ID).

        The token must have been recorded by NewToken() while save_tokens
        was set; this is checked with an assertion.
        """
        #return tok.span_id
        #return -1
        assert tok in self.span_id_lookup
        return self.span_id_lookup[tok]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID."""
        return len(self.tokens)
180
181
class LosslessArena(Arena):
    """Placeholder (TODO) for an arena that upholds the lossless invariant.

    Intended users:
      --tool fmt
      --tool ysh-ify

    It would retain every SourceLine and every Token.

    Open question: should re-parsing somehow be disallowed?  Is that
    equivalent to ctx_SourceCode()?
    """
    # No behavior of its own yet; everything comes from Arena.
    pass
195
196
class DynamicArena(Arena):
    """Placeholder for the arena used by the batch and interactive shell.

    TODO:
    - Test that SourceLine and Token are GC'd

    It should still support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Neither of those may be necessary in the LosslessArena, which might get
    different utilities instead.
    """
    # No behavior of its own yet; everything comes from Arena.
    pass