OILS / core / alloc.py View on Github | oils.pub

321 lines, 137 significant
1"""
2alloc.py - Arena manages SourceLine and Token instances (could rename)
3"""
4
5from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine, loc
6from asdl import runtime
7from core import error
8from mycpp.mylib import log
9
10from typing import List, Dict, Any
11
12_ = log
13
14
def SnipCodeBlock(left, right, lines):
    # type: (Token, Token, List[SourceLine]) -> str
    """Extract the source text strictly between two brace tokens.

    The { and } themselves are EXCLUDED from the result.  The returned string
    starts with (left.col + 1) spaces, so text on the first line keeps the
    column it had in the original source.

    Used for Command.sourceCode() and Hay evaluation.  Compare with
    SnipCodeString(), which is inclusive.

    Args:
      left: the opening token; must have length 1
      right: the closing token; must have length 1
      lines: candidate lines, searched linearly in order

    Raises AssertionError when a token is not one character long, or, in the
    multi-line case, when either token's line is not found in `lines`.  In the
    single-line case a missing line is NOT detected: only the padding is
    returned.

    TODO: This algorithm is wrong when re-parsing occurs, e.g. backticks,
    aliases, a[i++]=1.
    """
    assert left.length == 1, "{ expected"
    assert right.length == 1, "} expected"

    # Spaces stand in for everything up to and including the left token, so
    # column numbers aren't off
    chunks = [' ' * (left.col + 1)]  # type: List[str]

    # First column after the left token
    start_col = left.col + left.length

    if left.line == right.line:
        # Both braces are on one line: take the slice between them.  There is
        # deliberately no 'break', so every matching entry contributes.
        for src_line in lines:
            if src_line == left.line:
                chunks.append(src_line.content[start_col:right.col])
        return ''.join(chunks)

    seen_left = False
    seen_right = False
    for src_line in lines:
        if src_line == left.line:
            # Tail of the first line, after the left token
            seen_left = True
            chunks.append(src_line.content[start_col:])
        elif src_line == right.line:
            # Head of the last line, up to the right token; then we're done
            seen_right = True
            chunks.append(src_line.content[:right.col])
            break
        elif seen_left:
            # A whole line in the middle of the block
            chunks.append(src_line.content)

    assert seen_left, "Couldn't find left token"
    assert seen_right, "Couldn't find right token"
    return ''.join(chunks)
71
72
class ctx_SourceCode(object):
    """Context manager that keeps a source on the arena's stack for a scope.

    The source is pushed when the object is CONSTRUCTED, not in __enter__(),
    and popped in __exit__().
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the push already happened in the constructor
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Undo the constructor's push.  Returns None, so an in-flight exception
        # is not suppressed.
        self.arena.PopSource()
87
88
class Arena(object):
    """Manages source_t, SourceLine, and Token instances."""

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        # When true, NewToken() records each token it creates
        self.save_tokens = save_tokens

        # A token's index in this list is its span_id
        self.tokens = []  # type: List[Token]
        self.num_tokens = 0

        # Token -> span_id.  Only used in tools.
        self.span_id_lookup = {}  # type: Dict[Token, int]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # Stack of sources; the top entry is shared by the SourceLine
        # instances created while it is on top
        self.source_instances = []  # type: List[source_t]

    def SaveTokens(self):
        # type: () -> None
        """Start recording tokens from now on.

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make src the source that subsequent AddLine() calls attach."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the most recently pushed source."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return its SourceLine for later retrieval.

        The line number is 1-based.  The line is tagged with the source
        currently on top of the stack, so a source must have been pushed.
        """
        current_src = self.source_instances[-1]
        new_line = SourceLine(line_num, line, current_src)
        self.lines_list.append(new_line)
        return new_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        # Clear in place, rather than rebinding the attribute
        del self.lines_list[:]

    def SaveLinesAndDiscard(self, left, right):
        # type: (Token, Token) -> List[SourceLine]
        """Return the lines from left's line through right's line, then forget
        every line the arena holds.  E.g. for { and }

        Why?
        - In between { }, we want to preserve lines not pointed to by a token,
          e.g. comment lines.
        - But we don't want to save all lines in an interactive shell:
            echo 1
            echo 2
            ...
            echo 500000
            echo 500001

        The lines should be freed after execution takes place.
        """
        kept = []  # type: List[SourceLine]
        in_range = False
        for src_line in self.lines_list:
            if src_line == left.line:
                in_range = True

            # These lines are PERMANENT, and never deleted.  What if you
            # overwrite a function name?  You might want to save those in the
            # function record ITSELF.
            #
            # This is for INLINE hay blocks that can be evaluated at any point.
            # In contrast, parse_hay(other_file) uses ParseWholeFile, and we
            # could save all lines.

            # TODO: consider creating a new Arena for each CommandParser?  Or
            # rename it to 'BackingLines' or something.

            # TODO: We should mutate li.line_id here so it's the index into
            # saved_lines?
            if in_range:
                kept.append(src_line)

            # Checked even when we are not saving: reaching right's line always
            # ends the scan
            if src_line == right.line:
                break

        self.DiscardLines()
        return kept

    def SnipCodeString(self, left, right):
        # type: (Token, Token) -> str
        """Return the code string between left and right tokens, INCLUSIVE.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'

        Raises AssertionError if either token's line is not among the lines
        the arena currently holds.
        """
        if left.line == right.line:
            for src_line in self.lines_list:
                if src_line == left.line:
                    end_col = right.col + right.length
                    return src_line.content[left.col:end_col]
            # Not found: fall through, and the assertions below will fire

        chunks = []  # type: List[str]
        seen_left = False
        seen_right = False
        for src_line in self.lines_list:
            if src_line == left.line:
                # First line: everything from the left token onward
                seen_left = True
                chunks.append(src_line.content[left.col:])
            elif src_line == right.line:
                # Last line: everything through the end of the right token
                seen_right = True
                chunks.append(src_line.content[:right.col + right.length])
                break
            elif seen_left:
                # A whole line in between
                chunks.append(src_line.content)

        assert seen_left, "Couldn't find left token"
        assert seen_right, "Couldn't find right token"
        return ''.join(chunks)

    def NewToken(self, id_, col, length, src_line):
        # type: (int, int, int, SourceLine) -> Token
        """Create a Token, recording it when save_tokens is on.

        Raises error.Parse, with a loc.TokenTooLong location, when length is
        65536 or more.
        """
        # NOTE(review): 65536 presumably mirrors a 16-bit length field in
        # Token -- confirm against the schema.
        if length >= 65536:
            raise error.Parse(
                '',  # ignored message
                loc.TokenTooLong(src_line, id_, length, col))

        tok = Token(id_, length, col, src_line, None)
        if not self.save_tokens:
            return tok

        # The next span ID is the number of tokens recorded so far
        new_id = self.num_tokens
        self.num_tokens = new_id + 1

        self.tokens.append(tok)
        self.span_id_lookup[tok] = new_id
        return tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID."""
        if not self.save_tokens:
            return

        # NOTE(review): the popped token's entry in span_id_lookup is left in
        # place.
        self.tokens.pop()
        self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the recorded token with the given span ID."""
        assert span_id != runtime.NO_SPID, span_id

        num_saved = len(self.tokens)
        assert span_id < num_saved, \
            'Span ID out of range: %d is greater than %d' % (span_id, num_saved)
        return self.tokens[span_id]

    def GetSpanId(self, tok):
        # type: (Token) -> int
        """Given a Token, return its sequence number.

        The token must have been recorded by NewToken().
        """
        assert tok in self.span_id_lookup
        return self.span_id_lookup[tok]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID."""
        return len(self.tokens)
290
291
class LosslessArena(Arena):
    """Placeholder for an arena that upholds the lossless invariant.

    TODO:

    Intended for
      --tool fmt
      --tool ysh-ify

    It would retain all SourceLine and Token instances.

    Open question: should re-parsing be disallowed somehow?  Is that
    equivalent to ctx_SourceCode()?
    """
    pass
305
306
class DynamicArena(Arena):
    """Placeholder for the arena used by the batch and interactive shell.

    TODO:
    - Test that SourceLine and Token are GC'd

    However, it should support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Neither of those may be necessary in the LosslessArena; we might have
    different utilities there.
    """
    pass