OILS / display / pretty.py View on Github | oilshell.org

333 lines, 117 significant
1#!/usr/bin/env python2
2"""
3Pretty printing library.
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# IfFlat(a, b) prints a if in flat mode or b otherwise.
78#
79# Flat(a) prints a in flat mode. You should generally not need to use it.
80#
81# ~ Measures ~
82#
83# The algorithm used here is close to the one originally described by Wadler,
84# but it precomputes a "measure" for each node in the `doc`. This "measure"
85# allows each Groups to decide whether to print flat or not without needing to
86# look ahead per Wadler's algorithm. A measure has two pieces of information:
87#
88# - Measure.flat is the width of the doc if it's printed flat.
89# - Measure.nonflat is the width of the doc until the _earliest possible_
90# newline, or -1 if it doesn't contain a Break.
91#
92# Measures are used in two steps:
93# (1) First, they're computed bottom-up on the `doc`, measuring the size of each
94# node.
95# (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
96# measure something different: the width from the doc _to the end of the
97# entire doc tree_. This second set of Measures (the ones in the
98# DocFragments) are computed top-down, and they're used to decide for each
99# Group whether to use flat mode or not, without needing to scan ahead.
100
101from __future__ import print_function
102
103from _devbuild.gen.pretty_asdl import (doc, doc_e, DocFragment, Measure,
104 MeasuredDoc, List_Measured)
105from mycpp.mylib import log, tagswitch, BufWriter
106from typing import cast, List
107
108_ = log
109
110#
111# Measurements
112#
113
114
115def _EmptyMeasure():
116 # type: () -> Measure
117 """The measure of an empty doc."""
118 return Measure(0, -1)
119
120
121def _FlattenMeasure(measure):
122 # type: (Measure) -> Measure
123 """The measure if its document is rendered flat."""
124 return Measure(measure.flat, -1)
125
126
127def _ConcatMeasure(m1, m2):
128 # type: (Measure, Measure) -> Measure
129 """Compute the measure of 2 concatenated docs.
130
131 If m1 and m2 are the measures of doc1 and doc2, then _ConcatMeasure(m1, m2)
132 is the measure of doc.Concat([doc1, doc2]). This concatenation is
133 associative but not commutative.
134 """
135 if m1.nonflat != -1:
136 return Measure(m1.flat + m2.flat, m1.nonflat)
137 elif m2.nonflat != -1:
138 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
139 else:
140 return Measure(m1.flat + m2.flat, -1)
141
142
143def _SuffixLen(measure):
144 # type: (Measure) -> int
145 """The width until the earliest possible newline, or end of document."""
146 if measure.nonflat != -1:
147 return measure.nonflat
148 else:
149 return measure.flat
150
151
152#
153# Doc Construction
154#
155
156
157def AsciiText(string):
158 # type: (str) -> MeasuredDoc
159 """Print string (which must not contain a newline)."""
160 return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
161
162
163def _Break(string):
164 # type: (str) -> MeasuredDoc
165 """If in flat mode, print string, otherwise print `\n`.
166
167 Note: Doesn't try to compute Unicode width, since we control these strings.
168 """
169 return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
170
171
172def _Indent(indent, mdoc):
173 # type: (int, MeasuredDoc) -> MeasuredDoc
174 """Add 'indent' spaces after every newline in mdoc."""
175 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
176
177
178def _Splice(out, mdocs):
179 # type: (List[MeasuredDoc], List[MeasuredDoc]) -> Measure
180 """Optimization for _Concat.
181
182 This reduces the total size of the doc_t tree, and thus the memory usage
183 (as long as we mylib.MaybeCollect() in _HNode!)
184
185 Example of optimizing _Concat nodes together: _Field() concatenates
186 AsciiText("name:") and _HNode(), and the latter is often a doc.Concat node.
187 """
188 measure = _EmptyMeasure()
189 for mdoc in mdocs:
190 with tagswitch(mdoc.doc) as case:
191 if case(doc_e.Concat):
192 child = cast(List_Measured, mdoc.doc)
193 # ignore return value, because the parent has the measure already
194 _Splice(out, child)
195 else:
196 out.append(mdoc)
197 measure = _ConcatMeasure(measure, mdoc.measure)
198 return measure
199
200
201def _Concat(mdocs):
202 # type: (List[MeasuredDoc]) -> MeasuredDoc
203 """Print the mdocs in order (with no space in between)."""
204 flattened = List_Measured.New()
205 measure = _Splice(flattened, mdocs)
206 return MeasuredDoc(flattened, measure)
207
208
209def _Group(mdoc):
210 # type: (MeasuredDoc) -> MeasuredDoc
211 """Print mdoc. Use flat mode if mdoc will fit on the current line."""
212 return MeasuredDoc(mdoc, mdoc.measure)
213
214
215def _IfFlat(flat_mdoc, nonflat_mdoc):
216 # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
217 """If in flat mode, print flat_mdoc; otherwise print nonflat_mdoc."""
218 return MeasuredDoc(
219 doc.IfFlat(flat_mdoc, nonflat_mdoc),
220 Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
221
222
223def _Flat(mdoc):
224 # type: (MeasuredDoc) -> MeasuredDoc
225 """Prints mdoc in flat mode."""
226 return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
227
228
229class PrettyPrinter(object):
230
231 def __init__(self, max_width):
232 # type: (int) -> None
233 self.max_width = max_width
234
235 def _Fits(self, prefix_len, group, suffix_measure):
236 # type: (int, MeasuredDoc, Measure) -> bool
237 """Will group fit flat on the current line?"""
238 measure = _ConcatMeasure(_FlattenMeasure(group.measure),
239 suffix_measure)
240 return prefix_len + _SuffixLen(measure) <= self.max_width
241
242 def PrintDoc(self, document, buf):
243 # type: (MeasuredDoc, BufWriter) -> None
244 """Pretty print a doc_t to a BufWriter."""
245
246 # The width of the text we've printed so far on the current line
247 prefix_len = 0
248 # A _stack_ of document fragments to print. Each fragment contains:
249 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
250 # - The indentation level to print this doc node at.
251 # - Is this doc node being printed in flat mode?
252 # - The measure _from just after the doc node, to the end of the entire document_.
253 # (Call this the suffix_measure)
254 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
255
256 max_stack = len(fragments)
257
258 while len(fragments) > 0:
259 max_stack = max(max_stack, len(fragments))
260
261 frag = fragments.pop()
262 UP_doc = frag.mdoc.doc
263 with tagswitch(UP_doc) as case:
264
265 if case(doc_e.Text):
266 text = cast(doc.Text, UP_doc)
267 buf.write(text.string)
268 prefix_len += frag.mdoc.measure.flat
269
270 elif case(doc_e.Break):
271 break_ = cast(doc.Break, UP_doc)
272 if frag.is_flat:
273 buf.write(break_.string)
274 prefix_len += frag.mdoc.measure.flat
275 else:
276 buf.write('\n')
277 buf.write_spaces(frag.indent)
278 prefix_len = frag.indent
279
280 elif case(doc_e.Indent):
281 indented = cast(doc.Indent, UP_doc)
282 fragments.append(
283 DocFragment(indented.mdoc,
284 frag.indent + indented.indent,
285 frag.is_flat, frag.measure))
286
287 elif case(doc_e.Concat):
288 concat = cast(List_Measured, UP_doc)
289
290 # If we encounter Concat([A, B, C]) with a suffix measure M,
291 # we need to push A,B,C onto the stack in reverse order:
292 # - C, with suffix_measure = B.measure + A.measure + M
293 # - B, with suffix_measure = A.measure + M
294 # - A, with suffix_measure = M
295 measure = frag.measure
296 for mdoc in reversed(concat):
297 fragments.append(
298 DocFragment(mdoc, frag.indent, frag.is_flat,
299 measure))
300 # TODO: this algorithm allocates too much!
301 measure = _ConcatMeasure(mdoc.measure, measure)
302
303 elif case(doc_e.Group):
304 # If the group would fit on the current line when printed
305 # flat, do so. Otherwise, print it non-flat.
306 group = cast(MeasuredDoc, UP_doc)
307 is_flat = self._Fits(prefix_len, group, frag.measure)
308 fragments.append(
309 DocFragment(group, frag.indent, is_flat, frag.measure))
310
311 elif case(doc_e.IfFlat):
312 if_flat = cast(doc.IfFlat, UP_doc)
313 if frag.is_flat:
314 subdoc = if_flat.flat_mdoc
315 else:
316 subdoc = if_flat.nonflat_mdoc
317 fragments.append(
318 DocFragment(subdoc, frag.indent, frag.is_flat,
319 frag.measure))
320
321 elif case(doc_e.Flat):
322 flat_doc = cast(doc.Flat, UP_doc)
323 fragments.append(
324 DocFragment(flat_doc.mdoc, frag.indent, True,
325 frag.measure))
326
327 if 0:
328 log('')
329 log('___ MAX DocFragment stack: %d', max_stack)
330 log('')
331
332
333# vim: sw=4