1 | #!/usr/bin/env python2
|
2 | """
|
3 | Pretty printing library.
|
4 |
|
5 | Pretty printing means intelligently choosing whitespace including indentation
|
6 | and newline placement, to attempt to display data nicely while staying within a
|
7 | maximum line width.
|
8 | """
|
9 |
|
10 | # ~~~ Architecture ~~~
|
11 | #
|
12 | # Based on a version of the algorithm from Wadler's "A Prettier Printer".
|
13 | #
|
14 | # Pretty printing proceeds in two phases:
|
15 | #
|
16 | # 1. Convert the thing you want to print into a `doc`.
|
17 | # 2. Print the `doc` using a standard algorithm.
|
18 | #
|
19 | # This separation keeps the details of the data you want to print separate from
|
20 | # the printing algorithm.
|
21 |
|
22 | # ~~~ Pretty Printing Overview ~~~
|
23 | #
|
24 | # If you're just using this file, you don't need to know how pretty printing
|
25 | # works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
|
26 | # or extend how values are printed, you'll need to know, so here's an overview.
|
27 | #
|
28 | # You may want to first read Walder's "A Prettier Printer", which this is based
|
29 | # off of:
|
30 | # https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
|
31 | #
|
32 | # Some additional reading, though only tangentially related:
|
33 | #
|
34 | # - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
|
35 | # - https://lindig.github.io/papers/strictly-pretty-2000.pdf
|
36 | # - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
|
37 | # - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
|
38 | # - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
|
39 | #
|
40 | # ~ Constructors ~
|
41 | #
|
42 | # There are just a few constructors for `doc`, from which everything else is
|
43 | # built from.
|
44 | #
|
45 | # Text(string) prints like:
|
46 | # |string
|
47 | #
|
48 | # Break(string) prints like:
|
49 | # |string
|
50 | # or like a newline:
|
51 | # |
|
52 | # |
|
53 | # (It does the latter if printed in "flat" mode, and the former otherwise. See
|
54 | # Group for details.)
|
55 | #
|
56 | # Concat(a, b) prints like:
|
57 | # |AAAAA
|
58 | # |AAABBB
|
59 | # |BBBBB
|
60 | #
|
61 | # Indent(3, a) prints like:
|
62 | # |AAAAA
|
63 | # | AAAAA
|
64 | # | AAAAA
|
65 | #
|
66 | # Group(a) makes a decision. It either:
|
67 | # - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
|
68 | # string instead of as a newline, and (ii) every Group nested inside of it is
|
69 | # printed flat.
|
70 | # - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
|
71 | # newlines, and (ii) the Groups inside of it make their own decision about
|
72 | # whether to be flat.
|
73 | # It makes this decision greedily. If the current line would not overflow if the
|
74 | # group printed flat, then it will print flat. This takes into account not only
|
75 | # the group itself, but the content before and after it on the same line.
|
76 | #
|
77 | # IfFlat(a, b) prints a if in flat mode or b otherwise.
|
78 | #
|
79 | # Flat(a) prints a in flat mode. You should generally not need to use it.
|
80 | #
|
81 | # ~ Measures ~
|
82 | #
|
83 | # The algorithm used here is close to the one originally described by Wadler,
|
84 | # but it precomputes a "measure" for each node in the `doc`. This "measure"
|
85 | # allows each Groups to decide whether to print flat or not without needing to
|
86 | # look ahead per Wadler's algorithm. A measure has two pieces of information:
|
87 | #
|
88 | # - Measure.flat is the width of the doc if it's printed flat.
|
89 | # - Measure.nonflat is the width of the doc until the _earliest possible_
|
90 | # newline, or -1 if it doesn't contain a Break.
|
91 | #
|
92 | # Measures are used in two steps:
|
93 | # (1) First, they're computed bottom-up on the `doc`, measuring the size of each
|
94 | # node.
|
95 | # (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
|
96 | # measure something different: the width from the doc _to the end of the
|
97 | # entire doc tree_. This second set of Measures (the ones in the
|
98 | # DocFragments) are computed top-down, and they're used to decide for each
|
99 | # Group whether to use flat mode or not, without needing to scan ahead.
|
100 |
|
101 | from __future__ import print_function
|
102 |
|
103 | from _devbuild.gen.pretty_asdl import (doc, doc_e, DocFragment, Measure,
|
104 | MeasuredDoc, List_Measured)
|
105 | from mycpp.mylib import log, tagswitch, BufWriter
|
106 | from typing import cast, List
|
107 |
|
108 | _ = log
|
109 |
|
110 | #
|
111 | # Measurements
|
112 | #
|
113 |
|
114 |
|
115 | def _EmptyMeasure():
|
116 | # type: () -> Measure
|
117 | """The measure of an empty doc."""
|
118 | return Measure(0, -1)
|
119 |
|
120 |
|
121 | def _FlattenMeasure(measure):
|
122 | # type: (Measure) -> Measure
|
123 | """The measure if its document is rendered flat."""
|
124 | return Measure(measure.flat, -1)
|
125 |
|
126 |
|
127 | def _ConcatMeasure(m1, m2):
|
128 | # type: (Measure, Measure) -> Measure
|
129 | """Compute the measure of 2 concatenated docs.
|
130 |
|
131 | If m1 and m2 are the measures of doc1 and doc2, then _ConcatMeasure(m1, m2)
|
132 | is the measure of doc.Concat([doc1, doc2]). This concatenation is
|
133 | associative but not commutative.
|
134 | """
|
135 | if m1.nonflat != -1:
|
136 | return Measure(m1.flat + m2.flat, m1.nonflat)
|
137 | elif m2.nonflat != -1:
|
138 | return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
|
139 | else:
|
140 | return Measure(m1.flat + m2.flat, -1)
|
141 |
|
142 |
|
143 | def _SuffixLen(measure):
|
144 | # type: (Measure) -> int
|
145 | """The width until the earliest possible newline, or end of document."""
|
146 | if measure.nonflat != -1:
|
147 | return measure.nonflat
|
148 | else:
|
149 | return measure.flat
|
150 |
|
151 |
|
152 | #
|
153 | # Doc Construction
|
154 | #
|
155 |
|
156 |
|
157 | def AsciiText(string):
|
158 | # type: (str) -> MeasuredDoc
|
159 | """Print string (which must not contain a newline)."""
|
160 | return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
|
161 |
|
162 |
|
163 | def _Break(string):
|
164 | # type: (str) -> MeasuredDoc
|
165 | """If in flat mode, print string, otherwise print `\n`.
|
166 |
|
167 | Note: Doesn't try to compute Unicode width, since we control these strings.
|
168 | """
|
169 | return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
|
170 |
|
171 |
|
172 | def _Indent(indent, mdoc):
|
173 | # type: (int, MeasuredDoc) -> MeasuredDoc
|
174 | """Add 'indent' spaces after every newline in mdoc."""
|
175 | return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
|
176 |
|
177 |
|
178 | def _Splice(out, mdocs):
|
179 | # type: (List[MeasuredDoc], List[MeasuredDoc]) -> Measure
|
180 | """Optimization for _Concat.
|
181 |
|
182 | This reduces the total size of the doc_t tree, and thus the memory usage
|
183 | (as long as we mylib.MaybeCollect() in _HNode!)
|
184 |
|
185 | Example of optimizing _Concat nodes together: _Field() concatenates
|
186 | AsciiText("name:") and _HNode(), and the latter is often a doc.Concat node.
|
187 | """
|
188 | measure = _EmptyMeasure()
|
189 | for mdoc in mdocs:
|
190 | with tagswitch(mdoc.doc) as case:
|
191 | if case(doc_e.Concat):
|
192 | child = cast(List_Measured, mdoc.doc)
|
193 | # ignore return value, because the parent has the measure already
|
194 | _Splice(out, child)
|
195 | else:
|
196 | out.append(mdoc)
|
197 | measure = _ConcatMeasure(measure, mdoc.measure)
|
198 | return measure
|
199 |
|
200 |
|
201 | def _Concat(mdocs):
|
202 | # type: (List[MeasuredDoc]) -> MeasuredDoc
|
203 | """Print the mdocs in order (with no space in between)."""
|
204 | flattened = List_Measured.New()
|
205 | measure = _Splice(flattened, mdocs)
|
206 | return MeasuredDoc(flattened, measure)
|
207 |
|
208 |
|
209 | def _Group(mdoc):
|
210 | # type: (MeasuredDoc) -> MeasuredDoc
|
211 | """Print mdoc. Use flat mode if mdoc will fit on the current line."""
|
212 | return MeasuredDoc(mdoc, mdoc.measure)
|
213 |
|
214 |
|
215 | def _IfFlat(flat_mdoc, nonflat_mdoc):
|
216 | # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
|
217 | """If in flat mode, print flat_mdoc; otherwise print nonflat_mdoc."""
|
218 | return MeasuredDoc(
|
219 | doc.IfFlat(flat_mdoc, nonflat_mdoc),
|
220 | Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
|
221 |
|
222 |
|
223 | def _Flat(mdoc):
|
224 | # type: (MeasuredDoc) -> MeasuredDoc
|
225 | """Prints mdoc in flat mode."""
|
226 | return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
|
227 |
|
228 |
|
229 | class PrettyPrinter(object):
|
230 |
|
231 | def __init__(self, max_width):
|
232 | # type: (int) -> None
|
233 | self.max_width = max_width
|
234 |
|
235 | def _Fits(self, prefix_len, group, suffix_measure):
|
236 | # type: (int, MeasuredDoc, Measure) -> bool
|
237 | """Will group fit flat on the current line?"""
|
238 | measure = _ConcatMeasure(_FlattenMeasure(group.measure),
|
239 | suffix_measure)
|
240 | return prefix_len + _SuffixLen(measure) <= self.max_width
|
241 |
|
242 | def PrintDoc(self, document, buf):
|
243 | # type: (MeasuredDoc, BufWriter) -> None
|
244 | """Pretty print a doc_t to a BufWriter."""
|
245 |
|
246 | # The width of the text we've printed so far on the current line
|
247 | prefix_len = 0
|
248 | # A _stack_ of document fragments to print. Each fragment contains:
|
249 | # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
|
250 | # - The indentation level to print this doc node at.
|
251 | # - Is this doc node being printed in flat mode?
|
252 | # - The measure _from just after the doc node, to the end of the entire document_.
|
253 | # (Call this the suffix_measure)
|
254 | fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
|
255 |
|
256 | max_stack = len(fragments)
|
257 |
|
258 | while len(fragments) > 0:
|
259 | max_stack = max(max_stack, len(fragments))
|
260 |
|
261 | frag = fragments.pop()
|
262 | UP_doc = frag.mdoc.doc
|
263 | with tagswitch(UP_doc) as case:
|
264 |
|
265 | if case(doc_e.Text):
|
266 | text = cast(doc.Text, UP_doc)
|
267 | buf.write(text.string)
|
268 | prefix_len += frag.mdoc.measure.flat
|
269 |
|
270 | elif case(doc_e.Break):
|
271 | break_ = cast(doc.Break, UP_doc)
|
272 | if frag.is_flat:
|
273 | buf.write(break_.string)
|
274 | prefix_len += frag.mdoc.measure.flat
|
275 | else:
|
276 | buf.write('\n')
|
277 | buf.write_spaces(frag.indent)
|
278 | prefix_len = frag.indent
|
279 |
|
280 | elif case(doc_e.Indent):
|
281 | indented = cast(doc.Indent, UP_doc)
|
282 | fragments.append(
|
283 | DocFragment(indented.mdoc,
|
284 | frag.indent + indented.indent,
|
285 | frag.is_flat, frag.measure))
|
286 |
|
287 | elif case(doc_e.Concat):
|
288 | concat = cast(List_Measured, UP_doc)
|
289 |
|
290 | # If we encounter Concat([A, B, C]) with a suffix measure M,
|
291 | # we need to push A,B,C onto the stack in reverse order:
|
292 | # - C, with suffix_measure = B.measure + A.measure + M
|
293 | # - B, with suffix_measure = A.measure + M
|
294 | # - A, with suffix_measure = M
|
295 | measure = frag.measure
|
296 | for mdoc in reversed(concat):
|
297 | fragments.append(
|
298 | DocFragment(mdoc, frag.indent, frag.is_flat,
|
299 | measure))
|
300 | # TODO: this algorithm allocates too much!
|
301 | measure = _ConcatMeasure(mdoc.measure, measure)
|
302 |
|
303 | elif case(doc_e.Group):
|
304 | # If the group would fit on the current line when printed
|
305 | # flat, do so. Otherwise, print it non-flat.
|
306 | group = cast(MeasuredDoc, UP_doc)
|
307 | is_flat = self._Fits(prefix_len, group, frag.measure)
|
308 | fragments.append(
|
309 | DocFragment(group, frag.indent, is_flat, frag.measure))
|
310 |
|
311 | elif case(doc_e.IfFlat):
|
312 | if_flat = cast(doc.IfFlat, UP_doc)
|
313 | if frag.is_flat:
|
314 | subdoc = if_flat.flat_mdoc
|
315 | else:
|
316 | subdoc = if_flat.nonflat_mdoc
|
317 | fragments.append(
|
318 | DocFragment(subdoc, frag.indent, frag.is_flat,
|
319 | frag.measure))
|
320 |
|
321 | elif case(doc_e.Flat):
|
322 | flat_doc = cast(doc.Flat, UP_doc)
|
323 | fragments.append(
|
324 | DocFragment(flat_doc.mdoc, frag.indent, True,
|
325 | frag.measure))
|
326 |
|
327 | if 0:
|
328 | log('')
|
329 | log('___ MAX DocFragment stack: %d', max_stack)
|
330 | log('')
|
331 |
|
332 |
|
333 | # vim: sw=4
|