1 | #!/usr/bin/env python2
|
2 | """
|
3 | Pretty printing library.
|
4 |
|
5 | Pretty printing means intelligently choosing whitespace including indentation
|
6 | and newline placement, to attempt to display data nicely while staying within a
|
7 | maximum line width.
|
8 | """
|
9 |
|
10 | # ~~~ Architecture ~~~
|
11 | #
|
12 | # Based on a version of the algorithm from Wadler's "A Prettier Printer".
|
13 | #
|
14 | # Pretty printing proceeds in two phases:
|
15 | #
|
16 | # 1. Convert the thing you want to print into a `doc`.
|
17 | # 2. Print the `doc` using a standard algorithm.
|
18 | #
|
19 | # This separation keeps the details of the data you want to print separate from
|
20 | # the printing algorithm.
|
21 |
|
22 | # ~~~ Pretty Printing Overview ~~~
|
23 | #
|
24 | # If you're just using this file, you don't need to know how pretty printing
|
25 | # works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
|
26 | # or extend how values are printed, you'll need to know, so here's an overview.
|
27 | #
|
28 | # You may want to first read Walder's "A Prettier Printer", which this is based
|
29 | # off of:
|
30 | # https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
|
31 | #
|
32 | # Some additional reading, though only tangentially related:
|
33 | #
|
34 | # - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
|
35 | # - https://lindig.github.io/papers/strictly-pretty-2000.pdf
|
36 | # - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
|
37 | # - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
|
38 | # - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
|
39 | #
|
40 | # ~ Constructors ~
|
41 | #
|
42 | # There are just a few constructors for `doc`, from which everything else is
|
43 | # built from.
|
44 | #
|
45 | # Text(string) prints like:
|
46 | # |string
|
47 | #
|
48 | # Break(string) prints like:
|
49 | # |string
|
50 | # or like a newline:
|
51 | # |
|
52 | # |
|
53 | # (It does the latter if printed in "flat" mode, and the former otherwise. See
|
54 | # Group for details.)
|
55 | #
|
56 | # Concat(a, b) prints like:
|
57 | # |AAAAA
|
58 | # |AAABBB
|
59 | # |BBBBB
|
60 | #
|
61 | # Indent(3, a) prints like:
|
62 | # |AAAAA
|
63 | # | AAAAA
|
64 | # | AAAAA
|
65 | #
|
66 | # Group(a) makes a decision. It either:
|
67 | # - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
|
68 | # string instead of as a newline, and (ii) every Group nested inside of it is
|
69 | # printed flat.
|
70 | # - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
|
71 | # newlines, and (ii) the Groups inside of it make their own decision about
|
72 | # whether to be flat.
|
73 | # It makes this decision greedily. If the current line would not overflow if the
|
74 | # group printed flat, then it will print flat. This takes into account not only
|
75 | # the group itself, but the content before and after it on the same line.
|
76 | #
|
77 | # IfFlat(a, b) prints a if in flat mode or b otherwise.
|
78 | #
|
79 | # Flat(a) prints a in flat mode. You should generally not need to use it.
|
80 | #
|
81 | # ~ Measures ~
|
82 | #
|
83 | # The algorithm used here is close to the one originally described by Wadler,
|
84 | # but it precomputes a "measure" for each node in the `doc`. This "measure"
|
85 | # allows each Groups to decide whether to print flat or not without needing to
|
86 | # look ahead per Wadler's algorithm. A measure has two pieces of information:
|
87 | #
|
88 | # - Measure.flat is the width of the doc if it's printed flat.
|
89 | # - Measure.nonflat is the width of the doc until the _earliest possible_
|
90 | # newline, or -1 if it doesn't contain a Break.
|
91 | #
|
92 | # Measures are used in two steps:
|
93 | # (1) First, they're computed bottom-up on the `doc`, measuring the size of each
|
94 | # node.
|
95 | # (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
|
96 | # measure something different: the width from the doc _to the end of the
|
97 | # entire doc tree_. This second set of Measures (the ones in the
|
98 | # DocFragments) are computed top-down, and they're used to decide for each
|
99 | # Group whether to use flat mode or not, without needing to scan ahead.
|
100 |
|
101 | from __future__ import print_function
|
102 |
|
103 | from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
|
104 | from mycpp.mylib import log, tagswitch, BufWriter
|
105 | from typing import cast, List
|
106 |
|
107 | _ = log
|
108 |
|
109 | ################
|
110 | # Measurements #
|
111 | ################
|
112 |
|
113 |
|
114 | def _EmptyMeasure():
|
115 | # type: () -> Measure
|
116 | """The measure of an empty doc."""
|
117 | return Measure(0, -1)
|
118 |
|
119 |
|
120 | def _FlattenMeasure(measure):
|
121 | # type: (Measure) -> Measure
|
122 | """The measure if its document is rendered flat."""
|
123 | return Measure(measure.flat, -1)
|
124 |
|
125 |
|
126 | def _ConcatMeasure(m1, m2):
|
127 | # type: (Measure, Measure) -> Measure
|
128 | """Compute the measure of concatenated docs.
|
129 |
|
130 | If m1 and m2 are the measures of doc1 and doc2,
|
131 | then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
|
132 | This concatenation is associative but not commutative."""
|
133 | if m1.nonflat != -1:
|
134 | return Measure(m1.flat + m2.flat, m1.nonflat)
|
135 | elif m2.nonflat != -1:
|
136 | return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
|
137 | else:
|
138 | return Measure(m1.flat + m2.flat, -1)
|
139 |
|
140 |
|
141 | def _SuffixLen(measure):
|
142 | # type: (Measure) -> int
|
143 | """The width until the earliest possible newline, or end of document."""
|
144 | if measure.nonflat != -1:
|
145 | return measure.nonflat
|
146 | else:
|
147 | return measure.flat
|
148 |
|
149 |
|
150 | ####################
|
151 | # Doc Construction #
|
152 | ####################
|
153 |
|
154 |
|
155 | def AsciiText(string):
|
156 | # type: (str) -> MeasuredDoc
|
157 | """Print `string` (which must not contain a newline)."""
|
158 | return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
|
159 |
|
160 |
|
161 | def _Break(string):
|
162 | # type: (str) -> MeasuredDoc
|
163 | """If in `flat` mode, print `string`, otherwise print `\n`.
|
164 |
|
165 | Note: Doesn't try to compute Unicode width, since we control these strings.
|
166 | """
|
167 | return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
|
168 |
|
169 |
|
170 | def _Indent(indent, mdoc):
|
171 | # type: (int, MeasuredDoc) -> MeasuredDoc
|
172 | """Add `indent` spaces after every newline in `mdoc`."""
|
173 | return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
|
174 |
|
175 |
|
176 | def _Concat(mdocs):
|
177 | # type: (List[MeasuredDoc]) -> MeasuredDoc
|
178 | """Print the mdocs in order (with no space in between)."""
|
179 | measure = _EmptyMeasure()
|
180 | for mdoc in mdocs:
|
181 | measure = _ConcatMeasure(measure, mdoc.measure)
|
182 | return MeasuredDoc(doc.Concat(mdocs), measure)
|
183 |
|
184 |
|
185 | def _Group(mdoc):
|
186 | # type: (MeasuredDoc) -> MeasuredDoc
|
187 | """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
|
188 | return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
|
189 |
|
190 |
|
191 | def _IfFlat(flat_mdoc, nonflat_mdoc):
|
192 | # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
|
193 | """If in flat mode, print `flat_mdoc` otherwise print `nonflat_mdoc`."""
|
194 | return MeasuredDoc(
|
195 | doc.IfFlat(flat_mdoc, nonflat_mdoc),
|
196 | Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
|
197 |
|
198 |
|
199 | def _Flat(mdoc):
|
200 | # type: (MeasuredDoc) -> MeasuredDoc
|
201 | """Prints `mdoc` in flat mode."""
|
202 | return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
|
203 |
|
204 |
|
205 | ###################
|
206 | # Pretty Printing #
|
207 | ###################
|
208 |
|
209 |
|
210 | class PrettyPrinter(object):
|
211 |
|
212 | def __init__(self, max_width):
|
213 | # type: (int) -> None
|
214 | self.max_width = max_width
|
215 |
|
216 | def _Fits(self, prefix_len, group, suffix_measure):
|
217 | # type: (int, doc.Group, Measure) -> bool
|
218 | """Will `group` fit flat on the current line?"""
|
219 | measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
|
220 | suffix_measure)
|
221 | return prefix_len + _SuffixLen(measure) <= self.max_width
|
222 |
|
223 | def PrintDoc(self, document, buf):
|
224 | # type: (MeasuredDoc, BufWriter) -> None
|
225 | """Pretty print a `pretty.doc` to a BufWriter."""
|
226 |
|
227 | # The width of the text we've printed so far on the current line
|
228 | prefix_len = 0
|
229 | # A _stack_ of document fragments to print. Each fragment contains:
|
230 | # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
|
231 | # - The indentation level to print this doc node at.
|
232 | # - Is this doc node being printed in flat mode?
|
233 | # - The measure _from just after the doc node, to the end of the entire document_.
|
234 | # (Call this the suffix_measure)
|
235 | fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
|
236 |
|
237 | while len(fragments) > 0:
|
238 | frag = fragments.pop()
|
239 | with tagswitch(frag.mdoc.doc) as case:
|
240 |
|
241 | if case(doc_e.Text):
|
242 | text = cast(doc.Text, frag.mdoc.doc)
|
243 | buf.write(text.string)
|
244 | prefix_len += frag.mdoc.measure.flat
|
245 |
|
246 | elif case(doc_e.Break):
|
247 | if frag.is_flat:
|
248 | break_str = cast(doc.Break, frag.mdoc.doc).string
|
249 | buf.write(break_str)
|
250 | prefix_len += frag.mdoc.measure.flat
|
251 | else:
|
252 | buf.write('\n')
|
253 | buf.write_spaces(frag.indent)
|
254 | prefix_len = frag.indent
|
255 |
|
256 | elif case(doc_e.Indent):
|
257 | indented = cast(doc.Indent, frag.mdoc.doc)
|
258 | fragments.append(
|
259 | DocFragment(indented.mdoc,
|
260 | frag.indent + indented.indent,
|
261 | frag.is_flat, frag.measure))
|
262 |
|
263 | elif case(doc_e.Concat):
|
264 | # If we encounter Concat([A, B, C]) with a suffix measure M,
|
265 | # we need to push A,B,C onto the stack in reverse order:
|
266 | # - C, with suffix_measure = B.measure + A.measure + M
|
267 | # - B, with suffix_measure = A.measure + M
|
268 | # - A, with suffix_measure = M
|
269 | concat = cast(doc.Concat, frag.mdoc.doc)
|
270 | measure = frag.measure
|
271 | for mdoc in reversed(concat.mdocs):
|
272 | fragments.append(
|
273 | DocFragment(mdoc, frag.indent, frag.is_flat,
|
274 | measure))
|
275 | measure = _ConcatMeasure(mdoc.measure, measure)
|
276 |
|
277 | elif case(doc_e.Group):
|
278 | # If the group would fit on the current line when printed
|
279 | # flat, do so. Otherwise, print it non-flat.
|
280 | group = cast(doc.Group, frag.mdoc.doc)
|
281 | flat = self._Fits(prefix_len, group, frag.measure)
|
282 | fragments.append(
|
283 | DocFragment(group.mdoc, frag.indent, flat,
|
284 | frag.measure))
|
285 |
|
286 | elif case(doc_e.IfFlat):
|
287 | if_flat = cast(doc.IfFlat, frag.mdoc.doc)
|
288 | if frag.is_flat:
|
289 | subdoc = if_flat.flat_mdoc
|
290 | else:
|
291 | subdoc = if_flat.nonflat_mdoc
|
292 | fragments.append(
|
293 | DocFragment(subdoc, frag.indent, frag.is_flat,
|
294 | frag.measure))
|
295 |
|
296 | elif case(doc_e.Flat):
|
297 | flat_doc = cast(doc.Flat, frag.mdoc.doc)
|
298 | fragments.append(
|
299 | DocFragment(flat_doc.mdoc, frag.indent, True,
|
300 | frag.measure))
|
301 |
|
302 |
|
303 | # vim: sw=4
|