OILS / display / pretty.py View on Github | oils.pub

332 lines, 117 significant
1#!/usr/bin/env python2
2"""pretty.py - Pretty printing library.
3
4Pretty printing means intelligently choosing whitespace including indentation
5and newline placement, to attempt to display data nicely while staying within a
6maximum line width.
7"""
8
9# ~~~ Architecture ~~~
10#
11# Based on a version of the algorithm from Wadler's "A Prettier Printer".
12#
13# Pretty printing proceeds in two phases:
14#
15# 1. Convert the thing you want to print into a `doc`.
16# 2. Print the `doc` using a standard algorithm.
17#
18# This separation keeps the details of the data you want to print separate from
19# the printing algorithm.
20
21# ~~~ Pretty Printing Overview ~~~
22#
23# If you're just using this file, you don't need to know how pretty printing
24# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
25# or extend how values are printed, you'll need to know, so here's an overview.
26#
27# You may want to first read Walder's "A Prettier Printer", which this is based
28# off of:
29# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
30#
31# Some additional reading, though only tangentially related:
32#
33# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
34# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
35# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
36# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
37# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
38#
39# ~ Constructors ~
40#
41# There are just a few constructors for `doc`, from which everything else is
42# built from.
43#
44# Text(string) prints like:
45# |string
46#
47# Break(string) prints like:
48# |string
49# or like a newline:
50# |
51# |
52# (It does the latter if printed in "flat" mode, and the former otherwise. See
53# Group for details.)
54#
55# Concat(a, b) prints like:
56# |AAAAA
57# |AAABBB
58# |BBBBB
59#
60# Indent(3, a) prints like:
61# |AAAAA
62# | AAAAA
63# | AAAAA
64#
65# Group(a) makes a decision. It either:
66# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
67# string instead of as a newline, and (ii) every Group nested inside of it is
68# printed flat.
69# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
70# newlines, and (ii) the Groups inside of it make their own decision about
71# whether to be flat.
72# It makes this decision greedily. If the current line would not overflow if the
73# group printed flat, then it will print flat. This takes into account not only
74# the group itself, but the content before and after it on the same line.
75#
76# IfFlat(a, b) prints a if in flat mode or b otherwise.
77#
78# Flat(a) prints a in flat mode. You should generally not need to use it.
79#
80# ~ Measures ~
81#
82# The algorithm used here is close to the one originally described by Wadler,
83# but it precomputes a "measure" for each node in the `doc`. This "measure"
84# allows each Groups to decide whether to print flat or not without needing to
85# look ahead per Wadler's algorithm. A measure has two pieces of information:
86#
87# - Measure.flat is the width of the doc if it's printed flat.
88# - Measure.nonflat is the width of the doc until the _earliest possible_
89# newline, or -1 if it doesn't contain a Break.
90#
91# Measures are used in two steps:
92# (1) First, they're computed bottom-up on the `doc`, measuring the size of each
93# node.
94# (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
95# measure something different: the width from the doc _to the end of the
96# entire doc tree_. This second set of Measures (the ones in the
97# DocFragments) are computed top-down, and they're used to decide for each
98# Group whether to use flat mode or not, without needing to scan ahead.
99
100from __future__ import print_function
101
102from _devbuild.gen.pretty_asdl import (doc, doc_e, DocFragment, Measure,
103 MeasuredDoc, List_Measured)
104from mycpp.mylib import log, tagswitch, BufWriter
105from typing import cast, List
106
107_ = log
108
109#
110# Measurements
111#
112
113
114def _EmptyMeasure():
115 # type: () -> Measure
116 """The measure of an empty doc."""
117 return Measure(0, -1)
118
119
120def _FlattenMeasure(measure):
121 # type: (Measure) -> Measure
122 """The measure if its document is rendered flat."""
123 return Measure(measure.flat, -1)
124
125
126def _ConcatMeasure(m1, m2):
127 # type: (Measure, Measure) -> Measure
128 """Compute the measure of 2 concatenated docs.
129
130 If m1 and m2 are the measures of doc1 and doc2, then _ConcatMeasure(m1, m2)
131 is the measure of doc.Concat([doc1, doc2]). This concatenation is
132 associative but not commutative.
133 """
134 if m1.nonflat != -1:
135 return Measure(m1.flat + m2.flat, m1.nonflat)
136 elif m2.nonflat != -1:
137 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
138 else:
139 return Measure(m1.flat + m2.flat, -1)
140
141
142def _SuffixLen(measure):
143 # type: (Measure) -> int
144 """The width until the earliest possible newline, or end of document."""
145 if measure.nonflat != -1:
146 return measure.nonflat
147 else:
148 return measure.flat
149
150
151#
152# Doc Construction
153#
154
155
156def AsciiText(string):
157 # type: (str) -> MeasuredDoc
158 """Print string (which must not contain a newline)."""
159 return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
160
161
162def _Break(string):
163 # type: (str) -> MeasuredDoc
164 """If in flat mode, print string, otherwise print `\n`.
165
166 Note: Doesn't try to compute Unicode width, since we control these strings.
167 """
168 return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
169
170
171def _Indent(indent, mdoc):
172 # type: (int, MeasuredDoc) -> MeasuredDoc
173 """Add 'indent' spaces after every newline in mdoc."""
174 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
175
176
177def _Splice(out, mdocs):
178 # type: (List[MeasuredDoc], List[MeasuredDoc]) -> Measure
179 """Optimization for _Concat.
180
181 This reduces the total size of the doc_t tree, and thus the memory usage
182 (as long as we mylib.MaybeCollect() in _HNode!)
183
184 Example of optimizing _Concat nodes together: _Field() concatenates
185 AsciiText("name:") and _HNode(), and the latter is often a doc.Concat node.
186 """
187 measure = _EmptyMeasure()
188 for mdoc in mdocs:
189 with tagswitch(mdoc.doc) as case:
190 if case(doc_e.Concat):
191 child = cast(List_Measured, mdoc.doc)
192 # ignore return value, because the parent has the measure already
193 _Splice(out, child)
194 else:
195 out.append(mdoc)
196 measure = _ConcatMeasure(measure, mdoc.measure)
197 return measure
198
199
200def _Concat(mdocs):
201 # type: (List[MeasuredDoc]) -> MeasuredDoc
202 """Print the mdocs in order (with no space in between)."""
203 flattened = List_Measured.New()
204 measure = _Splice(flattened, mdocs)
205 return MeasuredDoc(flattened, measure)
206
207
208def _Group(mdoc):
209 # type: (MeasuredDoc) -> MeasuredDoc
210 """Print mdoc. Use flat mode if mdoc will fit on the current line."""
211 return MeasuredDoc(mdoc, mdoc.measure)
212
213
214def _IfFlat(flat_mdoc, nonflat_mdoc):
215 # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
216 """If in flat mode, print flat_mdoc; otherwise print nonflat_mdoc."""
217 return MeasuredDoc(
218 doc.IfFlat(flat_mdoc, nonflat_mdoc),
219 Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
220
221
222def _Flat(mdoc):
223 # type: (MeasuredDoc) -> MeasuredDoc
224 """Prints mdoc in flat mode."""
225 return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
226
227
228class PrettyPrinter(object):
229
230 def __init__(self, max_width):
231 # type: (int) -> None
232 self.max_width = max_width
233
234 def _Fits(self, prefix_len, group, suffix_measure):
235 # type: (int, MeasuredDoc, Measure) -> bool
236 """Will group fit flat on the current line?"""
237 measure = _ConcatMeasure(_FlattenMeasure(group.measure),
238 suffix_measure)
239 return prefix_len + _SuffixLen(measure) <= self.max_width
240
241 def PrintDoc(self, document, buf):
242 # type: (MeasuredDoc, BufWriter) -> None
243 """Pretty print a doc_t to a BufWriter."""
244
245 # The width of the text we've printed so far on the current line
246 prefix_len = 0
247 # A _stack_ of document fragments to print. Each fragment contains:
248 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
249 # - The indentation level to print this doc node at.
250 # - Is this doc node being printed in flat mode?
251 # - The measure _from just after the doc node, to the end of the entire document_.
252 # (Call this the suffix_measure)
253 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
254
255 max_stack = len(fragments)
256
257 while len(fragments) > 0:
258 max_stack = max(max_stack, len(fragments))
259
260 frag = fragments.pop()
261 UP_doc = frag.mdoc.doc
262 with tagswitch(UP_doc) as case:
263
264 if case(doc_e.Text):
265 text = cast(doc.Text, UP_doc)
266 buf.write(text.string)
267 prefix_len += frag.mdoc.measure.flat
268
269 elif case(doc_e.Break):
270 break_ = cast(doc.Break, UP_doc)
271 if frag.is_flat:
272 buf.write(break_.string)
273 prefix_len += frag.mdoc.measure.flat
274 else:
275 buf.write('\n')
276 buf.write_spaces(frag.indent)
277 prefix_len = frag.indent
278
279 elif case(doc_e.Indent):
280 indented = cast(doc.Indent, UP_doc)
281 fragments.append(
282 DocFragment(indented.mdoc,
283 frag.indent + indented.indent,
284 frag.is_flat, frag.measure))
285
286 elif case(doc_e.Concat):
287 concat = cast(List_Measured, UP_doc)
288
289 # If we encounter Concat([A, B, C]) with a suffix measure M,
290 # we need to push A,B,C onto the stack in reverse order:
291 # - C, with suffix_measure = B.measure + A.measure + M
292 # - B, with suffix_measure = A.measure + M
293 # - A, with suffix_measure = M
294 measure = frag.measure
295 for mdoc in reversed(concat):
296 fragments.append(
297 DocFragment(mdoc, frag.indent, frag.is_flat,
298 measure))
299 # TODO: this algorithm allocates too much!
300 measure = _ConcatMeasure(mdoc.measure, measure)
301
302 elif case(doc_e.Group):
303 # If the group would fit on the current line when printed
304 # flat, do so. Otherwise, print it non-flat.
305 group = cast(MeasuredDoc, UP_doc)
306 is_flat = self._Fits(prefix_len, group, frag.measure)
307 fragments.append(
308 DocFragment(group, frag.indent, is_flat, frag.measure))
309
310 elif case(doc_e.IfFlat):
311 if_flat = cast(doc.IfFlat, UP_doc)
312 if frag.is_flat:
313 subdoc = if_flat.flat_mdoc
314 else:
315 subdoc = if_flat.nonflat_mdoc
316 fragments.append(
317 DocFragment(subdoc, frag.indent, frag.is_flat,
318 frag.measure))
319
320 elif case(doc_e.Flat):
321 flat_doc = cast(doc.Flat, UP_doc)
322 fragments.append(
323 DocFragment(flat_doc.mdoc, frag.indent, True,
324 frag.measure))
325
326 if 0:
327 log('')
328 log('___ MAX DocFragment stack: %d', max_stack)
329 log('')
330
331
332# vim: sw=4