OILS / display / pretty.py View on Github | oilshell.org

303 lines, 100 significant
1#!/usr/bin/env python2
2"""
3Pretty printing library.
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# IfFlat(a, b) prints a if in flat mode or b otherwise.
78#
79# Flat(a) prints a in flat mode. You should generally not need to use it.
80#
81# ~ Measures ~
82#
83# The algorithm used here is close to the one originally described by Wadler,
84# but it precomputes a "measure" for each node in the `doc`. This "measure"
85# allows each Groups to decide whether to print flat or not without needing to
86# look ahead per Wadler's algorithm. A measure has two pieces of information:
87#
88# - Measure.flat is the width of the doc if it's printed flat.
89# - Measure.nonflat is the width of the doc until the _earliest possible_
90# newline, or -1 if it doesn't contain a Break.
91#
92# Measures are used in two steps:
93# (1) First, they're computed bottom-up on the `doc`, measuring the size of each
94# node.
95# (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
96# measure something different: the width from the doc _to the end of the
97# entire doc tree_. This second set of Measures (the ones in the
98# DocFragments) are computed top-down, and they're used to decide for each
99# Group whether to use flat mode or not, without needing to scan ahead.
100
101from __future__ import print_function
102
103from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
104from mycpp.mylib import log, tagswitch, BufWriter
105from typing import cast, List
106
107_ = log
108
109################
110# Measurements #
111################
112
113
114def _EmptyMeasure():
115 # type: () -> Measure
116 """The measure of an empty doc."""
117 return Measure(0, -1)
118
119
120def _FlattenMeasure(measure):
121 # type: (Measure) -> Measure
122 """The measure if its document is rendered flat."""
123 return Measure(measure.flat, -1)
124
125
126def _ConcatMeasure(m1, m2):
127 # type: (Measure, Measure) -> Measure
128 """Compute the measure of concatenated docs.
129
130 If m1 and m2 are the measures of doc1 and doc2,
131 then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
132 This concatenation is associative but not commutative."""
133 if m1.nonflat != -1:
134 return Measure(m1.flat + m2.flat, m1.nonflat)
135 elif m2.nonflat != -1:
136 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
137 else:
138 return Measure(m1.flat + m2.flat, -1)
139
140
141def _SuffixLen(measure):
142 # type: (Measure) -> int
143 """The width until the earliest possible newline, or end of document."""
144 if measure.nonflat != -1:
145 return measure.nonflat
146 else:
147 return measure.flat
148
149
150####################
151# Doc Construction #
152####################
153
154
155def AsciiText(string):
156 # type: (str) -> MeasuredDoc
157 """Print `string` (which must not contain a newline)."""
158 return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
159
160
161def _Break(string):
162 # type: (str) -> MeasuredDoc
163 """If in `flat` mode, print `string`, otherwise print `\n`.
164
165 Note: Doesn't try to compute Unicode width, since we control these strings.
166 """
167 return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
168
169
170def _Indent(indent, mdoc):
171 # type: (int, MeasuredDoc) -> MeasuredDoc
172 """Add `indent` spaces after every newline in `mdoc`."""
173 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
174
175
176def _Concat(mdocs):
177 # type: (List[MeasuredDoc]) -> MeasuredDoc
178 """Print the mdocs in order (with no space in between)."""
179 measure = _EmptyMeasure()
180 for mdoc in mdocs:
181 measure = _ConcatMeasure(measure, mdoc.measure)
182 return MeasuredDoc(doc.Concat(mdocs), measure)
183
184
185def _Group(mdoc):
186 # type: (MeasuredDoc) -> MeasuredDoc
187 """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
188 return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
189
190
191def _IfFlat(flat_mdoc, nonflat_mdoc):
192 # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
193 """If in flat mode, print `flat_mdoc` otherwise print `nonflat_mdoc`."""
194 return MeasuredDoc(
195 doc.IfFlat(flat_mdoc, nonflat_mdoc),
196 Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
197
198
199def _Flat(mdoc):
200 # type: (MeasuredDoc) -> MeasuredDoc
201 """Prints `mdoc` in flat mode."""
202 return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
203
204
205###################
206# Pretty Printing #
207###################
208
209
210class PrettyPrinter(object):
211
212 def __init__(self, max_width):
213 # type: (int) -> None
214 self.max_width = max_width
215
216 def _Fits(self, prefix_len, group, suffix_measure):
217 # type: (int, doc.Group, Measure) -> bool
218 """Will `group` fit flat on the current line?"""
219 measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
220 suffix_measure)
221 return prefix_len + _SuffixLen(measure) <= self.max_width
222
223 def PrintDoc(self, document, buf):
224 # type: (MeasuredDoc, BufWriter) -> None
225 """Pretty print a `pretty.doc` to a BufWriter."""
226
227 # The width of the text we've printed so far on the current line
228 prefix_len = 0
229 # A _stack_ of document fragments to print. Each fragment contains:
230 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
231 # - The indentation level to print this doc node at.
232 # - Is this doc node being printed in flat mode?
233 # - The measure _from just after the doc node, to the end of the entire document_.
234 # (Call this the suffix_measure)
235 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
236
237 while len(fragments) > 0:
238 frag = fragments.pop()
239 with tagswitch(frag.mdoc.doc) as case:
240
241 if case(doc_e.Text):
242 text = cast(doc.Text, frag.mdoc.doc)
243 buf.write(text.string)
244 prefix_len += frag.mdoc.measure.flat
245
246 elif case(doc_e.Break):
247 if frag.is_flat:
248 break_str = cast(doc.Break, frag.mdoc.doc).string
249 buf.write(break_str)
250 prefix_len += frag.mdoc.measure.flat
251 else:
252 buf.write('\n')
253 buf.write_spaces(frag.indent)
254 prefix_len = frag.indent
255
256 elif case(doc_e.Indent):
257 indented = cast(doc.Indent, frag.mdoc.doc)
258 fragments.append(
259 DocFragment(indented.mdoc,
260 frag.indent + indented.indent,
261 frag.is_flat, frag.measure))
262
263 elif case(doc_e.Concat):
264 # If we encounter Concat([A, B, C]) with a suffix measure M,
265 # we need to push A,B,C onto the stack in reverse order:
266 # - C, with suffix_measure = B.measure + A.measure + M
267 # - B, with suffix_measure = A.measure + M
268 # - A, with suffix_measure = M
269 concat = cast(doc.Concat, frag.mdoc.doc)
270 measure = frag.measure
271 for mdoc in reversed(concat.mdocs):
272 fragments.append(
273 DocFragment(mdoc, frag.indent, frag.is_flat,
274 measure))
275 measure = _ConcatMeasure(mdoc.measure, measure)
276
277 elif case(doc_e.Group):
278 # If the group would fit on the current line when printed
279 # flat, do so. Otherwise, print it non-flat.
280 group = cast(doc.Group, frag.mdoc.doc)
281 flat = self._Fits(prefix_len, group, frag.measure)
282 fragments.append(
283 DocFragment(group.mdoc, frag.indent, flat,
284 frag.measure))
285
286 elif case(doc_e.IfFlat):
287 if_flat = cast(doc.IfFlat, frag.mdoc.doc)
288 if frag.is_flat:
289 subdoc = if_flat.flat_mdoc
290 else:
291 subdoc = if_flat.nonflat_mdoc
292 fragments.append(
293 DocFragment(subdoc, frag.indent, frag.is_flat,
294 frag.measure))
295
296 elif case(doc_e.Flat):
297 flat_doc = cast(doc.Flat, frag.mdoc.doc)
298 fragments.append(
299 DocFragment(flat_doc.mdoc, frag.indent, True,
300 frag.measure))
301
302
303# vim: sw=4