display/pretty.py

OILS / display / pretty.py View on Github | oilshell.org

303 lines, 100 significant

1	#!/usr/bin/env python2
2	"""
3	Pretty printing library.
4
5	Pretty printing means intelligently choosing whitespace including indentation
6	and newline placement, to attempt to display data nicely while staying within a
7	maximum line width.
8	"""
9
10	# ~~~ Architecture ~~~
11	#
12	# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13	#
14	# Pretty printing proceeds in two phases:
15	#
16	# 1. Convert the thing you want to print into a `doc`.
17	# 2. Print the `doc` using a standard algorithm.
18	#
19	# This separation keeps the details of the data you want to print separate from
20	# the printing algorithm.
21
22	# ~~~ Pretty Printing Overview ~~~
23	#
24	# If you're just using this file, you don't need to know how pretty printing
25	# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26	# or extend how values are printed, you'll need to know, so here's an overview.
27	#
28	# You may want to first read Walder's "A Prettier Printer", which this is based
29	# off of:
30	# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31	#
32	# Some additional reading, though only tangentially related:
33	#
34	# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35	# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36	# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37	# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38	# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39	#
40	# ~ Constructors ~
41	#
42	# There are just a few constructors for `doc`, from which everything else is
43	# built from.
44	#
45	# Text(string) prints like:
46	# \|string
47	#
48	# Break(string) prints like:
49	# \|string
50	# or like a newline:
51	# \|
52	# \|
53	# (It does the latter if printed in "flat" mode, and the former otherwise. See
54	# Group for details.)
55	#
56	# Concat(a, b) prints like:
57	# \|AAAAA
58	# \|AAABBB
59	# \|BBBBB
60	#
61	# Indent(3, a) prints like:
62	# \|AAAAA
63	# \| AAAAA
64	# \| AAAAA
65	#
66	# Group(a) makes a decision. It either:
67	# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68	# string instead of as a newline, and (ii) every Group nested inside of it is
69	# printed flat.
70	# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71	# newlines, and (ii) the Groups inside of it make their own decision about
72	# whether to be flat.
73	# It makes this decision greedily. If the current line would not overflow if the
74	# group printed flat, then it will print flat. This takes into account not only
75	# the group itself, but the content before and after it on the same line.
76	#
77	# IfFlat(a, b) prints a if in flat mode or b otherwise.
78	#
79	# Flat(a) prints a in flat mode. You should generally not need to use it.
80	#
81	# ~ Measures ~
82	#
83	# The algorithm used here is close to the one originally described by Wadler,
84	# but it precomputes a "measure" for each node in the `doc`. This "measure"
85	# allows each Groups to decide whether to print flat or not without needing to
86	# look ahead per Wadler's algorithm. A measure has two pieces of information:
87	#
88	# - Measure.flat is the width of the doc if it's printed flat.
89	# - Measure.nonflat is the width of the doc until the _earliest possible_
90	# newline, or -1 if it doesn't contain a Break.
91	#
92	# Measures are used in two steps:
93	# (1) First, they're computed bottom-up on the `doc`, measuring the size of each
94	# node.
95	# (2) Later, PrintDoc() stores a measure in each DocFragment. These Measures
96	# measure something different: the width from the doc _to the end of the
97	# entire doc tree_. This second set of Measures (the ones in the
98	# DocFragments) are computed top-down, and they're used to decide for each
99	# Group whether to use flat mode or not, without needing to scan ahead.
100
101	from __future__ import print_function
102
103	from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
104	from mycpp.mylib import log, tagswitch, BufWriter
105	from typing import cast, List
106
107	_ = log
108
109	################
110	# Measurements #
111	################
112
113
114	def _EmptyMeasure():
115	# type: () -> Measure
116	"""The measure of an empty doc."""
117	return Measure(0, -1)
118
119
120	def _FlattenMeasure(measure):
121	# type: (Measure) -> Measure
122	"""The measure if its document is rendered flat."""
123	return Measure(measure.flat, -1)
124
125
126	def _ConcatMeasure(m1, m2):
127	# type: (Measure, Measure) -> Measure
128	"""Compute the measure of concatenated docs.
129
130	If m1 and m2 are the measures of doc1 and doc2,
131	then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
132	This concatenation is associative but not commutative."""
133	if m1.nonflat != -1:
134	return Measure(m1.flat + m2.flat, m1.nonflat)
135	elif m2.nonflat != -1:
136	return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
137	else:
138	return Measure(m1.flat + m2.flat, -1)
139
140
141	def _SuffixLen(measure):
142	# type: (Measure) -> int
143	"""The width until the earliest possible newline, or end of document."""
144	if measure.nonflat != -1:
145	return measure.nonflat
146	else:
147	return measure.flat
148
149
150	####################
151	# Doc Construction #
152	####################
153
154
155	def AsciiText(string):
156	# type: (str) -> MeasuredDoc
157	"""Print `string` (which must not contain a newline)."""
158	return MeasuredDoc(doc.Text(string), Measure(len(string), -1))
159
160
161	def _Break(string):
162	# type: (str) -> MeasuredDoc
163	"""If in `flat` mode, print `string`, otherwise print `\n`.
164
165	Note: Doesn't try to compute Unicode width, since we control these strings.
166	"""
167	return MeasuredDoc(doc.Break(string), Measure(len(string), 0))
168
169
170	def _Indent(indent, mdoc):
171	# type: (int, MeasuredDoc) -> MeasuredDoc
172	"""Add `indent` spaces after every newline in `mdoc`."""
173	return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
174
175
176	def _Concat(mdocs):
177	# type: (List[MeasuredDoc]) -> MeasuredDoc
178	"""Print the mdocs in order (with no space in between)."""
179	measure = _EmptyMeasure()
180	for mdoc in mdocs:
181	measure = _ConcatMeasure(measure, mdoc.measure)
182	return MeasuredDoc(doc.Concat(mdocs), measure)
183
184
185	def _Group(mdoc):
186	# type: (MeasuredDoc) -> MeasuredDoc
187	"""Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
188	return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
189
190
191	def _IfFlat(flat_mdoc, nonflat_mdoc):
192	# type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
193	"""If in flat mode, print `flat_mdoc` otherwise print `nonflat_mdoc`."""
194	return MeasuredDoc(
195	doc.IfFlat(flat_mdoc, nonflat_mdoc),
196	Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
197
198
199	def _Flat(mdoc):
200	# type: (MeasuredDoc) -> MeasuredDoc
201	"""Prints `mdoc` in flat mode."""
202	return MeasuredDoc(doc.Flat(mdoc), _FlattenMeasure(mdoc.measure))
203
204
205	###################
206	# Pretty Printing #
207	###################
208
209
210	class PrettyPrinter(object):
211
212	def __init__(self, max_width):
213	# type: (int) -> None
214	self.max_width = max_width
215
216	def _Fits(self, prefix_len, group, suffix_measure):
217	# type: (int, doc.Group, Measure) -> bool
218	"""Will `group` fit flat on the current line?"""
219	measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
220	suffix_measure)
221	return prefix_len + _SuffixLen(measure) <= self.max_width
222
223	def PrintDoc(self, document, buf):
224	# type: (MeasuredDoc, BufWriter) -> None
225	"""Pretty print a `pretty.doc` to a BufWriter."""
226
227	# The width of the text we've printed so far on the current line
228	prefix_len = 0
229	# A _stack_ of document fragments to print. Each fragment contains:
230	# - A MeasuredDoc (doc node and its measure, saying how "big" it is)
231	# - The indentation level to print this doc node at.
232	# - Is this doc node being printed in flat mode?
233	# - The measure _from just after the doc node, to the end of the entire document_.
234	# (Call this the suffix_measure)
235	fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
236
237	while len(fragments) > 0:
238	frag = fragments.pop()
239	with tagswitch(frag.mdoc.doc) as case:
240
241	if case(doc_e.Text):
242	text = cast(doc.Text, frag.mdoc.doc)
243	buf.write(text.string)
244	prefix_len += frag.mdoc.measure.flat
245
246	elif case(doc_e.Break):
247	if frag.is_flat:
248	break_str = cast(doc.Break, frag.mdoc.doc).string
249	buf.write(break_str)
250	prefix_len += frag.mdoc.measure.flat
251	else:
252	buf.write('\n')
253	buf.write_spaces(frag.indent)
254	prefix_len = frag.indent
255
256	elif case(doc_e.Indent):
257	indented = cast(doc.Indent, frag.mdoc.doc)
258	fragments.append(
259	DocFragment(indented.mdoc,
260	frag.indent + indented.indent,
261	frag.is_flat, frag.measure))
262
263	elif case(doc_e.Concat):
264	# If we encounter Concat([A, B, C]) with a suffix measure M,
265	# we need to push A,B,C onto the stack in reverse order:
266	# - C, with suffix_measure = B.measure + A.measure + M
267	# - B, with suffix_measure = A.measure + M
268	# - A, with suffix_measure = M
269	concat = cast(doc.Concat, frag.mdoc.doc)
270	measure = frag.measure
271	for mdoc in reversed(concat.mdocs):
272	fragments.append(
273	DocFragment(mdoc, frag.indent, frag.is_flat,
274	measure))
275	measure = _ConcatMeasure(mdoc.measure, measure)
276
277	elif case(doc_e.Group):
278	# If the group would fit on the current line when printed
279	# flat, do so. Otherwise, print it non-flat.
280	group = cast(doc.Group, frag.mdoc.doc)
281	flat = self._Fits(prefix_len, group, frag.measure)
282	fragments.append(
283	DocFragment(group.mdoc, frag.indent, flat,
284	frag.measure))
285
286	elif case(doc_e.IfFlat):
287	if_flat = cast(doc.IfFlat, frag.mdoc.doc)
288	if frag.is_flat:
289	subdoc = if_flat.flat_mdoc
290	else:
291	subdoc = if_flat.nonflat_mdoc
292	fragments.append(
293	DocFragment(subdoc, frag.indent, frag.is_flat,
294	frag.measure))
295
296	elif case(doc_e.Flat):
297	flat_doc = cast(doc.Flat, frag.mdoc.doc)
298	fragments.append(
299	DocFragment(flat_doc.mdoc, frag.indent, True,
300	frag.measure))
301
302
303	# vim: sw=4