OILS / doctools / oils_doc.py View on Github | oils.pub

671 lines, 374 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
 ShellSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17from typing import Iterator
18from typing import Any
19from typing import List
20from typing import Optional
21try:
22 from cStringIO import StringIO
23except ImportError:
24 # for python3
25 from io import StringIO # type: ignore
26import re
27import sys
28
29from doctools.util import log
30from lazylex import html
31
32try:
33 import pygments
34except ImportError:
35 pygments = None
36
37
38class _Abbrev(object):
39
40 def __init__(self, fmt):
41 # type: (str) -> None
42 self.fmt = fmt
43
44 def __call__(self, value):
45 # type: (str) -> str
46 return self.fmt % {'value': value}
47
48
# Maps an abbreviation name (the 'xref' in $xref:foo) to the _Abbrev that
# builds its URL.  Each entry below is (name, URL template).
_ABBREVIATIONS = dict(
    (name, _Abbrev(template)) for name, template in [
        ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

        # alias for osh-help, for backward compatibility
        # to link to the same version

        # TODO: Remove all of these broken links!
        ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

        # New style: one for every chapter?
        # Problem: can't use relative links here, because some are from
        # doc/ref, and some are from doc
        ('chap-type-method',
         'chap-type-method.html?topic=%(value)s#%(value)s'),
        ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),
        ('chap-builtin-cmd',
         'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

        # for blog
        ('osh-help-latest',
         '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
         ),
        ('oil-help-latest',
         '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
         ),

        # For the blog
        ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),
        ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
        ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
        ('oils-src', 'https://github.com/oilshell/oil/blob/master/%(value)s'),
        ('blog-code-src',
         'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
        ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
        ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
    ])

# Backward compatibility: the old oil-* names share the _Abbrev object of
# their oils-* replacement.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
105
# Link shortcut at the start of an href value, e.g. $xref:foo
#
#   group 1: abbreviation name -- lowercase letters and '-'  ('xref')
#   group 2: optional argument after the colon               ('foo')
#
# When the argument is omitted ($xref), group 2 is None and ExpandLinks()
# uses the link text instead.  The pattern is compiled with re.VERBOSE, so
# the spaces inside it are not significant.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
108
109
def ExpandLinks(s):
    # type: (str) -> str
    """Expand link shortcuts like $xref:bash in <a href="..."> attributes.

    Every <a> start tag whose href matches _SHORTCUT_RE has that href
    replaced by the URL built by the corresponding _ABBREVIATIONS entry.  If
    the shortcut has no argument (href="$xref"), the text between <a ...> and
    </a> is used as the argument.  Everything else in s is copied unchanged.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with shortcuts expanded.

    Raises:
      RuntimeError: if the shortcut names an unknown abbreviation.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token being examined.  It must be advanced after
    # every token, including tokens consumed by ReadUntilEndTag().
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # An <a> without href has nothing to expand.  Don't
                # 'continue' here: that would skip the 'pos = end_pos'
                # bookkeeping at the bottom of the loop and leave pos
                # pointing at this tag when the next token is lexed.
                m = None
                if href_start != -1:
                    m = _SHORTCUT_RE.match(s[href_start:href_end])

                if m:
                    abbrev_name, arg = m.groups()
                    if not arg:
                        close_tag_left, close_tag_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'a')
                        arg = s[open_tag_right:close_tag_left]

                        # The iterator is now past </a>, so the next token
                        # starts at the right edge of the end tag.
                        end_pos = close_tag_right

                    # Hack to so we can write [Wiki Page]($wiki) and have the
                    # link look like /Wiki-Page/
                    if abbrev_name == 'wiki':
                        arg = arg.replace(' ', '-')

                    func = _ABBREVIATIONS.get(abbrev_name)
                    if not func:
                        raise RuntimeError('Invalid abbreviation %r' %
                                           abbrev_name)
                    new = func(arg)

                    # Replace only the href value; the rest of the tag and
                    # the link text are copied through by 'out'.
                    out.PrintUntil(href_start)
                    f.write(cgi.escape(new))
                    out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
169
170
171class _Plugin(object):
172 """
173 A plugin for HighlightCode(), which modifies <pre><code> ... </code></pre>
174 """
175
176 def __init__(self, s, start_pos, end_pos):
177 # type: (str, int, int) -> None
178 self.s = s
179 self.start_pos = start_pos
180 self.end_pos = end_pos
181
182 def PrintHighlighted(self, out):
183 raise NotImplementedError()
184
185
# One line of text: any run of non-newline characters, with an optional
# newline at the end.  Lines() matches it repeatedly to find line ends.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A line that looks like a shell prompt followed by a command.  Groups, as
# used by ShPromptPlugin:
#   1: the prompt, up to and including '$'
#   2: the command text
#   3: an HTML-escaped <TAB> marker, if present
#   4: a trailing '#' comment preceded by two spaces, if present
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that is not a prompt but ends with a comment.  Group 1 is the
# comment, starting at '#'.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# ShPromptPlugin applies this with match() at the start of a line, so it
# recognizes lines whose first character is '#'.
_COMMENT_LINE_RE = re.compile(r'#.*')
211
212
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Yield the end position of each line in s[start_pos:end_pos].

    A yielded position is just past the line's newline when it has one;
    the final position is end_pos when the span doesn't end in a newline.

    Raises:
      RuntimeError: if _LINE_RE fails to match at the current position.
    """
    cursor = start_pos
    while cursor < end_pos:
        match = _LINE_RE.match(s, cursor, end_pos)
        if match is None:
            raise RuntimeError("Should have matched a line")

        # The end of this line is also the start of the next one.
        cursor = match.end()
        yield cursor
226
227
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts, commands, tab completion and comments.

    Each line of the span is classified as a comment line, a prompt line, or
    a line with an end-of-line comment, and the recognized parts are wrapped
    in <span class="sh-..."> tags.  Nothing is executed.
    """

    @staticmethod
    def _Wrap(out, start, end, open_tag):
        # type: (html.Output, int, int, str) -> None
        """Copy input up to 'start', then emit s[start:end] inside a span."""
        out.PrintUntil(start)
        out.Print(open_tag)
        out.PrintUntil(end)
        out.Print('</span>')

    def _HighlightLine(self, out, pos, line_end):
        # type: (html.Output, int, int) -> None
        """Wrap the recognized parts of the line s[pos:line_end], if any."""
        wrap = self._Wrap

        # 1. The whole line is a comment.
        m = _COMMENT_LINE_RE.match(self.s, pos, line_end)
        if m:
            wrap(out, m.start(0), m.end(0), '<span class="sh-comment">')
            return

        # 2. Prompt and command, optionally followed by <TAB> and a comment.
        m = _PROMPT_LINE_RE.match(self.s, pos, line_end)
        if m:
            wrap(out, m.start(1), m.end(1), '<span class="sh-prompt">')
            wrap(out, m.start(2), m.end(2), '<span class="sh-command">')

            if m.group(3):
                wrap(out, m.start(3), m.end(3),
                     '<span class="sh-tab-complete">')

            if m.group(4):
                wrap(out, m.start(4), m.end(4), '<span class="sh-comment">')
            return

        # 3. Any other line that ends with a comment.
        m = _EOL_COMMENT_RE.match(self.s, pos, line_end)
        if m:
            wrap(out, m.start(1), m.end(1), '<span class="sh-comment">')

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Copy whatever is left of the line, highlighted or not.
            out.PrintUntil(line_end)

            line_start = line_end
279
280
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md.

    Each line of the span is passed to help_gen.TopicHtmlRenderer; lines for
    which the renderer returns HTML are replaced by it, and the other lines
    are left for the caller to copy unchanged.
    """

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.chapter, self.linkify_stop_col = chapter, linkify_stop_col

    def PrintHighlighted(self, out):
        """Render the span line by line.

        Returns:
          The debug list that was handed to TopicHtmlRenderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is already HTML-escaped here, so the renderer is
            # given escaped text.
            rendered = renderer.Render(self.s[line_start:line_end])

            if rendered is not None:
                # Swap the original line for the rendered one.
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
312
313
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, for ```python, ```sh, ```c, etc.

    'lang' is the Pygments lexer name, i.e. the part of the CSS class after
    'language-'.
    """

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the Pygments HTML for the span to 'out'.

        Raises:
          ImportError: if Pygments is not installed.  HighlightCode() checks
            for that before using this plugin.
        """
        # Import the submodules explicitly.  A bare 'import pygments' binds
        # only the package; pygments.lexers and pygments.formatters are not
        # attributes of it until something imports them.
        from pygments import formatters
        from pygments import highlight
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        out.Print(highlight(code, lexer, formatter))
329
330
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    Runs ShPromptPlugin over the contents of every <pre> ... </pre> element
    and copies the rest of the document unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token being examined.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Copy everything up to and including the opening <pre>.
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment
                # highlighting we want!  Highlight only the text between the
                # tags, as HighlightCode() does, so that <pre> and </pre>
                # are never taken for part of a prompt or command.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>, so the next token starts
                # at the right edge of the end tag.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
372
373
# CSS class of a "chapter links" code block, e.g.
#   language-chapter-links-cmd-lang
#   language-chapter-links-cmd-lang_33
#
#   group 1: chapter name
#   group 2: optional linkify_stop_col digits; None when absent, in which
#            case HighlightCode() uses -1
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''', re.VERBOSE)
380
381
def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
      1. Collect what's inside <pre><code> ...
      2. Then read lines with ShPromptPlugin.
      3. If the line looks like a shell prompt and command, highlight them
         with <span>

    The plugin is chosen from the class attribute of <code>:

      (no class)                     default_highlighter, if not None
      language-none                  left alone
      language-sh-prompt,
      language-oils-sh,
      language-oil-sh                ShPromptPlugin
      language-ysh                   left alone (no highlighter yet)
      language-chapter-links-*       HelpTopicsPlugin
      any other language-*           PygmentsPlugin; replaces the whole
                                     <pre> element

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list; one dict per chapter-links block is appended
        to it.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is not a known name.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token being examined.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # The token right after <pre> must be <code> for this to be
                # a code block.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>, so the next
                        # token starts at the right edge of the end tag.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the
                                # comment highlighting we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # As above: keep end_pos in step with the iterator.
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        # Same names as default_highlighter accepts; oil-sh
                        # for compatibility.
                        elif css_class in ('language-sh-prompt',
                                           'language-oils-sh',
                                           'language-oil-sh'):
                            # Here's we're KEEPING the original <pre><code>
                            # Print everything up to and including
                            # <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:  # language-*, no Pygments
                            # Leave the block as it is.  Falling through
                            # (rather than 'continue') keeps the
                            # 'pos = end_pos' bookkeeping below.
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == h8_id.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
540
541
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> tags without a class attribute are extracted; blocks written
    as ```foo are skipped.  Each extracted block is preceded by a
    '# block N' line.  Text outside the blocks is not copied.

    Args:
      s: an HTML string.
      f: file-like object that receives the extracted code.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    num_blocks = 0
    pos = 0
    tokens = html.ValidTokens(s)

    while True:
        tok = next(tokens, None)
        if tok is None:
            break
        tok_id, end_pos = tok

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # The token right after <pre> must be <code>.
                tok = next(tokens, None)
                if tok is None:
                    break
                tok_id, end_pos = tok

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if tag_lexer.GetAttrRaw('class') is None:
                        body_start = end_pos

                        # Drop everything before the code, then write the
                        # block header.
                        out.SkipTo(body_start)
                        out.Print('# block %d' % num_blocks)
                        out.Print('\n')

                        body_end, _ = html.ReadUntilEndTag(
                            tokens, tag_lexer, 'code')

                        decoded = html.ToText(s, body_start, body_end)
                        out.SkipTo(body_end)

                        out.Print(decoded)
                        out.Print('\n')

                        num_blocks += 1

        pos = end_pos

    # Deliberately no out.PrintTheRest(): the trailing HTML is not code.
605
606
class ShellSession(object):
    """
    TODO: Pass this to HighlightCode as a plugin

    $ x=one
    $ echo $x
    $ echo two

    Becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    And then you will have
      blog/2019/12/_shell_session/
        $hash1-stdout.txt
        $hash2-stdout.txt

    It hashes the command with md5 and then brings it back.
    If the file already exists then it doesn't run it again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.

    None of this is implemented yet: PrintHighlighted() does nothing.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """
        Args:
          s: an HTML string.
        """
        # Placeholder: nothing is run or printed.
        return None
654
655
def main(argv):
    """Command line entry point.

    Usage: oils_doc.py highlight < in.html > out.html

    Raises:
      RuntimeError: if the action (argv[1]) is not 'highlight'.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    page = sys.stdin.read()
    print(SimpleHighlightCode(page))


if __name__ == '__main__':
    main(sys.argv)