OILS / doctools / oils_doc.py View on Github | oils.pub

678 lines, 371 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
Plugins and helpers:
  ExpandLinks -- function that expands $xref, etc. in <a href="...">
  PygmentsPlugin -- for ```python, ```sh, ```c, etc.
  HelpTopicsPlugin -- for help-index.html

  ShPromptPlugin -- understands $ echo hi, but doesn't run anything
  ShellSession -- meant to run shell snippets and cache the output (still a stub)
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17try:
18 from cStringIO import StringIO
19except ImportError:
20 # for python3
21 from io import StringIO # type: ignore
22import re
23import sys
24
25from typing import Iterator, Any, List, Optional, IO
26
27from doctools.util import log
28from lazylex import html
29
30try:
31 import pygments
32except ImportError:
33 pygments = None
34
35
36class _Abbrev(object):
37
38 def __init__(self, fmt):
39 # type: (str) -> None
40 self.fmt = fmt
41
42 def __call__(self, value):
43 # type: (str) -> str
44 return self.fmt % {'value': value}
45
46
# Maps the name in $name:arg (or $name) to a callable that builds the URL.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for 'osh-help', kept for backward compatibility so
    # that old links go to the same version.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: relative links can't be used here, because some pages live in
    # doc/ref and others in doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: always link to the latest release
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # Also for the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names reuse the oils-* expanders.
_ABBREVIATIONS.update([
    ('oil-src', _ABBREVIATIONS['oils-src']),
    ('oil-commit', _ABBREVIATIONS['oils-commit']),
    ('oil-doc', _ABBREVIATIONS['oils-doc']),
])

# Matches an href such as $xref:foo.  Group 1 is the abbreviation name and
# group 2 is the optional argument after the colon.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
106
107
def ExpandLinks(s):
    # type: (str) -> str
    """Expand abbreviated hrefs such as $xref:bash inside <a> tags.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
    replaced by the URL produced by the matching entry of _ABBREVIATIONS.
    When the href has no ":arg" part (e.g. href="$xref"), the text between
    <a ...> and </a> is used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The HTML with abbreviated hrefs rewritten; everything else is copied
      through unchanged.

    Raises:
      RuntimeError: if an href uses an abbreviation name that is not a key of
        _ABBREVIATIONS.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that is about to be read.  tag_lexer.Reset()
    # needs the true start of a tag, so this must advance on EVERY path through
    # the loop body.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        # Where the following token begins.  It is moved past </a> below when
        # ReadUntilEndTag() consumes extra tokens from the iterator.
        next_pos = end_pos

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # -1 means there's no href, e.g. <a name="foo">.  Fall through
                # (rather than 'continue') so that pos is still updated.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    new = None
                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # No explicit argument: use the link text
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # The iterator is now past </a>
                            next_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                    if new is not None:
                        out.PrintUntil(href_start)
                        # NOTE(review): cgi.escape() only exists on Python 2
                        # and older Python 3 releases -- revisit if this file
                        # is ever run on a newer interpreter.
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = next_pos

    out.PrintTheRest()

    return f.getvalue()
167
168
169class _Plugin(object):
170 """
171 A plugin for HighlightCode(), which modifies <pre><code> ... </code></pre>
172 """
173
174 def __init__(self, s, start_pos, end_pos):
175 # type: (str, int, int) -> None
176 self.s = s
177 self.start_pos = start_pos
178 self.end_pos = end_pos
179
180 def PrintHighlighted(self, out):
181 # type: (html.Output) -> None
182 raise NotImplementedError()
183
184
185# Optional newline at end
186_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)
187
188_PROMPT_LINE_RE = re.compile(
189 r'''
190(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
191(.*?) # arbitrary text
192(?: # don't highlight tab completion
193 (&lt;TAB&gt;) # it's HTML escaped!!!
194 .*?
195)?
196(?:
197 [ ][ ]([#] .*) # optionally: two spaces then a comment
198)?
199$
200''', re.VERBOSE)
201
202_EOL_COMMENT_RE = re.compile(
203 r'''
204.*? # arbitrary text
205[ ][ ]([#] .*) # two spaces then a comment
206$
207''', re.VERBOSE)
208
209_COMMENT_LINE_RE = re.compile(r'#.*')
210
211
212def Lines(s, start_pos, end_pos):
213 # type: (str, int, int) -> Iterator[int]
214 """Yields positions in s that end a line."""
215 pos = start_pos
216 while pos < end_pos:
217 m = _LINE_RE.match(s, pos, end_pos)
218 if not m:
219 raise RuntimeError("Should have matched a line")
220 line_end = m.end(0)
221
222 yield line_end
223
224 pos = line_end
225
226
class ShPromptPlugin(_Plugin):
    """Wrap shell prompts, commands, tab-completion markers and comments in
    <span> tags, one line at a time."""

    def _PrintSpan(self, out, open_tag, left, right):
        # type: (html.Output, str, int, int) -> None
        """Copy input up to left, then emit s[left:right] inside open_tag."""
        out.PrintUntil(left)
        out.Print(open_tag)
        out.PrintUntil(right)
        out.Print('</span>')

    def _PrintPromptLine(self, out, m):
        # type: (html.Output, Any) -> None
        """Emit spans for a line that matched _PROMPT_LINE_RE."""
        # The prompt and the command are always wrapped, even if the command is
        # empty.
        self._PrintSpan(out, '<span class="sh-prompt">', m.start(1), m.end(1))
        self._PrintSpan(out, '<span class="sh-command">', m.start(2),
                        m.end(2))

        # The tab-completion marker and the trailing comment are optional
        optional_groups = (
            (3, '<span class="sh-tab-complete">'),
            (4, '<span class="sh-comment">'),
        )
        for group_num, open_tag in optional_groups:
            if m.group(group_num):
                self._PrintSpan(out, open_tag, m.start(group_num),
                                m.end(group_num))

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        """Print self.s[start_pos:end_pos] to out with <span> tags added.

        Each line is tried, in order, as a whole-line comment, a prompt line,
        and a line with an end-of-line comment.  Lines that match none of these
        are copied through unchanged.
        """
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            comment_m = _COMMENT_LINE_RE.match(self.s, line_start, line_end)
            if comment_m:
                self._PrintSpan(out, '<span class="sh-comment">',
                                comment_m.start(0), comment_m.end(0))
            else:
                prompt_m = _PROMPT_LINE_RE.match(self.s, line_start, line_end)
                if prompt_m:
                    #log('MATCH %r', prompt_m.groups())
                    self._PrintPromptLine(out, prompt_m)
                else:
                    eol_m = _EOL_COMMENT_RE.match(self.s, line_start,
                                                  line_end)
                    if eol_m:
                        self._PrintSpan(out, '<span class="sh-comment">',
                                        eol_m.start(1), eol_m.end(1))

            # Whatever is left of the line, including the newline
            out.PrintUntil(line_end)

            line_start = line_end
278
279
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md.

    Each line of the block is passed to help_gen.TopicHtmlRenderer, which is
    constructed with the chapter and linkify_stop_col given here.
    """

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        """Render every line of the span and return the debug info list.

        NOTE(review): the type comment says None, like the base class, but
        this method returns the list handed to TopicHtmlRenderer, and
        HighlightCode() uses that return value.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is passed as-is, i.e. still HTML ESCAPED.  The
            # renderer is expected to only add tags and leave the rest alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means there's nothing to replace; the original line is then
            # printed by a later PrintUntil()
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
312
313
class PygmentsPlugin(_Plugin):
    """Highlight a code block with the Pygments lexer named by lang."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        """Print the Pygments HTML for self.s[start_pos:end_pos] to out.

        Only the highlighted markup is printed; positioning the output around
        the original <pre><code> is left to the caller.

        Raises:
          ImportError: if pygments isn't installed.  HighlightCode() checks
            for that before using this plugin.
        """
        # Import the subpackages explicitly.  A bare 'import pygments' is not
        # guaranteed to have loaded pygments.lexers and pygments.formatters, in
        # which case the attribute lookups would fail.
        from pygments import formatters
        from pygments import highlight
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
331
332
def SimpleHighlightCode(s):
    # type: (str) -> str
    """Simple highlighting for test/shell-vs-shell.sh.

    Runs ShPromptPlugin over the contents of every <pre> ... </pre> element.
    The <pre> and </pre> tags themselves are copied through unchanged and are
    never part of a highlighted line.

    Args:
      s: an HTML string.

    Returns:
      The HTML with <span> tags added inside <pre> elements.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that is about to be read
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos  # just past <pre ...>

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre ...>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  Only the text BETWEEN the tags is highlighted, so
                # that a prompt or comment span can't swallow <pre> or </pre>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                # </pre> is printed by a later PrintUntil() / PrintTheRest()
                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
375
376
# Parses a CSS class such as language-chapter-links-cmd-lang_35
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''', re.VERBOSE)


def _PrintShPromptBlock(s, out, code_start_pos, slash_code_left):
    # type: (str, html.Output, int, int) -> None
    """Highlight s[code_start_pos:slash_code_left] with ShPromptPlugin.

    The original <pre><code> tags are KEPT.
    """
    # Print everything up to and including <pre><code ...>
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
    1. Collect what's inside <pre><code> ...
    2. Pick a plugin based on the class attribute of <code>:
       - no class: use default_highlighter, if any
       - language-none, language-ysh: leave the block alone
       - language-sh-prompt, language-oil-sh: ShPromptPlugin, which highlights
         lines that look like a shell prompt and command with <span>
       - language-chapter-links-*: HelpTopicsPlugin
       - any other language-*: PygmentsPlugin
    3. Copy everything else through unchanged.

    Args:
      s: an HTML string.
      default_highlighter: None, or 'sh-prompt' / 'oils-sh' / 'oil-sh'.  It is
        applied to <code> tags without a class attribute.
      debug_out: optional list.  One dict with keys 'to_chap' and 'lines' is
        appended for each language-chapter-links-* block.

    Returns:
      The highlighted HTML.

    Raises:
      RuntimeError: if a <code> tag has no class and default_highlighter is
        not None and not one of the names above.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that is about to be read.  tag_lexer.Reset()
    # needs the true start of a tag, so this must advance on EVERY path through
    # the loop body.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _PrintShPromptBlock(s, out, code_start_pos,
                                                    slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        # NOTE(review): 'oils-sh' is accepted as a
                        # default_highlighter above, but language-oils-sh is
                        # not listed here -- confirm whether it should be.
                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            _PrintShPromptBlock(s, out, code_start_pos,
                                                slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:
                            # language-* without Pygments: leave the block as
                            # is.  No 'continue' here, so that pos is still
                            # updated at the bottom of the loop.
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == h8_id.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
543
544
def ExtractCode(s, f):
    # type: (str, IO[str]) -> None
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm of the highlighting code above:

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it after a '# block N' header

    Only <code> tags WITHOUT a class attribute are extracted.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    pos = 0
    token_iter = html.ValidTokens(s)

    for tok_id, end_pos in token_iter:
        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(token_iter)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                is_code_tag = (tok_id == h8_id.StartTag and
                               tag_lexer.TagName() == 'code')

                # Skip code blocks that look like ```foo
                # Usually we use 'oil-sh' as the default_highlighter, and
                # all those code blocks should be extracted.  TODO: maybe
                # this should be oil-language?
                if is_code_tag and tag_lexer.GetAttrRaw('class') is None:
                    code_start_pos = end_pos

                    out.SkipTo(code_start_pos)
                    out.Print('# block %d' % block_num)
                    out.Print('\n')

                    slash_code_left, _ = html.ReadUntilEndTag(
                        token_iter, tag_lexer, 'code')

                    text = html.ToText(s, code_start_pos, slash_code_left)
                    out.SkipTo(slash_code_left)

                    out.Print(text)
                    out.Print('\n')

                    block_num += 1

        pos = end_pos

    # Text outside of code blocks is deliberately not printed
    #out.PrintTheRest()
609
610
class ShellSession(object):
    """
    Placeholder for a plugin that runs shell snippets and caches the output.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that this input

        $ x=one
        $ echo $x
        $ echo two

    becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    with the outputs stored like this:

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    The command is hashed with md5 to find its output file.  If the file
    already exists, the command isn't run again.  You can delete the file to
    redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        # type: (str, str) -> None
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: e.g. ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.cache_dir = cache_dir
        self.shell_exe = shell_exe

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        # type: (str, int, int, html.Output) -> None
        """Not implemented yet; does nothing.

        Args:
          s: an HTML string.
        """
        return None
660
661
def main(argv):
    # type: (List[str]) -> None
    """Command line entry point.  argv[1] is the action.

    The only action is 'highlight', which reads HTML from stdin and prints
    the result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: for any other action.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    page = sys.stdin.read()
    print(SimpleHighlightCode(page))


if __name__ == '__main__':
    main(sys.argv)