OILS / doctools / oils_doc.py View on Github | oils.pub

681 lines, 374 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
  ShellSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17try:
18 from cStringIO import StringIO
19except ImportError:
20 # for python3
21 from io import StringIO # type: ignore
22import re
23import sys
24
25from typing import Iterator, Any, List, Optional, IO
26
27from data_lang import htm8
28from doctools.util import log
29from lazylex import html
30
31try:
32 import pygments
33except ImportError:
34 pygments = None
35
36
class _Abbrev(object):
    """Callable that expands a link abbreviation into a URL.

    fmt is a %-style format string with the named placeholder %(value)s.
    Calling the object substitutes the given value for every occurrence of
    that placeholder.
    """

    def __init__(self, fmt):
        # type: (str) -> None
        self.fmt = fmt

    def __call__(self, value):
        # type: (str) -> str
        substitutions = dict(value=value)
        return self.fmt % substitutions
46
47
# Maps the abbreviation name used in a $name or $name:arg link to the _Abbrev
# that builds the target URL.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for osh-help, kept for backward compatibility so that
    # old documents link to the same version.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method': _Abbrev(
        'chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd': _Abbrev(
        'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: help topics in the latest release
    'osh-help-latest': _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest': _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # For the blog: docs, tags, and GitHub links
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev(
        'https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src': _Abbrev(
        'https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src': _Abbrev(
        'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the expander objects of
# their oils-* replacements.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# Matches a shortcut like $xref or $xref:foo.  Group 1 is the abbreviation
# name; group 2 is the optional argument after the colon.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
107
108
def ExpandLinks(s):
    # type: (str) -> str
    """Expand abbreviated hrefs like $xref:bash into real URLs.

    For every <a> start tag whose href value matches _SHORTCUT_RE, the href
    is replaced by the URL built by the matching entry of _ABBREVIATIONS.
    When the shortcut has no ':arg' part (e.g. href="$xref"), the text
    between <a> and </a> is used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The contents written to the output buffer: the input with matching
      href values replaced by their expansion, passed through cgi.escape().

    Raises:
      RuntimeError: if an href uses an abbreviation name that is not in
        _ABBREVIATIONS.
    """
    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the current token.  It must be advanced on EVERY path through
    # the loop, because tag_lexer.Reset(pos, end_pos) relies on it.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # An <a> without href (span start of -1) is left alone.  This
                # is a plain 'if' rather than 'continue', so that pos is
                # still advanced at the bottom of the loop.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # ReadUntilEndTag() consumed the tokens up to and
                            # including </a>, so the next token starts after
                            # </a>, not after the <a> start tag.
                            end_pos = close_tag_right

                        # Hack to so we can write [Wiki Page]($wiki) and have
                        # the link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                        # Replace only the href value with the expansion.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
168
169
class _Plugin(object):
    """
    Base class for HighlightCode() plugins, which modify
    <pre><code> ... </code></pre>

    A plugin holds the string s and the positions start_pos and end_pos of
    the span it works on.  Subclasses override PrintHighlighted().
    """

    def __init__(self, s, start_pos, end_pos):
        # type: (str, int, int) -> None
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Print the highlighted span to out.  Must be overridden."""
        raise NotImplementedError()
184
185
# One line of text: group 1 is everything up to (not including) a newline,
# followed by an optional newline.  The newline is optional because the last
# line of a span may not end with one.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line, e.g. "$ echo hi  # comment".  Groups:
#   1: the prompt (optional non-whitespace text, then a dollar sign)
#   2: the command text
#   3: the HTML-escaped <TAB> marker, if present
#   4: a trailing comment introduced by two spaces and '#', if present
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
(.*?) # arbitrary text
(?: # don't highlight tab completion
 (&lt;TAB&gt;) # it's HTML escaped!!!
 .*?
)?
(?:
 [ ][ ]([#] .*) # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that ends with a comment after some other text; group 1 is the
# comment, starting at '#'.
_EOL_COMMENT_RE = re.compile(
    r'''
.*? # arbitrary text
[ ][ ]([#] .*) # two spaces then a comment
$
''', re.VERBOSE)

# A line that is entirely a comment.  It's used with match(), so the '#' must
# be the first character of the line.
_COMMENT_LINE_RE = re.compile(r'#.*')
211
212
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Yield the end position of each line of s between start_pos and end_pos.

    A line is matched with _LINE_RE, so a yielded position is just past the
    line's newline when it has one.

    Raises:
      RuntimeError: if _LINE_RE fails to match.
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if line_match is None:
            raise RuntimeError("Should have matched a line")

        # The end of this line is also the start of the next one.
        cursor = line_match.end(0)
        yield cursor
226
227
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts, commands, and comments, line by line."""

    def _Wrap(self, out, left, right, open_tag):
        # type: (htm8.Output, int, int, str) -> None
        """Print up to left, then open_tag, then up to right, then </span>."""
        out.PrintUntil(left)
        out.Print(open_tag)
        out.PrintUntil(right)
        out.Print('</span>')

    def _HighlightLine(self, out, line_start, line_end):
        # type: (htm8.Output, int, int) -> None
        """Print the spans for one line; the first pattern that matches wins.

        The patterns are tried in this order: whole-line comment, prompt
        line, end-of-line comment.  A line that matches none of them prints
        nothing here.
        """
        comment_line = _COMMENT_LINE_RE.match(self.s, line_start, line_end)
        if comment_line:
            self._Wrap(out, comment_line.start(0), comment_line.end(0),
                       '<span class="sh-comment">')
            return

        prompt_line = _PROMPT_LINE_RE.match(self.s, line_start, line_end)
        if prompt_line:
            # The prompt and the command are always wrapped.
            self._Wrap(out, prompt_line.start(1), prompt_line.end(1),
                       '<span class="sh-prompt">')
            self._Wrap(out, prompt_line.start(2), prompt_line.end(2),
                       '<span class="sh-command">')

            # The tab completion marker and the comment are optional.
            if prompt_line.group(3):
                self._Wrap(out, prompt_line.start(3), prompt_line.end(3),
                           '<span class="sh-tab-complete">')
            if prompt_line.group(4):
                self._Wrap(out, prompt_line.start(4), prompt_line.end(4),
                           '<span class="sh-comment">')
            return

        eol_comment = _EOL_COMMENT_RE.match(self.s, line_start, line_end)
        if eol_comment:
            self._Wrap(out, eol_comment.start(1), eol_comment.end(1),
                       '<span class="sh-comment">')

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Print whatever is left of the line, highlighted or not.
            out.PrintUntil(line_end)

            line_start = line_end
279
280
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Both values are passed through to help_gen.TopicHtmlRenderer.
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> List[Any]
        """Render each line with help_gen.TopicHtmlRenderer.

        Returns:
          The debug_out list that was handed to the renderer.
        """
        # Imported here rather than at the top of the file.
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: The line is passed as it appears in s, i.e. HTML ESCAPED.
            # It's valid to just add tags and leave everything alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means the renderer has no replacement for this line.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
313
314
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, for ```python, ```sh, ```c, etc."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Name of the Pygments lexer, passed to get_lexer_by_name()
        self.lang = lang

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Print the Pygments HTML for the span to out.

        Raises:
          ImportError: if Pygments is not installed.  HighlightCode() checks
            for that before it creates this plugin.
        """
        # Import the submodules explicitly.  The module-level 'import pygments'
        # only guarantees the top-level package: pygments.lexers and
        # pygments.formatters are attributes of it only once something has
        # imported them.
        from pygments import formatters
        from pygments import highlight
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
332
333
def SimpleHighlightCode(s):
    # type: (str) -> str
    """Simple highlighting for test/shell-vs-shell.sh.

    The text between each <pre> start tag and its </pre> end tag is run
    through ShPromptPlugin.  The tags themselves are not part of the
    highlighted span.

    Args:
      s: an HTML string.

    Returns:
      The contents written to the output buffer.
    """
    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the current token, used by tag_lexer.Reset()
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'pre':
                # Right after the <pre> start tag
                pre_end_pos = end_pos

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment
                # highlighting we want!  It gets the same kind of span as in
                # HighlightCode(): from the end of the start tag to the
                # beginning of the end tag.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # ReadUntilEndTag() consumed the tokens up to and including
                # </pre>, so the next token starts after </pre>.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
376
377
# Parses the CSS class of a chapter-links code block.  Group 1 is the chapter
# name; group 2 is the optional number after an underscore, which
# HighlightCode() converts to linkify_stop_col.
CSS_CLASS_RE = re.compile(
    r'''
 language-chapter-links-
 ([a-z0-9-]+) # chapter name
 (?:_(\d+))? # optional linkify_stop_col
 ''', re.VERBOSE)
384
385
def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the contents of <pre><code> ... </code></pre> blocks.

    Algorithm:
      1. Find each <pre> start tag whose next token is a <code> start tag,
         and read until the closing </code>.
      2. Choose what to do from the class attribute of <code>:
         - no class: use default_highlighter, if it's not None
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oil-sh: ShPromptPlugin, which
           highlights lines that look like a shell prompt and command with
           <span>
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin.  In this case the original
           <pre><code> is removed, because Pygments prints its own <pre>.
      3. Let the plugin print the block.

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list.  For each chapter-links block, a dict with
        the keys 'to_chap' and 'lines' is appended to it.

    Returns:
      The contents written to the output buffer.

    Raises:
      RuntimeError: if default_highlighter is needed and is not one of the
        supported names.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the current token.  It must be advanced on EVERY path through
    # the loop, because tag_lexer.Reset(pos, end_pos) relies on it.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if (tok_id == h8_id.StartTag and
                        tag_lexer.GetTagName() == 'code'):

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        # ReadUntilEndTag() consumed the tokens up to and
                        # including </code>, so the next token starts after
                        # </code>.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the
                                # comment highlighting we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        # Same as above: the next token starts after </code>.
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            # Here's we're KEEPING the original <pre><code>
                            # Print everything up to and including
                            # <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:  # language-*, but no Pygments
                            # The block is left as it is.  There's no
                            # 'continue' here, so that pos is still advanced
                            # at the bottom of the loop.
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == h8_id.EndTag, tok_id
                            assert (tag_lexer.GetTagName() == 'pre'
                                    ), tag_lexer.GetTagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
545
546
def ExtractCode(s, f):
    # type: (str, IO[str]) -> None
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> tags without a class attribute are extracted.  Each block is
    preceded by a '# block N' line, where N counts from 0.

    Args:
      s: an HTML string.
      f: the file to print to.
    """
    out = htm8.Output(s, f)
    tag_lexer = htm8.TagLexer(s)

    block_num = 0
    # Start of the current token, used by tag_lexer.Reset()
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if (tok_id == h8_id.StartTag and
                        tag_lexer.GetTagName() == 'code'):

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # ReadUntilEndTag() consumed the tokens up to and
                        # including </code>, so the next token starts after
                        # </code>.
                        end_pos = slash_code_right

        pos = end_pos

    # NOTE: out.PrintTheRest() is not called here (the call was commented
    # out), presumably so that only the extracted blocks are printed.
612
613
class ShellSession(object):
    """
    Not implemented yet: PrintHighlighted() does nothing.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that a snippet like

    $ x=one
    $ echo $x
    $ echo two

    becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    And then you will have
      blog/2019/12/_shell_session/
        $hash1-stdout.txt
        $hash2-stdout.txt

    It hashes the command with md5 and then brings it back.
    If the file already exists then it doesn't run it again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        # type: (str, str) -> None
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: e.g. ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        # type: (str, int, int, htm8.Output) -> None
        """
        Placeholder that does nothing yet.

        Args:
          s: an HTML string.
        """
        return None
663
664
def main(argv):
    # type: (List[str]) -> None
    """Command line entry point.

    The only action is 'highlight': read HTML from stdin and print the
    result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if argv[1] is any other action.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    contents = sys.stdin.read()
    print(SimpleHighlightCode(contents))


if __name__ == '__main__':
    main(sys.argv)