OILS / doctools / oils_doc.py View on Github | oils.pub

679 lines, 372 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShellSession -- meant to run shell snippets and cache the output (still a stub)
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17try:
18 from cStringIO import StringIO
19except ImportError:
20 # for python3
21 from io import StringIO # type: ignore
22import re
23import sys
24
25from typing import Iterator, Any, List, Optional, IO
26
27from data_lang import htm8
28from doctools.util import log
29from lazylex import html
30
31try:
32 import pygments
33except ImportError:
34 pygments = None
35
36
class _Abbrev(object):
    """Callable URL template for one link abbreviation.

    The template is a %-format string with the named placeholder %(value)s,
    which may appear any number of times.
    """

    def __init__(self, fmt):
        # type: (str) -> None
        self.fmt = fmt

    def __call__(self, value):
        # type: (str) -> str
        """Return the template with each %(value)s replaced by value."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
46
47
# Maps an abbreviation name, as written in href="$name:arg", to the callable
# that builds the real URL.  Each template receives the argument as
# %(value)s.
_ABBREVIATIONS = {
    name: _Abbrev(fmt) for name, fmt in [
        ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

        # alias for osh-help, for backward compatibility
        # to link to the same version

        # TODO: Remove all of these broken links!
        ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

        # New style: one for every chapter?
        # Problem: can't use relative links here, because some are from
        # doc/ref, and some are from doc
        ('chap-type-method',
         'chap-type-method.html?topic=%(value)s#%(value)s'),
        ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),
        ('chap-builtin-cmd',
         'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

        # for blog
        ('osh-help-latest',
         '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
         ),
        ('oil-help-latest',
         '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
         ),

        # For the blog
        ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),
        ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
        ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
        ('oils-src',
         'https://github.com/oilshell/oil/blob/master/%(value)s'),
        ('blog-code-src',
         'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
        ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
        ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
    ]
}

# Backward compatibility: the old oil-* names share the oils-* entries.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# $xref:foo
# Group 1 is the abbreviation name; group 2 is the optional argument after
# the colon.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
107
108
def ExpandLinks(s):
    # type: (str) -> str
    """Expand abbreviated link targets such as $xref:bash.

    Every <a> start tag whose href matches _SHORTCUT_RE gets its href value
    replaced by the URL that the matching _ABBREVIATIONS entry produces.
    When the href has no ':arg' part (e.g. href="$xref"), the text between
    <a ...> and </a> is used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The HTML with the abbreviated hrefs expanded; everything else is
      copied through unchanged.

    Raises:
      RuntimeError: if an href names an abbreviation that isn't in
        _ABBREVIATIONS.
    """
    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the token that the next call to next(it) returns.  It must
    # stay in sync with the iterator, because tag_lexer.Reset() is given the
    # (pos, end_pos) span of the tag to examine.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                # -1 means there is no href, e.g. <a name="...">.  Such tags
                # are left alone, but we must still fall through to the
                # 'pos = end_pos' update below.
                if href_start != -1:
                    m = _SHORTCUT_RE.match(s[href_start:href_end])
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # This consumes tokens up to and including </a>,
                            # so advance end_pos to keep pos in sync with
                            # the iterator.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]
                            end_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have
                        # the link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)

                        # Replace only the attribute value.
                        # NOTE: cgi.escape() exists in Python 2 (this
                        # script's interpreter) but was removed in
                        # Python 3.8.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(func(arg)))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
168
169
class _Plugin(object):
    """
    Base class for HighlightCode() plugins, which modify the text of a
    <pre><code> ... </code></pre> block.

    A plugin works on the region of s from start_pos up to end_pos.
    """

    def __init__(self, s, start_pos, end_pos):
        # type: (str, int, int) -> None
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Write the highlighted region to out.  Subclasses override this."""
        raise NotImplementedError()
184
185
# Matches one line: any run of non-newline characters, then an optional
# newline.  Used by Lines() to find where each line ends.
# Optional newline at end
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# Matches a shell prompt line in HTML-escaped text.  Groups:
#   1: the prompt, up to and including the dollar sign
#   2: the command text
#   3: an optional HTML-escaped <TAB> marker
#   4: an optional end-of-line comment
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)    # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# Matches a non-prompt line that ends with a comment.  Group 1 is the
# comment, starting at the '#'.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# Matches a comment starting at the position where matching begins, i.e. a
# line that is nothing but a flush-left comment.
_COMMENT_LINE_RE = re.compile(r'#.*')
211
212
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Yield the end position of each line within s[start_pos:end_pos].

    A line's end position is just past its newline, or end_pos when the last
    line has no newline.  Nothing is yielded when start_pos >= end_pos.

    Raises:
      RuntimeError: if _LINE_RE fails to match, which isn't expected.
    """
    line_start = start_pos
    while line_start < end_pos:
        m = _LINE_RE.match(s, line_start, end_pos)
        if m is None:
            raise RuntimeError("Should have matched a line")

        # The end of this line is also where the next one starts.
        line_start = m.end(0)
        yield line_start
226
227
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts, commands and comments, one line at a time.

    Nothing is executed; lines are only matched against regexes and wrapped
    in <span> tags.
    """

    @staticmethod
    def _PrintSpan(out, m, group, open_tag):
        # type: (htm8.Output, Any, int, str) -> None
        """Wrap the text of one regex group in open_tag ... </span>."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')

    def _HighlightLine(self, out, line_start, line_end):
        # type: (htm8.Output, int, int) -> None
        """Emit the <span> tags for one line; the first matching rule wins."""
        s = self.s

        # 1. The line is a flush-left comment.
        m = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if m:
            self._PrintSpan(out, m, 0, '<span class="sh-comment">')
            return

        # 2. The line is a prompt followed by a command.
        m = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if m:
            #log('MATCH %r', m.groups())
            self._PrintSpan(out, m, 1, '<span class="sh-prompt">')
            self._PrintSpan(out, m, 2, '<span class="sh-command">')
            if m.group(3):
                self._PrintSpan(out, m, 3, '<span class="sh-tab-complete">')
            if m.group(4):
                self._PrintSpan(out, m, 4, '<span class="sh-comment">')
            return

        # 3. Any other line that ends with a comment.
        m = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if m:
            self._PrintSpan(out, m, 1, '<span class="sh-comment">')

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever part of the line wasn't wrapped is copied as is.
            out.PrintUntil(line_end)

            line_start = line_end
279
280
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        # type: (str, int, int, str, int) -> None
        """
        Args:
          s: an HTML string.
          start_pos, end_pos: the region of s to process.
          chapter: chapter name, passed on to help_gen.TopicHtmlRenderer.
          linkify_stop_col: passed on to help_gen.TopicHtmlRenderer;
            HighlightCode() passes -1 when the CSS class gives no column.
        """
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> List[Any]
        """Render each line with TopicHtmlRenderer and write it to out.

        Returns:
          The debug_out list handed to TopicHtmlRenderer.  Unlike the other
          plugins, this one returns a value; HighlightCode() stores it as
          the 'lines' of the block's debug info.
        """
        # Imported here rather than at the top of the file, as the original
        # code does.
        from doctools import help_gen

        debug_out = []  # type: List[Any]
        r = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                       self.linkify_stop_col)

        pos = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: IndexLineToHtml accepts an HTML ESCAPED line.  It's valid
            # to just add tags and leave everything alone.
            line = self.s[pos:line_end]

            html_line = r.Render(line)

            # None means: leave the line unchanged.  The output position
            # isn't moved, so the line is copied by a later PrintUntil().
            if html_line is not None:
                out.PrintUntil(pos)
                out.Print(html_line)
                out.SkipTo(line_end)

            pos = line_end

        return debug_out
313
314
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, using the lexer named lang."""

    def __init__(self, s, start_pos, end_pos, lang):
        # type: (str, int, int, str) -> None
        """
        Args:
          s: an HTML string.
          start_pos, end_pos: the region of s that holds the escaped code.
          lang: a Pygments lexer name, e.g. 'python' for ```python.
        """
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Write the Pygments HTML for the code region to out.

        Only call this when the module-level 'import pygments' succeeded;
        HighlightCode() checks that before creating this plugin.
        """
        # 'import pygments' by itself doesn't load the lexers and formatters
        # subpackages, so pygments.lexers could raise AttributeError.  Import
        # them explicitly.
        from pygments import formatters as pygments_formatters
        from pygments import lexers as pygments_lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = pygments_lexers.get_lexer_by_name(self.lang)
        formatter = pygments_formatters.HtmlFormatter()

        highlighted = pygments.highlight(code, lexer, formatter)
        out.Print(highlighted)
332
333
def SimpleHighlightCode(s):
    # type: (str) -> str
    """Simple highlighting for test/shell-vs-shell.sh.

    The text inside every <pre> ... </pre> is highlighted with
    ShPromptPlugin; everything else, including the <pre> tags themselves, is
    copied through unchanged.

    Args:
      s: an HTML string.

    Returns:
      The HTML with <span> tags added inside the <pre> blocks.
    """
    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the token that the next call to next(it) returns.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                # Positions of the left and right edges of </pre>
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment
                # highlighting we want!  It gets only the text BETWEEN the
                # tags, like the <code> blocks in HighlightCode().  Giving it
                # the <pre> tag too could print that tag twice when code
                # follows it on the same line.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # ReadUntilEndTag() consumed tokens through </pre>, so keep
                # pos in sync with the iterator.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
376
377
# Matches the CSS class of a <code> block whose lines link into a reference
# chapter.  Group 1 is the chapter name; group 2 is the optional number after
# the underscore, which HighlightCode() passes to HelpTopicsPlugin as
# linkify_stop_col.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)  # chapter name
    (?:_(\d+))?   # optional linkify_stop_col
    ''', re.VERBOSE)
384
385
def _PrintShPromptBlock(s, out, code_start_pos, slash_code_left):
    # type: (str, htm8.Output, int, int) -> None
    """Highlight the shell code in s[code_start_pos:slash_code_left].

    Here we're KEEPING the original <pre><code>; only <span> tags are added
    inside it.
    """
    # Print everything up to and including <pre><code ...>
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
      1. Collect what's inside <pre><code> ...
      2. Choose a plugin from the class attribute of <code>:
         - no class: use default_highlighter, if it isn't None
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oil-sh: ShPromptPlugin
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin, which REPLACES the original
           <pre><code> ... </code></pre>
      3. Copy everything else through unchanged.

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list; one dict with keys 'to_chap' and 'lines' is
        appended for each language-chapter-links-* block.

    Returns:
      The highlighted HTML.

    Raises:
      RuntimeError: if a block has no class and default_highlighter is a
        name that isn't known.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = htm8.TagLexer(s)

    # Start of the token that the next call to next(it) returns.  Whenever
    # tokens are consumed outside the main loop, end_pos is advanced so that
    # the 'pos = end_pos' at the bottom keeps this in sync with the iterator.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is a code block.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _PrintShPromptBlock(s, out, code_start_pos,
                                                    slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            _PrintShPromptBlock(s, out, code_start_pos,
                                                slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:
                            # language-* without Pygments: the block is
                            # copied through unhighlighted.
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == h8_id.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
544
545
def ExtractCode(s, f):
    # type: (str, IO[str]) -> None
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and write it to f

    Only <code> blocks WITHOUT a class attribute are extracted.  Each one is
    preceded by a '# block N' line, with N counting from 0.

    Args:
      s: an HTML string.
      f: file to write the extracted code to.
    """
    out = htm8.Output(s, f)
    tag_lexer = htm8.TagLexer(s)

    block_num = 0
    # Start of the token that the next call to next(it) returns.
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is a code block.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # ReadUntilEndTag() consumed tokens through </code>,
                        # so keep pos in sync with the iterator.
                        end_pos = slash_code_right

        pos = end_pos

    # Deliberately no out.PrintTheRest(): only the text of the code blocks is
    # written, not the HTML around them.
610
611
class ShellSession(object):
    """
    Placeholder for running shell snippets; nothing is implemented yet.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that a block like

        $ x=one
        $ echo $x
        $ echo two

    becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    and that the output is cached in files like

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    The command is hashed with md5 to find its cache file.  If the file
    already exists, the command isn't run again.  You can delete the file to
    redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.  Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        # type: (str, str) -> None
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        # type: (str, int, int, htm8.Output) -> None
        """Not implemented yet: does nothing.

        Args:
          s: an HTML string.
        """
        return None
661
662
def main(argv):
    # type: (List[str]) -> None
    """Command line entry point.

    argv[1] is the action.  The only one is 'highlight', which reads HTML
    from stdin and prints the result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if the action is anything else.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    page = sys.stdin.read()
    highlighted = SimpleHighlightCode(page)
    print(highlighted)


if __name__ == '__main__':
    main(sys.argv)