OILS / doctools / oils_doc.py View on Github | oils.pub

668 lines, 373 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShellSession -- unimplemented stub; intended to run shell snippets and cache the output
11"""
12from __future__ import print_function
13
14import cgi
15from typing import Iterator
16from typing import Any
17from typing import List
18from typing import Optional
19try:
20 from cStringIO import StringIO
21except ImportError:
22 # for python3
23 from io import StringIO # type: ignore
24import re
25import sys
26
27from doctools.util import log
28from lazylex import html
29
30try:
31 import pygments
32except ImportError:
33 pygments = None
34
35
class _Abbrev(object):
    """Callable that turns a link argument into a URL.

    fmt is a %-style template that refers to the argument as %(value)s; the
    placeholder may appear any number of times.
    """

    def __init__(self, fmt):
        # type: (str) -> None
        self.fmt = fmt

    def __call__(self, value):
        # type: (str) -> str
        """Return the template with each %(value)s replaced by value."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
44
45
# Maps the NAME in an href like $NAME:arg to the callable that builds the
# expanded URL from arg.
_ABBREVIATIONS = {
    name: _Abbrev(template)
    for name, template in (
        ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

        # 'help' is an alias for osh-help, for backward compatibility
        # to link to the same version

        # TODO: Remove all of these broken links!
        ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

        # New style: one for every chapter?
        # Problem: can't use relative links here, because some are from
        # doc/ref, and some are from doc
        ('chap-type-method',
         'chap-type-method.html?topic=%(value)s#%(value)s'),
        ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),
        ('chap-builtin-cmd',
         'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

        # for blog
        ('osh-help-latest',
         '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
         ),
        ('oil-help-latest',
         '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
         ),

        # For the blog
        ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),
        ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
        ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
        ('oils-src', 'https://github.com/oilshell/oil/blob/master/%(value)s'),
        ('blog-code-src',
         'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
        ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
        ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
    )
}

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# Matches a shortcut such as $xref:foo.  Group 1 is the abbreviation name and
# group 2 is the optional argument after the colon ($xref alone also matches).
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', flags=re.VERBOSE)
105
106
def ExpandLinks(s):
    # type: (str) -> str
    """Expand link shortcuts such as $xref:bash inside <a href="...">.

    Every <a> start tag whose href matches _SHORTCUT_RE has that href replaced
    by the URL built by the corresponding entry of _ABBREVIATIONS.  When the
    shortcut has no argument (href="$xref"), the raw text between <a> and </a>
    is used as the argument.  Everything else in s is copied through
    unchanged.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with the shortcut hrefs expanded.

    Raises:
      RuntimeError: if a shortcut names an abbreviation that is not in
        _ABBREVIATIONS.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Invariant: at the top of each iteration, pos is the start of the token
    # that ends at end_pos.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # href_start == -1 is treated as "no href" (e.g. <a name=...>).
                # This is an 'if' rather than a 'continue' so that the cursor
                # update at the bottom of the loop still runs; skipping it
                # left pos pointing at this tag when the next one was lexed.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    new = None
                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            close_tag_left, close_tag_right = \
                                    html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]
                            # The iterator is now past </a>, so the cursor
                            # has to follow it.
                            end_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                    if new is not None:
                        # Replace only the attribute value; the rest of the
                        # tag is copied by later PrintUntil / PrintTheRest
                        # calls.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
166
167
class _Plugin(object):
    """
    Base class for HighlightCode() plugins, which modify
    <pre><code> ... </code></pre>

    A plugin is given the whole HTML string plus the span it is responsible
    for; subclasses implement PrintHighlighted().
    """

    def __init__(self, s, start_pos, end_pos):
        # type: (str, int, int) -> None
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        """Write the highlighted span to out.  Must be overridden."""
        raise NotImplementedError()
181
182
# One line of text: everything up to a newline, plus the newline itself when
# there is one.
_LINE_RE = re.compile(r'(.*) \n?', flags=re.VERBOSE)

# A shell prompt line.  Groups: 1 = prompt, 2 = command, 3 = tab-completion
# marker (optional), 4 = trailing comment (optional).
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)    # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''',
    flags=re.VERBOSE)

# A line that is not a prompt but ends with a comment.  Group 1 = the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''',
    flags=re.VERBOSE)

# A line that is a comment from its first character.
_COMMENT_LINE_RE = re.compile(r'#.*')
208
209
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Yield the end position of each line of s[start_pos:end_pos].

    A line's end position is just past its newline, or end_pos for a final
    line that has no newline.
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if line_match is None:
            raise RuntimeError("Should have matched a line")

        cursor = line_match.end(0)
        yield cursor
223
224
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        """Copy the span to out, wrapping prompts, commands and comments in
        <span> tags one line at a time."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever was not wrapped in a <span> is copied unchanged.
            out.PrintUntil(line_end)

            line_start = line_end

    def _HighlightLine(self, out, line_start, line_end):
        # type: (html.Output, int, int) -> None
        """Emit <span> tags for one line.

        The patterns are tried in order -- whole-line comment, prompt line,
        end-of-line comment -- and only the first one that matches is applied.
        """
        m = _COMMENT_LINE_RE.match(self.s, line_start, line_end)
        if m:
            self._WrapGroup(out, m, 0, '<span class="sh-comment">')
            return

        m = _PROMPT_LINE_RE.match(self.s, line_start, line_end)
        if m:
            #log('MATCH %r', m.groups())
            self._WrapGroup(out, m, 1, '<span class="sh-prompt">')
            self._WrapGroup(out, m, 2, '<span class="sh-command">')

            # Groups 3 and 4 are optional in the pattern.
            if m.group(3):
                self._WrapGroup(out, m, 3, '<span class="sh-tab-complete">')
            if m.group(4):
                self._WrapGroup(out, m, 4, '<span class="sh-comment">')
            return

        m = _EOL_COMMENT_RE.match(self.s, line_start, line_end)
        if m:
            self._WrapGroup(out, m, 1, '<span class="sh-comment">')

    @staticmethod
    def _WrapGroup(out, m, group, open_tag):
        """Copy input up to the group, then emit the group inside open_tag
        ... </span>."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')
276
277
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Both values are passed through to help_gen.TopicHtmlRenderer.
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Render each line of the span with help_gen.TopicHtmlRenderer.

        Returns:
          The debug_out list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is still HTML ESCAPED here.  It's valid to just
            # add tags and leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means "leave this line as is"; it is then copied by a later
            # PrintUntil / PrintTheRest call on out.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
309
310
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, using the lexer named by lang."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Lexer name passed to pygments.lexers.get_lexer_by_name()
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the Pygments HTML for the span to out.

        The input span itself is not consumed here; the caller is expected to
        skip over it.
        """
        # 'import pygments' at the top of the file binds only the package.
        # Submodules are not attributes of a package until they are imported,
        # so import the two we need explicitly rather than relying on some
        # other module having loaded them.
        from pygments import formatters
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = pygments.highlight(code, lexer, formatter)
        out.Print(highlighted)
326
327
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The text inside every <pre> ... </pre> is run through ShPromptPlugin; the
    tags themselves and everything outside them are copied unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Invariant: at the top of each iteration, pos is the start of the token
    # that ends at end_pos.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                # Keep both edges of </pre>.  The original assigned both
                # results to one name, which lost the left edge and made the
                # plugin treat the tags as part of the text.
                slash_pre_left, slash_pre_right = \
                        html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  It only sees the text between the tags, the same
                # way HighlightCode() uses it for <code>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>, so the cursor has to
                # follow it.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
369
370
# Parses the class of a ```chapter-links-NAME or ```chapter-links-NAME_NN code
# block.  Group 1 is the chapter name, group 2 the optional number.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''',
    flags=re.VERBOSE)
377
378
def _HighlightShPrompt(s, out, code_start_pos, slash_code_left):
    # type: (str, html.Output, int, int) -> None
    """Highlight the inside of a <code> element with ShPromptPlugin.

    The original <pre><code> tags are KEPT: everything up to and including
    the opening <code> tag is printed first, and </code> is left for the
    caller to print.
    """
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks of an HTML string.

    Algorithm:
      1. Collect what's inside <pre><code> ...
      2. Pick a plugin based on the class attribute of <code>:
         - no class: use default_highlighter, if one was given
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oils-sh, language-oil-sh:
           ShPromptPlugin, which wraps prompts, commands and comments in
           <span>
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin, which REPLACES the whole
           <pre> ... </pre> element
      3. Copy everything else unchanged.

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list.  For every language-chapter-links-* block, a
        dict {'to_chap': ..., 'lines': ...} is appended to it.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if a <code> block without a class is found and
        default_highlighter is not one of the known names.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Invariant: at the top of each iteration, pos is the start of the token
    # that ends at end_pos.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>, so the cursor has
                        # to follow it.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _HighlightShPrompt(s, out, code_start_pos,
                                                   slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'code')
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        # The same names that default_highlighter accepts;
                        # oil-sh for compatibility
                        elif css_class in ('language-sh-prompt',
                                           'language-oils-sh',
                                           'language-oil-sh'):
                            _HighlightShPrompt(s, out, code_start_pos,
                                               slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:  # language-*, but no Pygments
                            # Leave the block alone.  This is a branch rather
                            # than a 'continue' so that the cursor update at
                            # the bottom of the loop still runs.
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
537
538
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> elements WITHOUT a class attribute are extracted.  Each one
    is written as a '# block N' line followed by its decoded text; nothing
    else from s is written.

    Args:
      s: an HTML string.
      f: file-like object that the extracted code is written to.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    # Invariant: at the top of each iteration, pos is the start of the token
    # that ends at end_pos.
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        # Drop the HTML before the block
                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # The iterator is now past </code>, so the cursor has
                        # to follow it.
                        end_pos = slash_code_right

        pos = end_pos

    #out.PrintTheRest()
602
603
class ShellSession(object):
    """
    Placeholder for a plugin that runs shell snippets.  NOT IMPLEMENTED:
    PrintHighlighted() currently does nothing.

    TODO: Pass this to HighlightCode as a plugin

    Intended behavior:

    $ x=one
    $ echo $x
    $ echo two

    Becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    And then you will have
    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

    The plan is to hash the command with md5 and use that to find its cached
    output.  If the file already exists then the command isn't run again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """
        Not implemented yet; does nothing.

        Args:
          s: an HTML string.
        """
        return None
651
652
def main(argv):
    """Command line entry point.

    argv[1] is the action.  The only action is 'highlight', which reads HTML
    from stdin and prints the result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: for any other action.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    page = sys.stdin.read()
    highlighted = SimpleHighlightCode(page)
    print(highlighted)


if __name__ == '__main__':
    main(sys.argv)