OILS / doctools / oils_doc.py View on Github | oils.pub

670 lines, 374 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
  ShellSession -- will run shell snippets and cache the output (not implemented yet)
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17from typing import Iterator
18from typing import Any
19from typing import List
20from typing import Optional
21try:
22 from cStringIO import StringIO
23except ImportError:
24 # for python3
25 from io import StringIO # type: ignore
26import re
27import sys
28
29from doctools.util import log
30from lazylex import html
31
32try:
33 import pygments
34except ImportError:
35 pygments = None
36
37
class _Abbrev(object):
    """Callable URL template that fills a value into a %-format string."""

    def __init__(self, fmt):
        # type: (str) -> None
        # fmt refers to its single argument as %(value)s, possibly several
        # times.
        self.fmt = fmt

    def __call__(self, value):
        # type: (str) -> str
        """Return the template with each %(value)s replaced by value."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
46
47
# Maps the name used in a $name or $name:arg link shortcut to the callable
# that builds the expanded URL.
_ABBREVIATIONS = {
    'xref':
    _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # TODO: Remove all of these broken links!
    #
    # 'help' is an alias for 'osh-help', kept for backward compatibility so
    # that it links to the same version.
    'help':
    _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help':
    _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help':
    _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: relative links can't be used here, because some pages live in
    # doc/ref and some in doc.
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin':
    _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: links into the latest release
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # Also for the blog
    'oils-doc':
    _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag':
    _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit':
    _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue':
    _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki':
    _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
104
# Matches a link shortcut such as $xref:foo.  Group 1 is the abbreviation
# name; group 2 is the argument after the colon, or None when it is omitted.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', flags=re.VERBOSE)
107
108
def ExpandLinks(s):
    # type: (str) -> str
    """Expand $xref:bash and so forth.

    Every <a> start tag whose href matches _SHORTCUT_RE has that href replaced
    by the URL built by the matching entry of _ABBREVIATIONS.  When the
    shortcut has no argument (href="$xref"), the text between <a> and </a> is
    used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The HTML with link shortcuts expanded; everything else is copied as is.

    Raises:
      RuntimeError: if a shortcut names an unknown abbreviation.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that 'it' yields next.  It must be updated on
    # every path through the loop, because tag_lexer.Reset() is given the
    # token's span.
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                # -1 is treated as "no href attribute"; such an anchor is left
                # alone, but we still fall through to the 'pos' update below.
                if href_start != -1:
                    m = _SHORTCUT_RE.match(s[href_start:href_end])
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # This consumes tokens up to and including </a>.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]
                            # The next token starts after </a>, not after <a>.
                            end_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)

                        # Replace only the attribute value; the rest of the
                        # tag is copied through.
                        # NOTE(review): cgi.escape exists in Python 2 only (it
                        # was removed in Python 3.8).
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(func(arg)))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
168
169
class _Plugin(object):
    """Base class for HighlightCode() plugins.

    A plugin modifies the contents of one <pre><code> ... </code></pre> block.
    """

    def __init__(self, s, start_pos, end_pos):
        # type: (str, int, int) -> None
        # s is the whole document; [start_pos, end_pos) is the span this
        # plugin instance works on.
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        """Write the highlighted span to out.  Subclasses must override."""
        raise NotImplementedError()
183
184
# One line of text, with an optional newline at the end.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line.  Groups: 1 = prompt, 2 = command, 3 = tab-completion
# marker (optional), 4 = trailing comment (optional).
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that is not a prompt but ends with a comment (group 1).
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that is a comment from its first character.
_COMMENT_LINE_RE = re.compile(r'#.*')
210
211
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Yield each position in s[start_pos:end_pos] that ends a line.

    A yielded position is just past the line's newline, or at end_pos when the
    last line has no newline.
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if not line_match:
            raise RuntimeError("Should have matched a line")

        cursor = line_match.end(0)
        yield cursor
225
226
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def PrintHighlighted(self, out):
        # type: (html.Output) -> None
        """Copy the span to out line by line, adding <span> markup."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._MarkUpLine(out, line_start, line_end)

            # Flush whatever part of the line has not been printed yet.
            out.PrintUntil(line_end)

            line_start = line_end

    def _MarkUpLine(self, out, line_start, line_end):
        # type: (html.Output, int, int) -> None
        """Wrap the recognized parts of one line in <span> elements.

        A line is tried as a whole-line comment first, then as a prompt line,
        then as a line with a trailing comment.  A line matching none of these
        gets no markup.
        """

        def Span(open_tag, left, right):
            # Print s[left:right] between open_tag and </span>.
            out.PrintUntil(left)
            out.Print(open_tag)
            out.PrintUntil(right)
            out.Print('</span>')

        s = self.s

        m = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if m:
            Span('<span class="sh-comment">', m.start(0), m.end(0))
            return

        m = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if m:
            Span('<span class="sh-prompt">', m.start(1), m.end(1))
            Span('<span class="sh-command">', m.start(2), m.end(2))

            # Groups 3 and 4 are optional in the pattern.
            if m.group(3):
                Span('<span class="sh-tab-complete">', m.start(3), m.end(3))
            if m.group(4):
                Span('<span class="sh-comment">', m.start(4), m.end(4))
            return

        m = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if m:
            Span('<span class="sh-comment">', m.start(1), m.end(1))
278
279
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
        # Both values are handed to help_gen.TopicHtmlRenderer unchanged.
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Replace each line that the topic renderer can render.

        Returns:
          The debug list that was passed to help_gen.TopicHtmlRenderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the renderer is given an HTML ESCAPED line.  It's valid to
            # just add tags and leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means the line is kept as is.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
311
312
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, using the lexer named by lang."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Name handed to pygments' get_lexer_by_name(), e.g. 'python'.
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the Pygments HTML for the span to out.

        Callers must only use this plugin when pygments could be imported.
        """
        # A bare 'import pygments' does not load these subpackages, so
        # 'pygments.lexers' only worked if some other module had imported it
        # first.  Import them explicitly.
        from pygments import formatters
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = pygments.highlight(code, lexer, formatter)
        out.Print(highlighted)
328
329
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The content of every <pre> element is run through ShPromptPlugin; the rest
    of the document is copied unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that 'it' yields next.
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                # This consumes tokens up to and including </pre>.
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including the <pre> tag.
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  Only the element's content is highlighted, so that
                # neither <pre> nor </pre> can end up inside a <span>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The next token starts after </pre>, not after <pre>.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
371
372
# CSS class of a chapter-links block, e.g. language-chapter-links-cmd-lang_35.
# Group 1 is the chapter name; group 2 is the optional column number.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''',
    flags=re.VERBOSE)
379
380
def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
      1. Collect what's inside <pre><code> ...
      2. Pick a plugin from the class attribute of <code>:
         - no class: use default_highlighter, if given
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oils-sh, language-oil-sh:
           ShPromptPlugin
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: Pygments, if it could be imported
      3. Let the plugin print the highlighted block.

    Args:
      s: an HTML string.
      default_highlighter: None, or 'sh-prompt' / 'oils-sh' / 'oil-sh'.
      debug_out: optional list; for each chapter-links block, a dict with keys
        'to_chap' and 'lines' is appended.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is not a known name.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token that 'it' yields next.  It must be updated on
    # every path through the loop, because tag_lexer.Reset() is given the
    # token's span.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        # This consumes tokens up to and including </code>.
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The next token starts after </code>.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the
                                # comment highlighting we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        # This consumes tokens up to and including </code>.
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The next token starts after </code>.
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        # oil-sh for compatibility, like default_highlighter
                        elif css_class in ('language-sh-prompt',
                                           'language-oils-sh',
                                           'language-oil-sh'):
                            # Here we're KEEPING the original <pre><code>
                            # Print everything up to and including
                            # <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments
                            if pygments is None:
                                # The block is left as is.  Don't 'continue'
                                # here: that would skip the 'pos' update below.
                                log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                    )
                            else:
                                # We REMOVE the original <pre><code> because
                                # Pygments gives you a <pre> already

                                # We just read closing </code>, and the next
                                # one should be </pre>.
                                try:
                                    tok_id, end_pos = next(it)
                                except StopIteration:
                                    break
                                tag_lexer.Reset(slash_code_right, end_pos)
                                assert tok_id == h8_id.EndTag, tok_id
                                assert (tag_lexer.TagName() == 'pre'
                                        ), tag_lexer.TagName()
                                slash_pre_right = end_pos

                                out.PrintUntil(pre_start_pos)

                                lang = css_class[len('language-'):]
                                plugin = PygmentsPlugin(s, code_start_pos,
                                                        slash_code_left, lang)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_pre_right)
                                f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
539
540
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> tags WITHOUT a class attribute are extracted.  Each block is
    preceded by a '# block N' line, where N counts from 0.

    Args:
      s: an HTML string.
      f: file-like object that the text is written to.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0

    # Start offset of the token that 'it' yields next.
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        # Drop the HTML before the block
                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        # This consumes tokens up to and including </code>.
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # The next token starts after </code>, not after
                        # <code>.
                        end_pos = slash_code_right

        pos = end_pos

    # Deliberately no out.PrintTheRest(): only the code blocks are written.
604
605
class ShellSession(object):
    """Not implemented yet: run the commands of a shell session snippet.

    TODO: Pass this to HighlightCode as a plugin

        $ x=one
        $ echo $x
        $ echo two

    Becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    And then you will have

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    The idea is to hash the command with md5 and use the hash to find its
    output again.  If the file already exists, the command isn't run again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    the output from bash.  Use the Lines iterator to get lines.

    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.cache_dir = cache_dir
        self.shell_exe = shell_exe

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Placeholder that does nothing so far.

        Args:
          s: an HTML string.
        """
653
654
def main(argv):
    """Command line entry point; argv[1] names the action.

    The only action is 'highlight', which reads HTML from stdin and prints the
    highlighted HTML to stdout.

    Raises:
      RuntimeError: for any other action.
    """
    action = argv[1]
    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    page = sys.stdin.read()
    print(SimpleHighlightCode(page))


if __name__ == '__main__':
    main(sys.argv)