OILS / doctools / oils_doc.py View on Github | oils.pub

657 lines, 369 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14import cgi
15try:
16 from cStringIO import StringIO
17except ImportError:
18 from io import StringIO # python3
19import re
20import sys
21
22from doctools.util import log
23from lazylex import html
24
25try:
26 import pygments
27except ImportError:
28 pygments = None
29
30
class _Abbrev(object):
    """Callable URL template: calling it fills in the 'value' placeholder."""

    def __init__(self, fmt):
        # fmt is a %-style format string that refers to the key 'value',
        # e.g. '/cross-ref.html?tag=%(value)s#%(value)s'
        self.fmt = fmt

    def __call__(self, value):
        """Return self.fmt with each %(value)s replaced by the argument."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
38
39
# Maps the shortcut name in an href like $xref:bash to the callable that
# builds the real URL from the argument after the colon.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for 'osh-help', kept for backward compatibility.
    # These are relative, so they link to the same version of the docs.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: these always point at the latest release
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # Also for the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the oils-* expanders
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# Matches $xref:foo, or a bare $xref with no argument.
# Group 1 is the abbreviation name; group 2 is the optional argument.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
99
100
def ExpandLinks(s):
    """Expand shortcut links such as $xref:bash into real URLs.

    For every <a> start tag whose href value matches _SHORTCUT_RE, the href
    value is replaced by the URL built by the matching _ABBREVIATIONS entry.
    When the shortcut has no ':arg' part, as in <a href="$xref">bash</a>, the
    text between the opening and closing tag is used as the argument.  All
    other input is copied through unchanged.

    Args:
      s: an HTML string

    Returns:
      The HTML string with the shortcut hrefs replaced.

    Raises:
      RuntimeError: if a shortcut uses a name that isn't in _ABBREVIATIONS.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token.  It must track the token iterator exactly,
    # because tag_lexer.Reset(pos, end_pos) lexes precisely that span.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        # Where the NEXT token starts.  Normally the end of this token, but
        # further right when we read ahead to </a> below.
        next_pos = end_pos

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # -1 means there is no href (e.g. a named anchor), so there is
                # nothing to expand.  We still fall through to the position
                # update at the bottom of the loop.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # No explicit argument: use the link text.  This
                            # consumes tokens through </a>, so the next token
                            # starts where the closing tag ends.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]
                            next_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                        # Replace only the attribute value; the rest of the
                        # tag is copied as is.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = next_pos

    out.PrintTheRest()

    return f.getvalue()
159
160
class _Plugin(object):
    """
    Base class for HighlightCode() plugins, which modify
    <pre><code> ... </code></pre>

    A plugin stores the string s and the positions start_pos and end_pos that
    delimit the part of s it should process.
    """

    def __init__(self, s, start_pos, end_pos):
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        """Write the highlighted span to out.  Subclasses must override."""
        raise NotImplementedError()
173
174
# One line: everything up to a newline, plus the newline itself if there is
# one.  Both parts are optional, so this pattern matches at any position.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A line with a shell prompt.  Groups:
#   1: the prompt, up to and including the dollar sign
#   2: the command
#   3: the HTML-escaped <TAB> marker, if present
#   4: the trailing comment, if present
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line without a prompt that ends in a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that is a comment from its very first character
_COMMENT_LINE_RE = re.compile(r'#.*')
200
201
def Lines(s, start_pos, end_pos):
    """Yields positions in s that end a line.

    The span [start_pos, end_pos) is split into lines.  A line ends just after
    its newline; the last line ends at end_pos when the span doesn't finish
    with a newline.  An empty span yields nothing.

    Args:
      s: the string to scan
      start_pos: position where the first line starts
      end_pos: position where scanning stops; values past the end of s are
        treated as len(s)

    Yields:
      The end position of each line, in increasing order.
    """
    # Without this clamp, an end_pos beyond the string would never be reached
    # and the loop below could not terminate.
    end_pos = min(end_pos, len(s))

    pos = start_pos
    while pos < end_pos:
        newline_pos = s.find('\n', pos, end_pos)
        if newline_pos == -1:
            line_end = end_pos  # last line has no trailing newline
        else:
            line_end = newline_pos + 1  # the newline belongs to the line

        yield line_end

        pos = line_end
214
215
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def PrintHighlighted(self, out):
        """Copy the span to out, one line at a time, adding <span> tags."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever part of the line wasn't wrapped is copied verbatim
            out.PrintUntil(line_end)

            line_start = line_end

    def _HighlightLine(self, out, line_start, line_end):
        """Wrap the recognized parts of one line in <span> tags.

        The patterns are tried in order, and the first one that matches wins:
        whole-line comment, prompt line, then end-of-line comment.
        """
        s = self.s

        def Wrap(m, group, open_tag):
            # Copy input up to the group, then emit the group inside a span
            out.PrintUntil(m.start(group))
            out.Print(open_tag)
            out.PrintUntil(m.end(group))
            out.Print('</span>')

        whole_comment = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if whole_comment:
            Wrap(whole_comment, 0, '<span class="sh-comment">')
            return

        prompt = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if prompt:
            #log('MATCH %r', prompt.groups())
            Wrap(prompt, 1, '<span class="sh-prompt">')
            Wrap(prompt, 2, '<span class="sh-command">')

            # Groups 3 and 4 are optional parts of the pattern
            if prompt.group(3):
                Wrap(prompt, 3, '<span class="sh-tab-complete">')
            if prompt.group(4):
                Wrap(prompt, 4, '<span class="sh-comment">')
            return

        eol_comment = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if eol_comment:
            Wrap(eol_comment, 1, '<span class="sh-comment">')
266
267
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Both values are passed straight to help_gen.TopicHtmlRenderer
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Replace each line by its rendered form, when there is one.

        Returns:
          The debug_out list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is passed on HTML ESCAPED, exactly as it appears
            # in the document.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means: leave the line alone.  It's then copied by a later
            # PrintUntil() or by the caller.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
299
300
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, for ```python, ```sh, etc."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Lexer name as understood by Pygments, e.g. 'python'
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the Pygments HTML for the span to out.

        Raises:
          ImportError: if Pygments isn't installed.
        """
        # Import the submodules explicitly.  A plain 'import pygments' doesn't
        # load pygments.lexers or pygments.formatters, so attribute access on
        # the package only works if something else imported them first.
        from pygments import formatters
        from pygments import highlight
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
316
317
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The contents of every <pre> ... </pre> element are highlighted with
    ShPromptPlugin.  The <pre> tags themselves, and everything outside them,
    are copied through unchanged.

    Args:
      s: an HTML string

    Returns:
      The highlighted HTML string.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token; must stay in sync with the iterator
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_right = end_pos  # content starts after the <pre> tag

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Copy everything up to and including <pre>
                out.PrintUntil(pre_right)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  It gets only the text between the tags, the same
                # way HighlightCode() passes the text inside <code>.
                plugin = ShPromptPlugin(s, pre_right, slash_pre_left)
                plugin.PrintHighlighted(out)

                # </pre> itself is copied by a later PrintUntil() or
                # PrintTheRest()
                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>, so the next token starts
                # there
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
359
360
# Parses CSS classes like language-chapter-links-cmd-lang_35.
# Group 1 is the chapter name; group 2 is the optional column number.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''', re.VERBOSE)
367
368
def _HighlightShPrompt(s, out, code_start_pos, slash_code_left):
    """Highlight the inside of <code> with ShPromptPlugin, keeping the tags."""
    # Print everything up to and including <pre><code ...>
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
      1. Collect what's inside <pre><code> ...
      2. Choose a plugin based on the class attribute of <code>:
         - no class: use default_highlighter, if one is given
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oil-sh: ShPromptPlugin
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin, which replaces the whole
           <pre> element
         - a class that doesn't start with 'language': leave it alone
      3. Copy everything else through unchanged.

    Args:
      s: an HTML string
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'
      debug_out: optional list; one dict with keys 'to_chap' and 'lines' is
        appended for each language-chapter-links-* block

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is an unknown name.  This is only
        detected once a <code> tag without a class is seen.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token.  It must track the token iterator exactly,
    # because tag_lexer.Reset(pos, end_pos) lexes precisely that span.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is a code block
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>, so that's where
                        # the next token starts
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _HighlightShPrompt(s, out, code_start_pos,
                                                   slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            # Here we're KEEPING the original <pre><code>
                            _HighlightShPrompt(s, out, code_start_pos,
                                               slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        elif pygments is None:
                            # language-*, but no Pygments: the block is left
                            # as is
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
526
527
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

      1. Collect what's inside <pre><code> ...
      2. Decode &amp; -> &, etc. and print it after a '# block N' line

    Only blocks whose <code> tag has no class attribute are extracted.  Blocks
    that look like ```foo are skipped.

    Args:
      s: an HTML string
      f: file-like object that receives the extracted text
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0

    # Start of the current token; must stay in sync with the iterator
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is a code block
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # The iterator is now past </code>, so the next token
                        # starts there
                        end_pos = slash_code_right

        pos = end_pos

    # NOTE: out.PrintTheRest() is not called here, so the HTML after the last
    # extracted block is not copied.
591
592
class ShellSession(object):
    """
    Not implemented yet: run shell snippets and cache their output.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that

        $ x=one
        $ echo $x
        $ echo two

    Becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    And then you will have

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    The command is hashed with md5, and the hash is used to find the output
    again.  If the file already exists, the command isn't run again.  You can
    delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.  Use the Lines iterator to get lines.

    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: e.g. ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Placeholder that does nothing yet.

        Args:
          s: an HTML string.
        """
        return None
640
641
def main(argv):
    """Command line entry point.

    The only action is 'highlight', which reads HTML from stdin and prints
    the result of SimpleHighlightCode() to stdout.

    Args:
      argv: the full argument list; argv[1] is the action

    Raises:
      RuntimeError: if the action is unknown.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    contents = sys.stdin.read()
    print(SimpleHighlightCode(contents))


if __name__ == '__main__':
    main(sys.argv)