OILS / doctools / oils_doc.py View on Github | oils.pub

678 lines, 383 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShellSession -- will run shell snippets and cache the output (currently a stub)
11"""
12from __future__ import print_function
13
14import cgi
15try:
16 from cStringIO import StringIO
17except ImportError:
18 from io import StringIO # python3
19import re
20import sys
21
22from doctools.util import log
23from lazylex import html
24
25
def RemoveComments(s):
    """Remove <!-- comments --> from an HTML string.

    Comments whose text contains 'REPLACE' are left in place, because
    doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.

    Args:
      s: HTML string

    Returns:
      The HTML string with the other comments removed.
    """
    f = StringIO()
    out = html.Output(s, f)

    # Start of the current token; each token ends where the next one begins.
    pos = 0

    for tok_id, end_pos in html.ValidTokens(s):
        if tok_id == html.Comment and 'REPLACE' not in s[pos:end_pos]:
            # Emit what precedes the comment, then jump over the comment.
            out.PrintUntil(pos)
            out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()
46
47
class _Abbrev(object):
    """Callable that expands a link abbreviation using a %-format template.

    The template is formatted with a dict that has the single key 'value', so
    it may contain %(value)s any number of times.
    """

    def __init__(self, fmt):
        # %-style template, e.g. 'osh-help.html?topic=%(value)s#%(value)s'
        self.fmt = fmt

    def __call__(self, value):
        """Return the template with `value` substituted for %(value)s."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
55
56
# Maps an abbreviation name (the word after '$' in an href like $xref:foo) to
# the _Abbrev that builds the expanded URL.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # TODO: Remove all of these broken links!

    # 'help' is an alias for 'osh-help', kept for backward compatibility so
    # that old docs link to the same version.
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: links into the latest release
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # For the blog: docs, tags, and GitHub
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
113
# Matches an abbreviated href such as $xref:foo, or a bare $xref.
# Group 1 is the abbreviation name; group 2 is the optional argument after ':'.
_SHORTCUT_RE = re.compile(
    r'\$ ([a-z\-]+) (?: : (\S+))?',
    re.VERBOSE,
)
116
117
def ExpandLinks(s):
    """Expand $xref:bash and so forth.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
    replaced by the escaped URL that the matching _ABBREVIATIONS entry
    produces.  When the href has no ':arg' part, the text between <a ...> and
    </a> is used as the argument.

    Args:
      s: HTML string

    Returns:
      The HTML string with abbreviated hrefs expanded.

    Raises:
      RuntimeError: if an href uses an abbreviation that isn't registered.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token.  It must be advanced on EVERY iteration,
    # otherwise the next tag_lexer.Reset() gets a stale left edge.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                # An <a> without href has nothing to expand.  Don't use
                # 'continue' here: that would skip the 'pos = end_pos' below.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    new = None
                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # No explicit argument: use the link text.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # The iterator has consumed tokens through </a>,
                            # so the next token starts after the closing tag.
                            end_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                    if new is not None:
                        # Replace only the href value; the rest of the tag is
                        # printed unchanged.
                        out.PrintUntil(href_start)
                        # NOTE: cgi.escape() was removed in Python 3.8; this
                        # file targets python2 (see the shebang).
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
176
177
class _Plugin(object):
    """
    Base class for HighlightCode() plugins, which modify
    <pre><code> ... </code></pre>

    Subclasses implement PrintHighlighted().
    """

    def __init__(self, s, start_pos, end_pos):
        # s is the whole document; start_pos and end_pos delimit the span of
        # it that this plugin works on.
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        """Write the highlighted span to `out`; must be overridden."""
        raise NotImplementedError()
190
191
# One line of text, with an optional newline at the end.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line.  Groups: 1 = prompt, 2 = command, 3 = the escaped
# <TAB> marker (optional), 4 = trailing comment (optional).
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that ends with a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that is entirely a comment, i.e. '#' in the first column.
_COMMENT_LINE_RE = re.compile(r'#.*')
217
218
def Lines(s, start_pos, end_pos):
    """Yields positions in s that end a line.

    Scans s from start_pos up to end_pos.  Each yielded position is just past
    a newline, except possibly the last one, which is end_pos when the final
    line has no trailing newline.

    Args:
      s: the string to scan
      start_pos: position where the first line begins
      end_pos: position where scanning stops; values past the end of s are
        clamped to len(s)

    Yields:
      The end position of each line, in increasing order.
    """
    # Without clamping, a scan that reaches len(s) while still below end_pos
    # could never make progress.
    end_pos = min(end_pos, len(s))

    pos = start_pos
    while pos < end_pos:
        newline_pos = s.find('\n', pos, end_pos)
        if newline_pos == -1:
            line_end = end_pos  # last line, no trailing newline
        else:
            line_end = newline_pos + 1  # the newline belongs to the line

        yield line_end

        pos = line_end
232
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def PrintHighlighted(self, out):
        """Print each line of the span to `out`, adding <span> highlights."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever part of the line wasn't wrapped is printed as is.
            out.PrintUntil(line_end)
            line_start = line_end

    def _HighlightLine(self, out, line_start, line_end):
        """Wrap the interesting parts of one line in <span> tags.

        The first matching rule wins: whole-line comment, then prompt line,
        then end-of-line comment.
        """
        s = self.s

        m = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if m:
            self._Wrap(out, m, 0, '<span class="sh-comment">')
            return

        m = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if m:
            self._Wrap(out, m, 1, '<span class="sh-prompt">')
            self._Wrap(out, m, 2, '<span class="sh-command">')
            if m.group(3):
                self._Wrap(out, m, 3, '<span class="sh-tab-complete">')
            if m.group(4):
                self._Wrap(out, m, 4, '<span class="sh-comment">')
            return

        m = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if m:
            self._Wrap(out, m, 1, '<span class="sh-comment">')

    @staticmethod
    def _Wrap(out, m, group, open_tag):
        """Print regex group `group` of match `m` between open_tag and </span>."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')
283
284
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
        # Both values are passed through to help_gen.TopicHtmlRenderer.
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Render each line with help_gen.TopicHtmlRenderer and print it.

        Returns:
          The debug_out list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the renderer accepts an HTML ESCAPED line.  It's valid to
            # just add tags and leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means the line should be left as it is.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
316
317
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, for ```python, ```sh, ```c, etc.

    The printed output is meant to REPLACE the original
    <pre><code> ... </code></pre>, because Pygments emits its own <pre>.
    """

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Name handed to pygments.lexers.get_lexer_by_name()
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the highlighted code, or the plain code if Pygments is missing."""
        try:
            from pygments import lexers
            from pygments import formatters
            from pygments import highlight
        except ImportError:
            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                )
            # The caller drops the original <pre><code>, so printing nothing
            # would delete the code from the document.  Emit it unhighlighted
            # instead; the span is already HTML-escaped, so print it verbatim.
            out.Print('<pre><code>')
            out.Print(self.s[self.start_pos:self.end_pos])
            out.Print('</code></pre>')
            return

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
342
343
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    Highlights the contents of every <pre> ... </pre> with ShPromptPlugin.

    Args:
      s: HTML string

    Returns:
      The HTML string with <span> highlights added inside <pre> blocks.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the current token

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos  # just past the opening <pre ...>

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre ...>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  It only gets the text BETWEEN the tags, so a prompt
                # or comment on the first line is recognized, and </pre> never
                # ends up inside a <span>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator has consumed tokens through </pre>, so the next
                # token starts after the closing tag.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
385
386
# Parses a <code> class like language-chapter-links-NAME or
# language-chapter-links-NAME_COL.  Group 1 is the chapter name; group 2 is
# the optional column number.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)   # chapter name
    (?:_(\d+))?    # optional linkify_stop_col
    ''',
    re.VERBOSE,
)
393
394
def _HighlightWithShPrompt(s, out, code_start_pos, slash_code_left):
    """Highlight one <code> body with ShPromptPlugin, KEEPING <pre><code>.

    Args:
      s: the HTML document being processed
      out: the html.Output for s
      code_start_pos: position just past the opening <code ...> tag
      slash_code_left: left edge of the closing </code> tag
    """
    # Print everything up to and including <pre><code ...>
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
      1. Find each <pre> that is immediately followed by <code>.
      2. Choose a plugin from the class attribute of <code>:
         - no class: use default_highlighter, if there is one
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oils-sh, language-oil-sh:
           ShPromptPlugin
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin, which replaces the whole
           <pre><code> block
      3. Print the plugin's output in place of the code.

    Args:
      s: HTML string
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'
      debug_out: optional list.  For each language-chapter-links-* block, a
        dict {'to_chap': ..., 'lines': ...} is appended to it.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is needed and isn't a known name.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the current token

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, _ = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _HighlightWithShPrompt(s, out, code_start_pos,
                                                       slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        # Same names as default_highlighter accepts above;
                        # oil-sh for compatibility.
                        elif css_class in ('language-sh-prompt',
                                           'language-oils-sh',
                                           'language-oil-sh'):
                            # Here we're KEEPING the original <pre><code>
                            _HighlightWithShPrompt(s, out, code_start_pos,
                                                   slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
547
548
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it to f, after a '# block N' line

    Only <code> tags WITHOUT a class attribute are extracted.  The rest of the
    document is not printed (there is no out.PrintTheRest() call).

    Args:
      s: HTML string
      f: file-like object to write the code blocks to
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    pos = 0  # start of the current token
    it = html.ValidTokens(s)

    # The loop body also pulls tokens from `it`, so this isn't one token per
    # iteration.
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                following = next(it, None)
                if following is None:
                    break
                tok_id, end_pos = following

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if tag_lexer.GetAttrRaw('class') is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, _ = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        # Decode HTML entities back to plain text
                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

        pos = end_pos
612
613
class ShellSession(object):
    """
    Runs shell snippets and caches the output.  NOT IMPLEMENTED YET.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that

        $ x=one
        $ echo $x
        $ echo two

    becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    And then you will have

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    It hashes the command with md5 and then brings it back.
    If the file already exists then it doesn't run it again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Stub that does nothing yet.

        Args:
          s: an HTML string.
        """
        return None
661
662
def main(argv):
    """Command line entry point.

    Args:
      argv: the full argument list; argv[1] is the action.  The only action
        is 'highlight', which reads HTML from stdin and prints the result of
        SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if the action is missing or unknown.
    """
    if len(argv) < 2:
        raise RuntimeError('Action required')
    action = argv[1]

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Don't call this 'html': that would shadow the imported html module.
        contents = sys.stdin.read()
        out = SimpleHighlightCode(contents)
        print(out)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    main(sys.argv)