OILS / doctools / oils_doc.py View on Github | oils.pub

658 lines, 369 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShellSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14import cgi
15try:
16 from cStringIO import StringIO
17except ImportError:
18 # for python3
19 from io import StringIO # type: ignore
20import re
21import sys
22
23from doctools.util import log
24from lazylex import html
25
26try:
27 import pygments
28except ImportError:
29 pygments = None
30
31
32class _Abbrev(object):
33
34 def __init__(self, fmt):
35 self.fmt = fmt
36
37 def __call__(self, value):
38 return self.fmt % {'value': value}
39
40
# Maps the name written after '$' in an href (for example 'xref' in
# $xref:bash) to the _Abbrev that builds the expanded URL.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for osh-help, for backward compatibility
    # to link to the same version

    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: links into the latest release docs
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # Also for the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old 'oil-*' names share the 'oils-*' templates
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# Matches $xref:foo, or a bare $xref.
#   group 1: abbreviation name (lowercase letters and hyphens)
#   group 2: the argument after ':', or None when there is no ':' part
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
100
101
def ExpandLinks(s):
    """Expand abbreviated hrefs such as $xref:bash into full URLs.

    Every <a> start tag whose href value matches _SHORTCUT_RE is rewritten:

      <a href="$xref:bash">...</a>   the argument comes from the href
      <a href="$xref">bash</a>       the argument is the text up to </a>

    For the 'wiki' abbreviation, spaces in the argument become hyphens.

    Args:
      s: an HTML string

    Returns:
      The HTML string with each matching href value replaced by the expanded
      URL (passed through cgi.escape).  Everything else is copied unchanged.

    Raises:
      RuntimeError: if the name after '$' is not a key of _ABBREVIATIONS.
    """
    f = StringIO()
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    # Start offset of the current token.  It must be advanced for EVERY token,
    # because tag_lexer.Reset() uses it as the left edge of the next tag.
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # An <a> without href (-1) is left alone.  This is a nested
                # condition rather than 'continue' so that the 'pos' update at
                # the bottom of the loop is never skipped.
                if href_start != -1:
                    m = _SHORTCUT_RE.match(s[href_start:href_end])
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # Bare $xref: use the link text as the argument.
                            # This consumes tokens through </a>.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # The next token starts after </a>, not after the
                            # <a ...> tag.
                            end_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)

                        # Replace only the href value; the rest of the tag is
                        # copied through.
                        # NOTE(review): cgi.escape is the Python 2 spelling; it
                        # no longer exists in newer Python 3 releases.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(func(arg)))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
160
161
162class _Plugin(object):
163 """
164 A plugin for HighlightCode(), which modifies <pre><code> ... </code></pre>
165 """
166
167 def __init__(self, s, start_pos, end_pos):
168 self.s = s
169 self.start_pos = start_pos
170 self.end_pos = end_pos
171
172 def PrintHighlighted(self, out):
173 raise NotImplementedError()
174
175
176# Optional newline at end
177_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)
178
179_PROMPT_LINE_RE = re.compile(
180 r'''
181(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
182(.*?) # arbitrary text
183(?: # don't highlight tab completion
184 (&lt;TAB&gt;) # it's HTML escaped!!!
185 .*?
186)?
187(?:
188 [ ][ ]([#] .*) # optionally: two spaces then a comment
189)?
190$
191''', re.VERBOSE)
192
193_EOL_COMMENT_RE = re.compile(
194 r'''
195.*? # arbitrary text
196[ ][ ]([#] .*) # two spaces then a comment
197$
198''', re.VERBOSE)
199
200_COMMENT_LINE_RE = re.compile(r'#.*')
201
202
def Lines(s, start_pos, end_pos):
    """Yield, for each line in s[start_pos:end_pos], the position where it ends.

    A yielded position is just past the line's newline, or equal to end_pos
    for a last line that has no newline.

    Raises:
      RuntimeError: if _LINE_RE fails to match.
    """
    cursor = start_pos
    while cursor < end_pos:
        match = _LINE_RE.match(s, cursor, end_pos)
        if match is None:
            raise RuntimeError("Should have matched a line")

        # The end of this line is also the start of the next one
        cursor = match.end(0)
        yield cursor
215
216
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts, commands and comments with <span> tags."""

    def _Span(self, out, open_tag, start, end):
        """Copy input up to start, then wrap s[start:end] in open_tag ... </span>."""
        out.PrintUntil(start)
        out.Print(open_tag)
        out.PrintUntil(end)
        out.Print('</span>')

    def _HighlightLine(self, out, line_start, line_end):
        """Emit the spans for one line.  The first matching rule wins."""
        s = self.s

        # 1. The whole line is a comment
        comment = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if comment:
            self._Span(out, '<span class="sh-comment">', comment.start(0),
                       comment.end(0))
            return

        # 2. Prompt and command, then optional <TAB> marker and comment
        prompt = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if prompt:
            #log('MATCH %r', prompt.groups())
            self._Span(out, '<span class="sh-prompt">', prompt.start(1),
                       prompt.end(1))
            self._Span(out, '<span class="sh-command">', prompt.start(2),
                       prompt.end(2))
            if prompt.group(3):
                self._Span(out, '<span class="sh-tab-complete">',
                           prompt.start(3), prompt.end(3))
            if prompt.group(4):
                self._Span(out, '<span class="sh-comment">', prompt.start(4),
                           prompt.end(4))
            return

        # 3. Other text that ends with a comment
        trailing = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if trailing:
            self._Span(out, '<span class="sh-comment">', trailing.start(1),
                       trailing.end(1))

    def PrintHighlighted(self, out):
        """Highlight each line of s[start_pos:end_pos], writing to out."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Copy whatever is left of the line, including its newline
            out.PrintUntil(line_end)

            line_start = line_end
267
268
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Both values are handed to help_gen.TopicHtmlRenderer unchanged
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Render each line with help_gen.TopicHtmlRenderer, writing to out.

        Returns:
          The debug_out list that was passed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is passed on still HTML ESCAPED.  It's valid to
            # just add tags and leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means the line is kept as-is; otherwise the rendered HTML
            # replaces the original line.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
300
301
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, e.g. for ```python or ```c."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # lang: lexer name that is passed to Pygments' get_lexer_by_name()
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the Pygments HTML for s[start_pos:end_pos] to out.

        Raises:
          ImportError: if Pygments is not installed.
        """
        # 'import pygments' alone does not load these submodules, so
        # pygments.lexers / pygments.formatters may not exist as attributes.
        # Import them explicitly, and locally, because Pygments is optional.
        from pygments import formatters
        from pygments import highlight
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        out.Print(highlight(code, lexer, formatter))
317
318
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The contents of every <pre> ... </pre> are run through ShPromptPlugin.

    Args:
      s: an HTML string

    Returns:
      The HTML string with <span> tags added inside each <pre> block.
    """
    f = StringIO()
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    # Start offset of the current token
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                # Consumes tokens through </pre>
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  Only the text BETWEEN the tags is highlighted, so
                # that a span can never enclose <pre> or </pre>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The next token starts after </pre>
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
360
361
# Parses <code class="language-chapter-links-CHAPTER"> and
# <code class="language-chapter-links-CHAPTER_N">.
#   group 1: CHAPTER
#   group 2: N as a string of digits, or None when there is no _N suffix
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''', re.VERBOSE)
368
369
# Values of default_highlighter that select ShPromptPlugin.
# 'oil-sh' is kept for compatibility.
_SH_PROMPT_HIGHLIGHTERS = ('sh-prompt', 'oils-sh', 'oil-sh')

# The same highlighters, as they appear in <code class="language-...">
_SH_PROMPT_CSS_CLASSES = tuple('language-' + name
                               for name in _SH_PROMPT_HIGHLIGHTERS)


def _HighlightShPrompt(s, out, code_start_pos, slash_code_left):
    """Highlight one code block with ShPromptPlugin, KEEPING <pre><code>."""
    # Print everything up to and including <pre><code ...>
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def _HighlightChapterLinks(s, out, css_class, code_start_pos, slash_code_left,
                           debug_out):
    """Highlight one language-chapter-links-* block with HelpTopicsPlugin.

    Appends a dict {'to_chap': ..., 'lines': ...} for the block to debug_out.
    """
    m = CSS_CLASS_RE.match(css_class)
    assert m is not None, css_class

    #log('%s GROUPS %s', css_class, m.groups())
    chapter, num_str = m.groups()
    # -1 is passed on when the class has no _N suffix
    linkify_stop_col = int(num_str) if num_str is not None else -1

    out.PrintUntil(code_start_pos)

    plugin = HelpTopicsPlugin(s, code_start_pos, slash_code_left, chapter,
                              linkify_stop_col)
    block_debug_info = plugin.PrintHighlighted(out)

    # e.g. these are links to cmd-lang within a block in toc-ysh
    debug_out.append({'to_chap': chapter, 'lines': block_debug_info})

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> blocks of an HTML string.

    Algorithm:
    1. Find each <pre> that is immediately followed by <code>.
    2. Choose a highlighter from the class attribute of <code>:
       (no class)                 default_highlighter, if it is not None
       language-none              left alone
       language-sh-prompt, language-oils-sh, language-oil-sh
                                  ShPromptPlugin
       language-ysh               left alone (TODO: write a highlighter)
       language-chapter-links-*   HelpTopicsPlugin
       language-*                 Pygments; skipped with a warning if it
                                  could not be imported
       A class that does not start with 'language' is left alone.
    3. ShPromptPlugin and HelpTopicsPlugin add tags inside the existing
       <pre><code>; Pygments output replaces the whole <pre> element.

    Args:
      s: an HTML string
      default_highlighter: None, or 'sh-prompt' / 'oils-sh' / 'oil-sh'.
        Applied to <code> blocks without a class attribute.
      debug_out: optional list.  One dict {'to_chap': ..., 'lines': ...} is
        appended for each language-chapter-links-* block.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if a <code> block without a class is found and
        default_highlighter is neither None nor one of the names above.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    # Start offset of the current token.  It must be advanced for EVERY token,
    # because tag_lexer.Reset() uses it as the left edge of the next tag.
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # Tokens through </code> are consumed, so the next
                        # token starts here
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            if default_highlighter in _SH_PROMPT_HIGHLIGHTERS:
                                _HighlightShPrompt(s, out, code_start_pos,
                                                   slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in _SH_PROMPT_CSS_CLASSES:
                            _HighlightShPrompt(s, out, code_start_pos,
                                               slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            _HighlightChapterLinks(s, out, css_class,
                                                   code_start_pos,
                                                   slash_code_left, debug_out)

                        elif pygments is None:
                            # Deliberately not 'continue', which would skip
                            # the 'pos' update at the bottom of the loop
                            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                )

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert (tag_lexer.TagName() == 'pre'
                                    ), tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
527
528
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> blocks WITHOUT a class attribute are extracted.  Each one is
    written as a '# block N' line (N counts from 0), then the decoded text,
    then a newline.  Text outside those blocks is not written.

    Args:
      s: an HTML string
      f: file-like object that receives the extracted code
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0

    # Start offset of the current token
    pos = 0

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
                    css_class = tag_lexer.GetAttrRaw('class')

                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        # Consumes tokens through </code>
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # The next token starts after </code>
                        end_pos = slash_code_right

        pos = end_pos

    # The rest of the document is deliberately NOT printed
    #out.PrintTheRest()
592
593
class ShellSession(object):
    """
    Not implemented yet: PrintHighlighted() does nothing.

    TODO: Pass this to HighlightCode as a plugin

    $ x=one
    $ echo $x
    $ echo two

    Becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    And then you will have
    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

    It hashes the command with md5 and then brings it back.
    If the file already exists then it doesn't run it again.
    You can delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.cache_dir = cache_dir
        self.shell_exe = shell_exe

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """
        Placeholder: writes nothing to out.

        Args:
          s: an HTML string.
        """
        return None
641
642
def main(argv):
    """Command line entry point.

    Args:
      argv: argument list in sys.argv form; argv[1] is the action.

    Actions:
      highlight: read HTML from stdin, print the result of
        SimpleHighlightCode() to stdout.  For test/shell-vs-shell.sh.

    Raises:
      RuntimeError: if the action is missing or unknown.
    """
    # A missing action is reported like an unknown one, rather than as a bare
    # IndexError
    action = argv[1] if len(argv) > 1 else None

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Not named 'html', which would shadow the imported html module
        contents = sys.stdin.read()
        print(SimpleHighlightCode(contents))

    else:
        raise RuntimeError('Invalid action %r' % action)
655
656
# Script entry point: dispatch on the command line arguments
if __name__ == '__main__':
    main(sys.argv)