OILS / doctools / oils_doc.py View on Github | oilshell.org

675 lines, 380 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14import cgi
15import cStringIO
16import re
17import sys
18
19from doctools.util import log
20from lazylex import html
21
22
def RemoveComments(s):
    """Remove <!-- comments --> from an HTML string.

    Comments whose text contains 'REPLACE' are kept, because
    doc/release-index.md has placeholders like <!-- REPLACE_WITH_DATE -->.

    Args:
      s: an HTML string

    Returns:
      The HTML string without the other comments.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    pos = 0  # start of the token that ends at end_pos

    for tok_id, end_pos in html.ValidTokens(s):
        if tok_id == html.Comment and 'REPLACE' not in s[pos:end_pos]:
            # Emit what precedes the comment, then jump over the comment
            out.PrintUntil(pos)
            out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()
43
44
45class _Abbrev(object):
46
47 def __init__(self, fmt):
48 self.fmt = fmt
49
50 def __call__(self, value):
51 return self.fmt % {'value': value}
52
53
# Maps the abbreviation name in href="$name:arg" to the _Abbrev that builds
# the real URL from arg.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for 'osh-help', kept for backward compatibility, to
    # link to the same version.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: links into the latest release
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # Also for the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the _Abbrev instance of
# the corresponding oils-* name.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
110
# A link shortcut in an href: '$', then an abbreviation name made of lowercase
# letters and hyphens (group 1), then optionally ':' and a non-whitespace
# argument (group 2).  Examples: $xref:foo, or $xref with no argument.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
113
114
def ExpandLinks(s):
    """Expand $xref:bash and so forth.

    Rewrites the href of each <a> tag whose href matches _SHORTCUT_RE, i.e.
    looks like $name or $name:arg.  _ABBREVIATIONS[name] builds the new URL
    from arg.  When there is no :arg, the text between <a ...> and </a> is
    used as the argument, so [bash]($xref) works.

    Args:
      s: an HTML string

    Returns:
      The HTML string with those hrefs replaced.  The new URL is escaped with
      cgi.escape().

    Raises:
      RuntimeError: if the abbreviation name isn't in _ABBREVIATIONS
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the token that the iterator yields next

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        # Where the NEXT token starts.  This is past end_pos when we consume
        # more tokens from 'it' below.
        next_pos = end_pos

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # -1 means there's no href.  We fall through rather than
                # 'continue', so that pos is still advanced at the bottom of
                # the loop.
                if href_start != -1:
                    href_raw = s[href_start:href_end]

                    m = _SHORTCUT_RE.match(href_raw)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # Other call sites in this file treat the result
                            # as the (left, right) positions of the end tag.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # The iterator is now past </a>, so the next token
                            # starts there, not at the end of <a ...>.
                            next_pos = close_tag_right

                        # Hack so we can write [Wiki Page]($wiki) and have the
                        # link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if func is None:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                        # Replace only the attribute value; the rest of the
                        # document is copied through by 'out'.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = next_pos

    out.PrintTheRest()

    return f.getvalue()
173
174
175class _Plugin(object):
176 """
177 A plugin for HighlightCode(), which modifies <pre><code> ... </code></pre>
178 """
179
180 def __init__(self, s, start_pos, end_pos):
181 self.s = s
182 self.start_pos = start_pos
183 self.end_pos = end_pos
184
185 def PrintHighlighted(self, out):
186 raise NotImplementedError()
187
188
# One line of text (group 1), with an optional newline at the end.  Used by
# Lines() to find where each line ends.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A line that starts with a shell prompt.  Groups:
#   1: the prompt, up to and including the dollar sign
#   2: the command
#   3: an HTML-escaped <TAB> marker, if any
#   4: a trailing comment after two spaces, if any
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
(.*?) # arbitrary text
(?: # don't highlight tab completion
 (&lt;TAB&gt;) # it's HTML escaped!!!
 .*?
)?
(?:
 [ ][ ]([#] .*) # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line without a prompt that ends with a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*? # arbitrary text
[ ][ ]([#] .*) # two spaces then a comment
$
''', re.VERBOSE)

# A comment that starts at the position being matched.  ShPromptPlugin matches
# it at the start of each line.
_COMMENT_LINE_RE = re.compile(r'#.*')
214
215
def Lines(s, start_pos, end_pos):
    """Yield the end position of each line in s[start_pos:end_pos].

    The end of a line is just past its newline.  For a last line without a
    newline, it's end_pos.
    """
    cursor = start_pos
    while cursor < end_pos:
        m = _LINE_RE.match(s, cursor, end_pos)
        if m is None:
            raise RuntimeError("Should have matched a line")

        # The next line begins where this one ends
        cursor = m.end(0)
        yield cursor
228
229
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts, commands, tab completion markers and comments.

    Each line of the span is tried against _COMMENT_LINE_RE, _PROMPT_LINE_RE
    and _EOL_COMMENT_RE, in that order.  The first one that matches decides
    which parts get wrapped in <span class="sh-..."> tags.
    """

    def PrintHighlighted(self, out):
        """Copy the span to out, adding <span> tags line by line."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._MarkUpLine(out, line_start, line_end)

            # The rest of the line, which is all of it when nothing matched
            out.PrintUntil(line_end)

            line_start = line_end

    def _Span(self, out, m, group, open_tag):
        """Print up to a match group, then the group inside open_tag."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')

    def _MarkUpLine(self, out, line_start, line_end):
        """Print the highlighted parts of one line, if any pattern matches."""
        s = self.s

        # 1. The whole line is a comment
        m = _COMMENT_LINE_RE.match(s, line_start, line_end)
        if m:
            self._Span(out, m, 0, '<span class="sh-comment">')
            return

        # 2. Prompt and command, then optional tab completion and comment
        m = _PROMPT_LINE_RE.match(s, line_start, line_end)
        if m:
            #log('MATCH %r', m.groups())
            self._Span(out, m, 1, '<span class="sh-prompt">')
            self._Span(out, m, 2, '<span class="sh-command">')
            if m.group(3):
                self._Span(out, m, 3, '<span class="sh-tab-complete">')
            if m.group(4):
                self._Span(out, m, 4, '<span class="sh-comment">')
            return

        # 3. No prompt, but a comment at the end of the line
        m = _EOL_COMMENT_RE.match(s, line_start, line_end)
        if m:
            self._Span(out, m, 1, '<span class="sh-comment">')
280
281
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md.

    Each line of the span is passed to help_gen.TopicHtmlRenderer, which is
    created with the chapter and linkify_stop_col given here.
    """

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.chapter, self.linkify_stop_col = chapter, linkify_stop_col

    def PrintHighlighted(self, out):
        """Replace each line that the renderer turns into HTML.

        Returns:
          The debug_out list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is HTML ESCAPED.  It's valid to just add tags and
            # leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means: keep the line as is
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
313
314
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, for ```python, ```sh, ```c, etc.

    Unlike the other plugins, this one prints a replacement for the whole
    <pre><code> ... </code></pre>, because Pygments emits its own <pre>.
    HighlightCode() skips the original tags around the span.
    """

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang  # name passed to pygments.lexers.get_lexer_by_name

    def PrintHighlighted(self, out):
        """Print the highlighted code, or the plain code without Pygments."""
        try:
            from pygments import lexers
            from pygments import formatters
            from pygments import highlight
        except ImportError:
            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                )
            # The caller drops the original <pre><code> ... </code></pre>, so
            # printing nothing here would delete the code block from the
            # document.  Print it unhighlighted instead.  The text in self.s is
            # already HTML escaped, so it's copied as is.
            out.Print('<pre><code>')
            out.Print(self.s[self.start_pos:self.end_pos])
            out.Print('</code></pre>')
            return

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
339
340
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    Runs ShPromptPlugin over the contents of every <pre> ... </pre>.  The
    <pre> and </pre> tags themselves are copied through unchanged.

    Args:
      s: an HTML string

    Returns:
      The highlighted HTML string.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the token that the iterator yields next

    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos  # just past <pre>

                # (left, right) positions of </pre>, as at the other call
                # sites of ReadUntilEndTag in this file
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  It gets only the text BETWEEN the tags, so that
                # <pre> and </pre> can't end up inside a <span>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>, so that's where the next
                # token starts.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
382
383
# The class attribute of a <code> block that HighlightCode() hands to
# HelpTopicsPlugin.  Group 1 is the chapter name, and group 2 is the optional
# linkify_stop_col, which follows an underscore.
CSS_CLASS_RE = re.compile(
    r'''
 language-chapter-links-
 ([a-z0-9-]+) # chapter name
 (?:_(\d+))? # optional linkify_stop_col
 ''', re.VERBOSE)
390
391
def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the code blocks in an HTML string.

    Algorithm:
    1. Find each <pre> that is immediately followed by <code>, and read up to
       the matching </code>.
    2. Choose what to do based on the class attribute of <code>:
       - no class: use default_highlighter, if it isn't None
       - language-none, language-ysh: leave the block alone
       - language-sh-prompt, language-oil-sh: ShPromptPlugin reads lines.  If
         a line looks like a shell prompt and command, they are highlighted
         with <span>
       - language-chapter-links-*: HelpTopicsPlugin
       - any other language-*: PygmentsPlugin, whose output replaces the whole
         <pre><code> ... </code></pre>

    Args:
      s: an HTML string
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'
      debug_out: optional list.  A dict with keys 'to_chap' and 'lines' is
        appended to it for each language-chapter-links-* block.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is some other value.  This is only
        checked when a <code> without a class is found.
    """
    if debug_out is None:
        debug_out = []

    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the token that ends at end_pos
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the comment highlighting
                                # we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            # Here we're KEEPING the original <pre><code>
                            # Print everything up to and including <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                # No _N suffix on the class name
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            # Stop copying at <pre>.  The plugin prints the
                            # replacement, and then we resume after </pre>.
                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
544
545
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and write it to f

    Only <code> tags without a class attribute are extracted.  Each block is
    preceded by a line like '# block 0' and followed by a newline.  Nothing
    outside the code blocks is written.

    Args:
      s: an HTML string
      f: file-like object that the text is written to
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    pos = 0  # start of the token that ends at end_pos
    it = html.ValidTokens(s)

    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Look at the token right after <pre>
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, _ = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

        pos = end_pos

    # NOTE: out.PrintTheRest() is deliberately NOT called here.
609
610
class ShellSession(object):
    """
    Placeholder for running shell snippets and caching their output.

    NOT IMPLEMENTED: PrintHighlighted() does nothing yet.  The notes below
    describe the plan.

    TODO: Pass this to HighlightCode as a plugin

    A block like this:

      $ x=one
      $ echo $x
      $ echo two

    should become:

      $ x=one
      $ echo $x
      one
      $ echo two
      two

    The output is to be cached in files like:

      blog/2019/12/_shell_session/
        $hash1-stdout.txt
        $hash2-stdout.txt

    The idea is to hash the command with md5, and use the hash to find the
    output again.  If the file already exists, the command isn't run again.
    Deleting the file forces it to be redone.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.  Use the Lines iterator to get lines.

    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: e.g. ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Does nothing yet.

        Args:
          s: an HTML string.
        """
        return None
658
659
def main(argv):
    """Command line entry point.

    Args:
      argv: argument list like sys.argv, where argv[1] is the action.  The
        only action is 'highlight', which reads HTML on stdin and prints the
        result of SimpleHighlightCode() on stdout.

    Raises:
      RuntimeError: if the action is missing or invalid
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Not named 'html', which would shadow the html module imported at the
        # top of this file.
        contents = sys.stdin.read()
        print(SimpleHighlightCode(contents))

    else:
        raise RuntimeError('Invalid action %r' % action)
672
673
# When run as a script, the action is taken from the command line arguments.
if __name__ == '__main__':
    main(sys.argv)