OILS / doctools / oils_doc.py View on Github | oils.pub

656 lines, 369 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShellSession -- intended to run shell snippets and cache the output (stub)
11"""
12from __future__ import print_function
13
14import cgi
15try:
16 from cStringIO import StringIO
17except ImportError:
18 from io import StringIO # python3
19import re
20import sys
21
22from doctools.util import log
23from lazylex import html
24
25
class _Abbrev(object):
    """Callable URL template.

    Calling the instance substitutes the argument for every %(value)s
    placeholder in the format string.
    """

    def __init__(self, fmt):
        # A %-style format string that refers to the mapping key 'value'.
        self.fmt = fmt

    def __call__(self, value):
        """Return self.fmt with `value` filled in."""
        substitutions = dict(value=value)
        return self.fmt % substitutions
33
34
# Maps the abbreviation name of an href shortcut ($name:arg) to the _Abbrev
# that builds the expanded URL.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for 'osh-help', kept for backward compatibility so
    # that the link goes to the same version.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # For the blog: links into the latest release's help pages
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # For the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit':
    _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the oils-* entries.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
91
# An href shortcut: '$', an abbreviation name made of lowercase letters and
# dashes, then optionally ':' and a non-whitespace argument.
# Examples: $xref:foo, or a bare $xref
_SHORTCUT_RE = re.compile(
    r'\$ ([a-z\-]+) (?: : (\S+))?',
    re.VERBOSE)
94
95
def ExpandLinks(s):
    """Expand shortcut hrefs such as $xref:bash inside <a> tags.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
    replaced by the URL built by the matching entry of _ABBREVIATIONS.  When
    the shortcut has no ':arg' part, the text between <a ...> and </a> is used
    as the argument.  Everything else in s is copied through unchanged.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with shortcut hrefs expanded.

    Raises:
      RuntimeError: if the shortcut names an unknown abbreviation.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token about to be read; must always equal the end
    # offset of the last token consumed from `it`.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                if href_start == -1:
                    # <a> without an href.  Advance pos before moving on;
                    # otherwise the next token would be lexed from the start
                    # of this tag.
                    pos = end_pos
                    continue

                href_raw = s[href_start:href_end]

                new = None
                m = _SHORTCUT_RE.match(href_raw)
                if m:
                    abbrev_name, arg = m.groups()
                    if not arg:
                        # No explicit argument: use the link text.  This
                        # consumes tokens up to and including </a>.
                        close_tag_left, close_tag_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'a')
                        arg = s[open_tag_right:close_tag_left]

                        # The next token from `it` starts after </a>, so
                        # that is where pos must end up.
                        end_pos = close_tag_right

                    # Hack to so we can write [Wiki Page]($wiki) and have the
                    # link look like /Wiki-Page/
                    if abbrev_name == 'wiki':
                        arg = arg.replace(' ', '-')

                    func = _ABBREVIATIONS.get(abbrev_name)
                    if not func:
                        raise RuntimeError('Invalid abbreviation %r' %
                                           abbrev_name)
                    new = func(arg)

                if new is not None:
                    # Replace only the attribute value; the rest of the tag
                    # and the link text are copied through.
                    out.PrintUntil(href_start)
                    f.write(cgi.escape(new))
                    out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
154
155
class _Plugin(object):
    """Base class for HighlightCode() plugins.

    A plugin modifies the contents of <pre><code> ... </code></pre>.  It is
    given the HTML string and the two offsets that delimit the code to
    rewrite.
    """

    def __init__(self, s, start_pos, end_pos):
        self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

    def PrintHighlighted(self, out):
        """Write the highlighted code to `out`.  Subclasses must override."""
        raise NotImplementedError()
168
169
# One line: everything up to a newline, plus the newline itself if present.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line.  Groups:
#   1: the prompt, 2: the command, 3: a <TAB> marker, 4: a trailing comment
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)    # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that is not a prompt but ends with a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that starts with a comment.
_COMMENT_LINE_RE = re.compile(r'#.*')
195
196
def Lines(s, start_pos, end_pos):
    """Yield the end offset of each line of s between start_pos and end_pos.

    A line ends just after its newline, or at end_pos when the last line has
    no newline.

    Raises:
      RuntimeError: if _LINE_RE fails to match at the current offset.
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if line_match is None:
            raise RuntimeError("Should have matched a line")

        cursor = line_match.end(0)
        yield cursor
209
210
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def PrintHighlighted(self, out):
        """Wrap prompts, commands, tab completions and comments in <span>s.

        Each line of the code is classified with the first pattern that
        matches: a comment line, a prompt line, or a line with a trailing
        comment.  Lines that match none of them are copied unchanged.
        """
        s = self.s

        def span(m, group, open_tag):
            # Copy text up to the group, then emit the group inside a span.
            out.PrintUntil(m.start(group))
            out.Print(open_tag)
            out.PrintUntil(m.end(group))
            out.Print('</span>')

        def highlight_line(begin, end):
            m = _COMMENT_LINE_RE.match(s, begin, end)
            if m:
                span(m, 0, '<span class="sh-comment">')
                return

            m = _PROMPT_LINE_RE.match(s, begin, end)
            if m:
                #log('MATCH %r', m.groups())

                # The prompt and the command are always wrapped; the tab
                # completion marker and the comment only when present.
                span(m, 1, '<span class="sh-prompt">')
                span(m, 2, '<span class="sh-command">')
                if m.group(3):
                    span(m, 3, '<span class="sh-tab-complete">')
                if m.group(4):
                    span(m, 4, '<span class="sh-comment">')
                return

            m = _EOL_COMMENT_RE.match(s, begin, end)
            if m:
                span(m, 1, '<span class="sh-comment">')

        begin = self.start_pos
        for end in Lines(s, self.start_pos, self.end_pos):
            highlight_line(begin, end)

            # Copy whatever is left of the line, including the newline.
            out.PrintUntil(end)
            begin = end
261
262
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Render each line with help_gen.TopicHtmlRenderer.

        Lines for which the renderer returns None are left for the caller to
        copy through; other lines are replaced by the rendered HTML.

        Returns:
          The debug list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: the line is passed in still HTML ESCAPED.  It's valid to
            # just add tags and leave everything else alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
294
295
class PygmentsPlugin(_Plugin):
    """Highlight a code block with Pygments, using the lexer named `lang`."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the highlighted code, or the plain code without Pygments.

        The output is a complete replacement for the original
        <pre><code> ... </code></pre>; the caller is expected to skip the
        original tags.
        """
        try:
            from pygments import lexers
            from pygments import formatters
            from pygments import highlight
        except ImportError:
            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                )
            # Skipping the highlighting must not drop the code: re-emit the
            # original (still HTML-escaped) text in its own <pre><code>.
            out.Print('<pre><code class="language-%s">' % self.lang)
            out.Print(self.s[self.start_pos:self.end_pos])
            out.Print('</code></pre>\n')
            return

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
320
321
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The contents of every <pre> ... </pre> are highlighted with
    ShPromptPlugin; the tags themselves and all other HTML are copied through
    unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """
    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token about to be read.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                # Offsets of the start and the end of </pre>.
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Copy everything up to and including <pre>.
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting we want!
                # Only the text between the tags is highlighted, so that
                # <pre> and </pre> are never mistaken for shell text.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # ReadUntilEndTag consumed </pre>; the next token starts
                # right after it.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
363
364
# CSS class of a chapter-links code block.  Group 1 is the chapter name and
# group 2, if present, is the column at which linkifying stops.
# Example: language-chapter-links-cmd-lang_35
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''',
    re.VERBOSE)
371
372
def _HighlightShPrompt(s, out, code_start_pos, slash_code_left):
    """Highlight s[code_start_pos:slash_code_left] with ShPromptPlugin.

    The original <pre><code> tags are kept: everything up to and including
    the <code> start tag is printed first.
    """
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> ... </code></pre> blocks of an HTML string.

    Algorithm:
      1. Find each <pre> immediately followed by <code>.
      2. Pick a highlighter from the class attribute of <code>:
         - no class: default_highlighter
         - language-none, language-ysh: leave the block alone
         - language-sh-prompt, language-oils-sh, language-oil-sh:
           ShPromptPlugin
         - language-chapter-links-*: HelpTopicsPlugin
         - any other language-*: PygmentsPlugin
      3. Print the highlighted code in place of the original.

    Args:
      s: an HTML string.
      default_highlighter: None for no highlighting, or one of 'sh-prompt',
        'oils-sh', 'oil-sh'.
      debug_out: optional list; one dict per chapter-links block is appended.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is not None and not recognized.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the token about to be read.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # </code> was consumed; the next token starts after it.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                _HighlightShPrompt(s, out, code_start_pos,
                                                   slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # </code> was consumed; the next token starts after it.
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oils-sh',
                                           'language-oil-sh'):
                            # Same names as default_highlighter accepts;
                            # oil-sh is for compatibility.
                            # Here we're KEEPING the original <pre><code>
                            _HighlightShPrompt(s, out, code_start_pos,
                                               slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments
                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
525
526
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm in HighlightCode():

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and write it to f, after a '# block N' line

    Only <code> tags WITHOUT a class attribute are extracted.

    Args:
      s: an HTML string.
      f: file-like object that the extracted code is written to.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    # Start offset of the token about to be read.
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttrRaw('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # </code> was consumed; the next token starts after
                        # it.
                        end_pos = slash_code_right

        pos = end_pos

    # NOTE: out.PrintTheRest() is not called here, so the HTML after the last
    # code block is not copied to f.
590
591
class ShellSession(object):
    """Planned plugin that runs shell snippets and shows their output.

    NOT IMPLEMENTED YET: PrintHighlighted() does nothing.

    TODO: Pass this to HighlightCode as a plugin

    Intended behavior.  A block like

      $ x=one
      $ echo $x
      $ echo two

    becomes

      $ x=one
      $ echo $x
      one
      $ echo two
      two

    and the output is cached in files like

      blog/2019/12/_shell_session/
        $hash1-stdout.txt
        $hash2-stdout.txt

    The command is hashed with md5 to find its cache file.  If the file
    already exists, the command isn't run again; delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.  Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Placeholder; currently writes nothing.

        Args:
          s: an HTML string.
        """
        return None
639
640
def main(argv):
    """Command line entry point.

    Actions:
      highlight: read HTML from stdin, print it highlighted by
        SimpleHighlightCode().

    Args:
      argv: full argument list; argv[1] is the action.

    Raises:
      RuntimeError: if the action is missing or unknown.
    """
    if len(argv) < 2:
        raise RuntimeError('Expected an action, e.g. highlight')
    action = argv[1]

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Not named 'html', which would shadow the imported html module.
        contents = sys.stdin.read()
        out = SimpleHighlightCode(contents)
        print(out)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    main(sys.argv)