1 | #!/usr/bin/env python2
|
2 | """html_lib.py.
|
3 |
|
4 | Shared between HTML processors.
|
5 |
|
6 | TODO: Write a "pull parser" API!
|
7 | """
|
8 | from __future__ import print_function
|
9 |
|
10 | import cgi
|
11 | import re
|
12 | from typing import List
|
13 |
|
14 |
|
def _EscapeAttrValue(s):
    # type: (str) -> str
    """Escape s so it can be embedded in a double-quoted HTML attribute.

    Replaces & < > and " with their entities.  This produces the same output
    as Python 2's cgi.escape(s, quote=True), without depending on that
    function.
    """
    # '&' must be handled first; otherwise the ampersands introduced by the
    # later replacements would themselves get escaped.
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    # The value is wrapped in double quotes, so a raw '"' would terminate the
    # attribute early.
    s = s.replace('"', '&quot;')
    return s


def AttrsToString(attrs):
    # type: (List) -> str
    """Serialize (name, value) pairs as HTML attribute text.

    Args:
      attrs: iterable of (name, value) string pairs; may be empty or None.

    Returns:
      '' when attrs is empty or None.  Otherwise each pair is rendered as
      ' name="value"' (note the leading space) and the pieces are
      concatenated, so the result can be appended directly after a tag name.
      Values are escaped for & < > and "; names are emitted as-is.
    """
    if not attrs:
        return ''

    # Important: there's a leading space here.
    # TODO: Change href="$help:command" to href="help.html#command"
    return ''.join(
        ' %s="%s"' % (k, _EscapeAttrValue(v)) for (k, v) in attrs)
|
23 |
|
24 |
|
def PrettyHref(s, preserve_anchor_case=False):
    # type: (str, bool) -> str
    """Derive an anchor name free of special characters from heading text.

    Loosely follows GitHub's heading anchors, without aiming for 100%
    compatibility: unlike GitHub, words that end up empty are dropped.

    Args:
      s: heading text.
      preserve_anchor_case: when true, '/' is kept in addition to word
        characters and the result is NOT lowercased (used for doc/ref
        anchors such as List/append and cmd/append).  As the flag affects
        more than case, it could be renamed.

    Returns:
      The surviving words joined with '-'; lowercased unless
      preserve_anchor_case is set.
    """
    # Characters that survive inside each word: \w only, or \w plus '/'.
    allowed_re = r'[\w/]+' if preserve_anchor_case else r'\w+'

    pieces = []
    # Words are delimited by runs of whitespace and/or hyphens.
    for word in re.split(r'[\s\-]+', s):
        cleaned = ''.join(re.findall(allowed_re, word))
        if cleaned:  # skip words with nothing left after filtering
            pieces.append(cleaned)

    joined = '-'.join(pieces)
    return joined if preserve_anchor_case else joined.lower()
|