OILS / doctools / split_doc.py View on Github | oils.pub

160 lines, 110 significant
1#!/usr/bin/env python2
2"""split_doc.py."""
3from __future__ import print_function
4
5import json
6import optparse
7import re
8import sys
9from typing import List, Dict, IO
10
# Matches a date written as YYYY/MM/DD.  With re.VERBOSE the spaces in the
# pattern are ignored, so they are only there for readability.
DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)

# Matches a 'name: value' metadata line.  The [ ]* swallows any spaces after
# the colon; the other spaces in the pattern are ignored (re.VERBOSE).
META_RE = re.compile(r'(\S+): [ ]* (.*)', re.VERBOSE)


def _ParseDate(value):
    # type: (str) -> List[int]
    """Parse a YYYY/MM/DD metadata value into [year, month, day] integers.

    Raises:
      RuntimeError: if value does not start with a date in that form.
    """
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    return [int(part) for part in m.groups()]


def _ReadFirstNonEmpty(f):
    # type: (IO[str]) -> str
    """Return the next line of f that isn't blank, or '' at end of file."""
    while True:
        line = f.readline()
        # readline() returns '' only at EOF.  Stopping there matters: a
        # document with no body would otherwise make this loop spin forever.
        if not line or line.strip():
            return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    # type: (Dict[str, str], IO[str], IO[str], IO[str], bool) -> None
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may begin with a metadata block delimited by '---' lines,
    holding 'name: value' pairs.  'date' and 'updated_date' values are split
    into integer year / month / day fields; every other value is stored as a
    string.  If the line after the first content line starts with '=', the
    first content line is recorded as the 'title'.

    Args:
      default_vals: Values for metadata names the document doesn't define.
      entry_f: The document to read.
      meta_f: Receives the metadata as JSON.
      content_f: Receives the content, followed by a [comments-url] link
        definition when the metadata has a non-empty 'comments_url'.
      strict: If true, the document must start with a '---' line.

    Raises:
      RuntimeError: in strict mode when the leading '---' is missing, when
        the metadata block is never closed, or when a metadata line or date
        is malformed.
    """
    first_line = entry_f.readline()
    has_metadata = first_line.strip() == '---'
    if strict and not has_metadata:
        raise RuntimeError("Document should start with --- (got %r)" %
                           first_line)

    meta = {}

    if has_metadata:
        while True:
            raw_line = entry_f.readline()
            if not raw_line:
                # EOF before the closing delimiter.  Report that directly
                # instead of complaining about an "invalid" empty line.
                raise RuntimeError(
                    'Unexpected end of file in metadata (missing closing ---)')

            line = raw_line.strip()
            if line == '---':
                break

            m = META_RE.match(line)
            if not m:
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name == 'date':
                meta['year'], meta['month'], meta['day'] = _ParseDate(value)
            elif name == 'updated_date':
                (meta['updated_year'], meta['updated_month'],
                 meta['updated_day']) = _ParseDate(value)
            else:
                meta[name] = value

        # Blank lines between the metadata and the content are dropped.
        first_nonempty = _ReadFirstNonEmpty(entry_f)
    else:
        # Without metadata, the first line is the first content line.  It is
        # '' for an empty document.
        # NOTE: a blank first line is kept as is, so in that case no title is
        # detected from the lines that follow it.
        first_nonempty = first_line

    # A line of = under the first content line marks it as the title.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values, so that the document's own
    # metadata always wins.
    for name, value in default_vals.items():
        meta.setdefault(name, value)

    json.dump(meta, meta_f, indent=2)

    # Write back the two lines consumed above, then the rest of the file.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    comments_url = meta.get('comments_url', '')
    if comments_url:
        content_f.write("""
[comments-url]: %s

""" % comments_url)
106
107
def Options():
    """Build the command-line parser for split_doc.py.

    Returns:
      An optparse.OptionParser that accepts -v (repeatable; collected into
      'default_vals') and -s / --strict (boolean 'strict').
    """
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # Like awk -v
    default_opt = optparse.Option(
        '-v',
        dest='default_vals',
        action='append',
        default=[],
        help=
        "If the doc's own metadata doesn't define 'name', set it to this value"
    )
    strict_opt = optparse.Option('-s',
                                 '--strict',
                                 dest='strict',
                                 action='store_true',
                                 default=False,
                                 help="Require metadata")

    # Register in the same order as the flags are declared above.
    for opt in (default_opt, strict_opt):
        parser.add_option(opt)
    return parser
126
127
def main(argv):
    # type: (List[str]) -> None
    """Split the document named on the command line into two output files.

    Writes <out_prefix>_meta.json and <out_prefix>_content.md.

    Args:
      argv: The full argument list, with the program name at index 0,
        followed by options, then input_file and out_prefix.

    Raises:
      RuntimeError: propagated from SplitDocument() for malformed documents.
      SystemExit: via OptionParser.error() on a usage error (missing
        positional arguments, or a -v value that isn't name=value).
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # The whole argument list was parsed, so argv[0] is still the program
    # name and the two positional arguments follow it.
    if len(argv) < 3:
        o.error('expected input_file and out_prefix arguments')

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    # Validate every -v pair before any output file is created.
    default_vals = {}
    for pair in opts.default_vals:
        name, sep, value = pair.partition('=')
        if not sep:
            o.error('-v expects name=value, got %r' % pair)
        default_vals[name] = value

    with \
        open(entry_path) as entry_f, \
        open(meta_path, 'w') as meta_f, \
        open(content_path, 'w') as content_f:
        SplitDocument(default_vals,
                      entry_f,
                      meta_f,
                      content_f,
                      strict=opts.strict)
153
154
if __name__ == '__main__':
    # Script entry point.  A RuntimeError is reported as a one-line message
    # on stderr with exit status 1 instead of a traceback.
    try:
        main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        raise SystemExit(1)