OILS / doctools / split_doc.py View on Github | oils.pub

157 lines, 109 significant
1#!/usr/bin/env python2
2"""split_doc.py."""
3from __future__ import print_function
4
5import json
6import optparse
7import re
8import sys
9
# re.VERBOSE makes the spaces in these patterns insignificant, so DATE_RE
# matches '2016/11/01' and META_RE matches 'name: value'.
DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)

META_RE = re.compile(r'(\S+): [ ]* (.*)', re.VERBOSE)


def _ParseDate(value):
    """Parse a metadata date value into a (year, month, day) tuple of ints.

    Raises:
      RuntimeError: if value doesn't start with a date that DATE_RE accepts.
    """
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    year, month, day = m.groups()
    return int(year), int(month), int(day)


def _ReadFirstNonBlank(f):
    """Return the next line of f that isn't blank, or '' at end of file."""
    while True:
        line = f.readline()
        # readline() returns '' only at EOF.  Stop there; otherwise a document
        # with nothing but blank lines left would make us loop forever.
        if not line or line.strip():
            return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may start with a metadata section delimited by '---' lines,
    holding 'name: value' pairs.  'date' and 'updated_date' values are split
    into integer year / month / day fields (the latter with an 'updated_'
    prefix).  If the line after the first non-blank content line starts with
    '=', the first line is recorded as the 'title'.

    Args:
      default_vals: dict of metadata values, used only for names that the
        document doesn't define itself.
      entry_f: readable file object for the input document.
      meta_f: writable file object; receives the metadata as JSON.
      content_f: writable file object; receives the content, plus a
        [comments-url] link definition when 'comments_url' is set.
      strict: if true, the document must start with a '---' line.

    Raises:
      RuntimeError: if strict is set and there's no metadata section, if the
        metadata section isn't closed, or if a metadata line or date is
        malformed.
    """
    first_line = entry_f.readline()
    has_metadata = first_line.strip() == '---'
    if strict and not has_metadata:
        raise RuntimeError("Document should start with --- (got %r)" %
                           first_line)

    meta = {}

    if has_metadata:
        while True:
            raw_line = entry_f.readline()
            if not raw_line:
                # EOF before the closing delimiter.
                raise RuntimeError(
                    'Unexpected end of file in metadata (missing closing ---)')
            line = raw_line.strip()
            if line == '---':
                break
            m = META_RE.match(line)
            if not m:
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name in ('date', 'updated_date'):
                # 'date' -> year/month/day
                # 'updated_date' -> updated_year/updated_month/updated_day
                prefix = name[:-len('date')]
                year, month, day = _ParseDate(value)
                meta[prefix + 'year'] = year
                meta[prefix + 'month'] = month
                meta[prefix + 'day'] = day
            else:
                meta[name] = value

        first_nonempty = _ReadFirstNonBlank(entry_f)

    elif first_line.strip():
        first_nonempty = first_line
    else:
        # The first line is blank or the file is empty: skip ahead so the
        # invariant below really holds.
        first_nonempty = _ReadFirstNonBlank(entry_f)

    # Invariant: we've read the first non-empty line here (or hit EOF, in which
    # case it's '').  Now we need to see if it's the title.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values.  Plain dict iteration works on
    # both Python 2 and 3, unlike iteritems().
    for name in default_vals:
        meta.setdefault(name, default_vals[name])

    json.dump(meta, meta_f, indent=2)

    # Write the lines we consumed while looking for the title, then the rest
    # of the file.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    comments_url = meta.get('comments_url', '')
    if comments_url:
        content_f.write('\n[comments-url]: %s\n\n' % comments_url)
104
105
def Options():
    """Build the option parser for the split_doc.py command line.

    Returns:
      An optparse.OptionParser that understands -v (repeatable; each value is
      appended to the 'default_vals' list) and -s / --strict (boolean flag).
    """
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # Like awk -v
    default_vals_help = (
        "If the doc's own metadata doesn't define 'name', set it to this value"
    )
    parser.add_option('-v',
                      action='append',
                      dest='default_vals',
                      default=[],
                      help=default_vals_help)

    parser.add_option('-s',
                      '--strict',
                      action='store_true',
                      dest='strict',
                      default=False,
                      help="Require metadata")

    return parser
124
125
def main(argv):
    """Split the document named on the command line into two output files.

    Writes <out_prefix>_meta.json and <out_prefix>_content.md.

    Args:
      argv: the full argument vector, including the program name at index 0
        (the script passes sys.argv unmodified).

    Raises:
      RuntimeError: if the positional arguments are missing, if a -v value
        isn't of the form name=value, or if SplitDocument() rejects the
        document.
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # parse_args() was given the program name too, so it's still at argv[0].
    # Check the count explicitly so a usage mistake is reported through the
    # script's RuntimeError handler rather than as an IndexError traceback.
    if len(argv) < 3:
        raise RuntimeError(
            'Expected input_file and out_prefix arguments (got %r)' %
            argv[1:])

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    default_vals = {}
    for pair in opts.default_vals:
        # Split on the first '=' only, so values may contain '='.
        name, sep, value = pair.partition('=')
        if not sep:
            raise RuntimeError('Expected -v name=value (got %r)' % pair)
        default_vals[name] = value

    with \
        open(entry_path) as entry_f, \
        open(meta_path, 'w') as meta_f, \
        open(content_path, 'w') as content_f:
        SplitDocument(default_vals,
                      entry_f,
                      meta_f,
                      content_f,
                      strict=opts.strict)
150
151
# Script entry point: report RuntimeError as a one-line message on stderr and
# exit with status 1, instead of showing a traceback.
if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as err:
        message = 'FATAL: %s' % err
        print(message, file=sys.stderr)
        sys.exit(1)