OILS / doctools / src-tree.sh View on Github | oils.pub

280 lines, 164 significant
1#!/usr/bin/env bash
2#
3# Source code -> HTML tree
4#
5# Usage:
6# doctools/src-tree.sh <function name>
7
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above the one containing this
# script.  tsv-lib.sh uses this.
# Both expansions are quoted so a checkout path containing whitespace still
# resolves to a single directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
14
# Shared helpers: log comes from here (die, used further down, presumably
# does too).
. build/common.sh

# Let Python resolve imports relative to the current directory.
PYTHONPATH=.
export PYTHONPATH
18
src-tree-py() {
  ### Run doctools/src_tree.py, forwarding all arguments unchanged

  # vendor/ is added to the module search path for this one invocation only;
  # the caller's own PYTHONPATH is not modified.
  local py_path='.:vendor/'
  PYTHONPATH=$py_path doctools/src_tree.py "$@"
}
22
install-deps() {
  ### Install the packages this script needs, via apt-get (uses sudo)

  # moreutils is needed for isutf8, which check-is-utf8 runs
  local -a packages=(moreutils)
  sudo apt-get install "${packages[@]}"
}
26
lexer-files() {
  ### linked from doc/release-quality.md

  # Print the relative paths of the lexer-related generated files, one per
  # line.
  printf '%s\n' \
    _gen/_tmp/match.re2c-input.h \
    _gen/frontend/match.re2c.h \
    _gen/frontend/id_kind.asdl_c.h
}
37
_print-files() {
  ### Print every path that belongs in the source tree, one per line
  #
  # Combines generated files found on disk with the files tracked by git.
  # The output is not sorted or de-duplicated here; sorted-files does that.
  # Returns the exit status of the final grep.

  #lexer-files

  find _gen/ -type f

  # TODO: move _devbuild/bin/time-helper elsewhere?
  find _devbuild/ -type f -a -name '*.py'
  find _devbuild/help -type f

  # For some reason it shows py-yajl
  # Remove binary file (probably should delete it altogether, but it's a nice
  # test of UTF-8)

  # Remove spec/ysh-string.test.sh because it contains YSH <<<, which messes up
  # the shell here doc parser.
  # TODO: ysh needs micro-syntax support

  # grep -E replaces the obsolescent egrep.  Dots are escaped so they match a
  # literal '.' rather than any character.
  git ls-files \
    | grep -E -v 'Python-2\.7\.13|^py-yajl|rsa_travis\.enc|ysh-string\.test\.sh'

  # We also had this way of categorizing.  Should unify these line counts with
  # micro-syntax.  (This call sat after an unconditional return, so it never
  # ran; it is kept here only as a note.)
  #
  # metrics/source-code.sh overview-list
}
63
# overview-list has dupes, hence the uniq
sorted-files() {
  ### Print the file manifest sorted, with adjacent duplicate lines removed
  _print-files \
    | sort \
    | uniq
}
68
# Scratch directory holding the manifest, the per-language file lists and
# attrs.txt
BASE_DIR=_tmp/src-tree
readonly BASE_DIR
70
classify() {
  ### Classify files on stdin
  #
  # Reads one path per line from stdin and writes each path to exactly one of
  # $BASE_DIR/<lang>.txt, where <lang> is cpp, py, shell, asdl, R, js, css, md,
  # yaml, txt or other.  Every list is truncated when the function starts.
  # Afterwards prints 'wc -l' line counts for $BASE_DIR/*.txt on stdout.

  # File descriptor numbers.  Bash fills these in through the {name}>
  # redirections attached to the loop; local keeps them out of global scope.
  local cpp py shell asdl R js css md yaml txt other
  local path fd

  # IFS= preserves leading/trailing whitespace in a path, and the || clause
  # keeps a final line that lacks a trailing newline.
  while IFS= read -r path || [[ -n $path ]]; do
    case $path in
      */here-doc.test.sh|*/posix.test.sh|*/gold/complex-here-docs.sh|*/07-unterminated-here-doc.sh)
        # Plain text since they can have invalid here docs
        #
        # TODO: make a style for *.test.sh?
        fd=$txt
        ;;

      # TODO: Fix BUG in micro-syntax: $(( 1 << i )) is confused for here doc!
      # Must come before the generic *.sh arm.
      demo/sparse-array.sh)
        fd=$txt
        ;;

      *.cc|*.c|*.h)
        fd=$cpp
        ;;
      *.py|*.pyi|*.pgen2)  # pgen2 uses Python lexical syntax
        fd=$py
        ;;
      *.sh|*.bash|*.osh|*.ysh|configure|install|uninstall)
        fd=$shell
        ;;
      *.asdl)
        fd=$asdl
        ;;
      *.R)
        fd=$R
        ;;
      *.js)
        fd=$js
        ;;
      *.css)
        fd=$css
        ;;
      *.md)
        fd=$md
        ;;
      *.yml)
        fd=$yaml
        ;;
      *.txt)
        fd=$txt
        ;;
      *)
        fd=$other
        ;;
    esac

    printf '%s\n' "$path" >&"$fd"
  done {cpp}>"$BASE_DIR/cpp.txt" \
       {py}>"$BASE_DIR/py.txt" \
       {shell}>"$BASE_DIR/shell.txt" \
       {asdl}>"$BASE_DIR/asdl.txt" \
       {R}>"$BASE_DIR/R.txt" \
       {js}>"$BASE_DIR/js.txt" \
       {css}>"$BASE_DIR/css.txt" \
       {md}>"$BASE_DIR/md.txt" \
       {yaml}>"$BASE_DIR/yaml.txt" \
       {txt}>"$BASE_DIR/txt.txt" \
       {other}>"$BASE_DIR/other.txt"

  # Descriptors opened with {name}> stay open after the command they were
  # attached to, so close them explicitly.  Otherwise they leak into the rest
  # of the script and into every child process it starts.
  exec {cpp}>&- {py}>&- {shell}>&- {asdl}>&- {R}>&- {js}>&- {css}>&- \
       {md}>&- {yaml}>&- {txt}>&- {other}>&-

  # Other
  # .mk
  # .re2c.txt - rename this one to .h
  #
  # Just leave those un-highlighted for now

  wc -l "$BASE_DIR"/*.txt
}
141
all-html-to-files() {
  ### Highlight every classified file and write HTML fragments
  #
  # Args:
  #   $1: output directory, handed to 'src-tree-py write-html-fragments'
  #
  # For each language, the paths listed in $BASE_DIR/<lang>.txt are passed to
  # micro_syntax, whose output is piped into this same script ($0) running
  # src-tree-py.  Whatever that prints goes to stdout; progress goes through
  # log.
  local out_dir=$1
  local lang

  for lang in cpp py shell asdl R js css md yaml txt other; do
    log "=== $lang ==="

    # -d '\n' makes xargs treat each line as exactly one path, so spaces and
    # quote characters in file names are passed through intact.
    xargs -d '\n' _tmp/micro-syntax/micro_syntax -l "$lang" -w \
      < "$BASE_DIR/$lang.txt" \
      | "$0" src-tree-py write-html-fragments "$out_dir"
    log ''
  done
}
152
check-is-utf8() {
  ### Die unless isutf8 accepts every file listed in the manifest
  #
  # Args:
  #   $1: manifest file with one path per line
  #
  # Relies on 'isutf8 --list' (moreutils, see install-deps) printing the
  # offending files and exiting non-zero when it finds any.
  local manifest=$1

  log '--- Checking that files are UTF-8'
  log ''

  # -d '\n': one path per line, so spaces and quotes in names survive xargs
  if ! xargs -d '\n' isutf8 --list < "$manifest"; then
    echo
    die "The files shown aren't UTF-8"
  fi
}
164
highlight() {
  ### Build micro_syntax, then render the whole source tree as HTML
  #
  # Steps: build the highlighter, write the sorted manifest, classify it by
  # language, write HTML fragments, then write the directory listings.

  local build_variant=opt
  #local build_variant=asan

  local html_root=_tmp/src-tree-www
  local manifest=$BASE_DIR/manifest.txt
  local attrs_file=$BASE_DIR/attrs.txt

  doctools/micro-syntax.sh build "$build_variant"
  echo

  mkdir -p "$BASE_DIR" "$html_root"

  sorted-files > "$manifest"
  wc -l "$manifest"
  echo

  # Fails if there is non UTF-8
  # Disable until moreutils is in our Soil CI images
  # check-is-utf8 $BASE_DIR/manifest.txt

  # Figure file types
  classify < "$manifest"

  # stdout of the fragment step is captured and fed to the 'dirs' step below
  time all-html-to-files "$html_root" > "$attrs_file"

  # Now write index.html dir listings
  time src-tree-py dirs "$html_root" < "$attrs_file"
}
193
soil-run() {
  ### Entry point that renders the source tree; takes no arguments
  #
  # NOTE(review): this used to say the tree starts at
  # _tmp/src-tree/index.html, but highlight passes _tmp/src-tree-www as the
  # HTML directory -- confirm where index.html actually lands.

  highlight
}
199
cat-benchmark() {
  ### Baseline timing: read every manifest file and count the bytes
  #
  # Recorded result: 355 ms to cat the files!  It takes 2.75 seconds to syntax
  # highlight 'src_tree.py files'
  #
  # Producing 5.9 MB of text.
  #
  # Note: wc -l is not much slower.

  time sorted-files \
    | xargs cat \
    | wc --bytes
}
208
micro-bench() {
  ### Time micro_syntax over the whole manifest, without and then with -w
  #
  # ~435 ms, not bad.  cat is ~355 ms, so that's only 70 ms more.

  local variant=opt
  #local variant=asan

  local highlighter=_tmp/micro-syntax/micro_syntax

  # The language used for the timing runs.  cpp was assigned first and then
  # overridden by py, under the original note "Buggy!".
  #local lang=cpp
  local lang=py

  doctools/micro-syntax.sh build "$variant"

  # optimization:
  # lang=cpp: 11.4 MB -> 11.3 MB
  time sorted-files \
    | xargs "$highlighter" -l "$lang" \
    | wc --bytes

  # optimization:
  # lang=cpp: 18.5 MB -> 18.4 MB
  time sorted-files \
    | xargs "$highlighter" -l "$lang" -w \
    | wc --bytes
}
229
230
231#
232# Misc ways of counting files
233# TODO: unify or remove these
234#
235
repo() {
  ### Print the paths git tracks, as reported by 'git ls-files'
  git ls-files
}
239
no-cpython() {
  ### Filter stdin, dropping every line that contains 'Python-2.7.13'
  #
  # -F matches the version string literally; as a regex, each '.' would match
  # any character.  Like any grep, this returns 1 when no line is printed.
  grep -v -F -e 'Python-2.7.13'
}
243
compress() {
  ### Zip the git-tracked files, minus CPython, into _tmp/source-code.zip
  local zip_path=_tmp/source-code.zip

  # Remove any archive left over from an earlier run
  rm -f -v "$zip_path"

  repo \
    | no-cpython \
    | xargs --verbose -- zip "$zip_path"
  echo

  # 1688 files in 3.6 MB, OK seems fine
  repo \
    | no-cpython \
    | wc -l

  ls -l -h "$zip_path"
}
257
extensions() {
  ### Print a histogram of file extensions, least common first
  #
  # Considers git-tracked files, minus CPython and anything under testdata/.
  # The "extension" is the text after the last '.' in the path; a path with no
  # dot counts as its own extension.

  # -F is the POSIX spelling of the field separator option.  The long
  # --field-separator form is a gawk extension that other awks reject.
  repo \
    | no-cpython \
    | grep -v 'testdata/' \
    | awk -F . '{ print $NF }' \
    | sort \
    | uniq -c \
    | sort -n
}
265
266#
267# Debug CSS
268#
269
css-deploy() {
  ### Copy the stylesheet and one sample page to the server, to debug CSS
  #
  # NOTE(review): this reads the sample page from _tmp/src-tree/www/, while
  # highlight writes HTML under _tmp/src-tree-www -- confirm which is current.
  local host=oilshell.org
  local remote_dir=$host/tmp
  local page=_tmp/configure.html

  ssh "$host" mkdir -p "$remote_dir"
  scp web/src-tree.css "$host:$remote_dir"

  # Strip the relative path to web/, since the stylesheet is copied to the
  # same remote directory as the page
  sed 's;../../../web/;;g' _tmp/src-tree/www/configure.html > "$page"
  scp "$page" "$host:$remote_dir"
}
277
# Run the function named by the first argument, passing the remaining
# arguments to it -- but only when this file is invoked under the name
# src-tree.sh.
# Quoting keeps a script path that contains whitespace from being split into
# several words, which would break the comparison.
if [[ "$(basename -- "$0")" == 'src-tree.sh' ]]; then
  "$@"
fi