OILS / doctools / src-tree.sh View on Github | oils.pub

276 lines, 161 significant
1#!/usr/bin/env bash
2#
3# Source code -> HTML tree
4#
5# Usage:
6# doctools/src-tree.sh <function name>
7
# Strict mode: error on unset variables, fail a pipeline if any stage fails,
# and exit on any unhandled non-zero status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that holds this script.
# Both expansions are quoted so a checkout path containing spaces works.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this
readonly REPO_ROOT
14
# Shared helpers; this script uses log from it.
source build/common.sh

# Python started from this script looks for modules in the current directory.
PYTHONPATH=.
export PYTHONPATH
18
install-deps() {
  ### Install the system packages this script relies on (needs sudo)
  local -a packages=(
    moreutils  # for isutf8
  )
  sudo apt-get install "${packages[@]}"
}
22
lexer-files() {
  ### linked from doc/release-quality.md
  #
  # Prints three generated lexer-related paths, one per line.  Uses a single
  # printf instead of a loop, so no loop variable leaks into the caller.

  printf '%s\n' \
    _gen/_tmp/match.re2c-input.h \
    _gen/frontend/match.re2c.h \
    _gen/frontend/id_kind.asdl_c.h
}
33
_print-files() {
  ### Print every path to include in the source tree, one per line
  #
  # Combines generated files found on disk with the files tracked by git.
  # The exit status is that of the final git | grep pipeline.

  #lexer-files

  find _gen/ -type f

  # TODO: move _devbuild/bin/time-helper elsewhere?
  find _devbuild/ -type f -a -name '*.py'
  find _devbuild/help -type f

  # For some reason it shows py-yajl
  # Remove binary file (probably should delete it altogether, but it's a nice
  # test of UTF-8)

  # Remove spec/ysh-string.test.sh because it contains YSH <<<, which messes up
  # the shell here doc parser.
  # TODO: ysh needs micro-syntax support

  # grep -E, not egrep: GNU grep 3.8+ prints an "obsolescent" warning for egrep
  git ls-files | grep -E -v 'Python-2.7.13|^py-yajl|rsa_travis.enc|ysh-string.test.sh'

  # We also had this way of categorizing.  Should unify these line counts with
  # micro-syntax.  (Was unreachable code after an early return.)
  #
  #   metrics/source-code.sh overview-list
}
59
60# overview-list has dupes
sorted-files() {
  ### Print the file list sorted, with repeated lines collapsed
  _print-files \
    | sort \
    | uniq
}
64
# Scratch directory: the manifest, the per-language lists and attrs.txt are
# written here.
BASE_DIR=_tmp/src-tree
readonly BASE_DIR
66
classify() {
  ### Classify files on stdin
  #
  # Reads one path per line and writes it to $BASE_DIR/<lang>.txt, where <lang>
  # is picked from the file name.  All eleven list files are created
  # (truncated), even those that receive no paths.  Finally prints line counts
  # for every $BASE_DIR/*.txt on stdout.

  local path fd
  # Each of these receives a descriptor number from the {name}> redirections
  # attached to the loop below.
  local cpp py shell asdl R js css md yaml txt other

  # IFS= keeps leading/trailing whitespace in a path intact
  while IFS= read -r path; do
    case $path in
      */here-doc.test.sh|*/posix.test.sh|*/gold/complex-here-docs.sh|*/07-unterminated-here-doc.sh)
        # Plain text since they can have invalid here docs
        #
        # TODO: make a style for *.test.sh?
        fd=$txt
        ;;

      # TODO: Fix BUG in micro-syntax: $(( 1 << i )) is confused for here doc!
      demo/sparse-array.sh)
        fd=$txt
        ;;

      *.cc|*.c|*.h)
        fd=$cpp
        ;;
      *.py|*.pyi|*.pgen2)  # pgen2 uses Python lexical syntax
        fd=$py
        ;;
      *.sh|*.bash|*.osh|*.ysh|configure|install|uninstall)
        fd=$shell
        ;;
      *.asdl)
        fd=$asdl
        ;;
      *.R)
        fd=$R
        ;;
      *.js)
        fd=$js
        ;;
      *.css)
        fd=$css
        ;;
      *.md)
        fd=$md
        ;;
      *.yml)
        fd=$yaml
        ;;
      *.txt)
        fd=$txt
        ;;
      *)
        fd=$other
        ;;
    esac

    # printf rather than echo, so a path such as "-n" is written literally
    printf '%s\n' "$path" >&"$fd"
  done {cpp}>"$BASE_DIR/cpp.txt" \
       {py}>"$BASE_DIR/py.txt" \
       {shell}>"$BASE_DIR/shell.txt" \
       {asdl}>"$BASE_DIR/asdl.txt" \
       {R}>"$BASE_DIR/R.txt" \
       {js}>"$BASE_DIR/js.txt" \
       {css}>"$BASE_DIR/css.txt" \
       {md}>"$BASE_DIR/md.txt" \
       {yaml}>"$BASE_DIR/yaml.txt" \
       {txt}>"$BASE_DIR/txt.txt" \
       {other}>"$BASE_DIR/other.txt"

  # Descriptors opened with {name}> outlive the command they are attached to,
  # so close them explicitly instead of leaking them into the rest of the run.
  exec {cpp}>&- {py}>&- {shell}>&- {asdl}>&- {R}>&- {js}>&- \
       {css}>&- {md}>&- {yaml}>&- {txt}>&- {other}>&-

  # Other
  # .mk
  # .re2c.txt - rename this one to .h
  #
  # Just leave those un-highlighted for now

  wc -l "$BASE_DIR"/*.txt
}
137
all-html-to-files() {
  ### Run micro_syntax over each per-language list and write HTML fragments
  #
  # Args:
  #   $1  output directory, handed to 'src_tree.py write-html-fragments'
  #
  # For every language, the paths in $BASE_DIR/<lang>.txt are passed to
  # micro_syntax through xargs and the result is piped into src_tree.py.
  # Whatever src_tree.py prints ends up on this function's stdout.
  local out_dir=$1
  local lang  # keep the loop variable out of the caller's scope

  for lang in cpp py shell asdl R js css md yaml txt other; do
    log "=== $lang ==="

    xargs _tmp/micro-syntax/micro_syntax -l "$lang" -w < "$BASE_DIR/$lang.txt" \
      | doctools/src_tree.py write-html-fragments "$out_dir"
    log ''
  done
}
148
check-is-utf8() {
  ### Die unless isutf8 accepts every file named in the manifest
  #
  # Args:
  #   $1  manifest: file whose contents are fed to xargs on stdin
  local manifest=$1

  log '--- Checking that files are UTF-8'
  log ''

  # Quoted: an unquoted redirect target containing spaces is an
  # "ambiguous redirect" error in bash.
  if ! xargs isutf8 --list < "$manifest"; then
    echo
    die "The files shown aren't UTF-8"
  fi
}
160
highlight() {
  ### Build micro_syntax, then render every listed file and the dir indexes
  local build_variant=opt
  #local build_variant=asan
  local html_root=_tmp/src-tree-www
  local manifest_path=$BASE_DIR/manifest.txt
  local attrs_path=$BASE_DIR/attrs.txt

  doctools/micro-syntax.sh build "$build_variant"
  echo

  mkdir -p "$BASE_DIR" "$html_root"

  # Full list of files to render, with its line count shown for reference
  sorted-files > "$manifest_path"
  wc -l "$manifest_path"
  echo

  # Fails if there is non UTF-8
  # Disable until moreutils is in our Soil CI images
  # check-is-utf8 $BASE_DIR/manifest.txt

  # Figure file types
  classify < "$manifest_path"

  # Stdout of the rendering step is saved and replayed into the 'dirs' step
  time all-html-to-files "$html_root" > "$attrs_path"

  # Now write index.html dir listings
  time doctools/src_tree.py dirs "$html_root" < "$attrs_path"
}
189
soil-run() {
  ### Generate the HTML source tree by running highlight
  #
  # NOTE(review): the earlier comment here said the tree starts at
  # _tmp/src-tree/index.html, but highlight passes _tmp/src-tree-www to
  # src_tree.py - confirm which location is current.

  highlight
}
195
cat-benchmark() {
  # Baseline: time how long it takes just to cat every file and count bytes.
  #
  # 355 ms to cat the files!  It takes 2.75 seconds to syntax highlight
  # 'src_tree.py files'
  #
  # Producing 5.9 MB of text.
  #
  # Note: wc -l is not much slower.
  time sorted-files \
    | xargs cat \
    | wc -c
}
204
micro-bench() {
  # Time micro_syntax over the whole file list, without and with -w.
  #
  # ~435 ms, not bad.  cat is ~355 ms, so that's only 70 ms more.

  local build_variant=opt
  #local build_variant=asan
  doctools/micro-syntax.sh build "$build_variant"

  # cpp used to be assigned first and was immediately overridden by py:
  #local lang=cpp

  # Buggy!
  local lang=py

  local highlighter=_tmp/micro-syntax/micro_syntax

  # optimization:
  # lang=cpp: 11.4 MB -> 11.3 MB
  time sorted-files \
    | xargs "$highlighter" -l "$lang" \
    | wc -c

  # optimization:
  # lang=cpp: 18.5 MB -> 18.4 MB
  time sorted-files \
    | xargs "$highlighter" -l "$lang" -w \
    | wc -c
}
225
226
227#
228# Misc ways of counting files
229# TODO: unify or remove these
230#
231
repo() {
  ### Print what 'git ls-files' reports for the current directory
  git ls-files
}
235
no-cpython() {
  ### Filter stdin: drop every line matching Python-2.7.13
  grep -v -e 'Python-2.7.13'
}
239
compress() {
  ### Zip the git-tracked files, minus CPython, into _tmp/source-code.zip
  local archive=_tmp/source-code.zip

  # Remove any archive left over from a previous run
  rm -f -v "$archive"

  repo \
    | no-cpython \
    | xargs --verbose -- zip "$archive"
  echo

  # 1688 files in 3.6 MB, OK seems fine
  repo \
    | no-cpython \
    | wc -l

  ls -l -h "$archive"
}
253
extensions() {
  ### Count tracked files per extension, least frequent first
  #
  # Skips CPython and paths containing testdata/.  The "extension" is the text
  # after the last '.', so a path without a dot is counted under its full name.
  #
  # Uses awk -F (POSIX) rather than --field-separator, which only gawk accepts.
  repo \
    | no-cpython \
    | grep -v 'testdata/' \
    | awk -F . '{ print $NF }' \
    | sort | uniq -c | sort -n
}
261
262#
263# Debug CSS
264#
265
css-deploy() {
  ### Copy the stylesheet plus one sample page to the server, to debug CSS
  local server=oilshell.org
  local remote_dir=$server/tmp
  local sample_page=_tmp/configure.html

  ssh "$server" mkdir -p "$remote_dir"
  scp web/src-tree.css "${server}:${remote_dir}"

  # Strip the ../../../web/ prefix from the page before uploading it.
  #
  # NOTE(review): highlight writes its HTML under _tmp/src-tree-www, while this
  # reads _tmp/src-tree/www - confirm the input path is still current.
  sed 's;../../../web/;;g' _tmp/src-tree/www/configure.html > "$sample_page"
  scp "$sample_page" "${server}:${remote_dir}"
}
273
# Dispatch: when this file is executed under its own name, run the function
# named by the first argument, passing it the remaining arguments.
# $0 is quoted so the name check also works when the path contains spaces.
if test "$(basename "$0")" = 'src-tree.sh'; then
  "$@"
fi