1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Source code -> HTML tree
|
4 | #
|
5 | # Usage:
|
6 | # doctools/src-tree.sh <function name>
|
7 |
|
# Strict mode: abort on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above the directory holding this
# script.  Every expansion is quoted so that a checkout path containing spaces
# or glob characters still resolves to the right directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this
readonly REPO_ROOT
|
14 |
|
# Shared helpers: this file calls log from it (and presumably die, which is
# used below but not defined here -- confirm).  The path is relative to the
# current directory, so the script is presumably meant to be run from the
# repository root.
source build/common.sh # log

# Also relative to the current directory.
export PYTHONPATH=.
|
18 |
|
install-deps() {
  ### Install the system package that check-is-utf8 depends on

  local package=moreutils  # for isutf8
  sudo apt-get install "$package"
}
|
22 |
|
lexer-files() {
  ### linked from doc/release-quality.md

  # Prints three generated lexer-related paths, one per line.
  printf '%s\n' \
    _gen/_tmp/match.re2c-input.h \
    _gen/frontend/match.re2c.h \
    _gen/frontend/id_kind.asdl_c.h
}
|
33 |
|
_print-files() {
  ### Print every path that goes into the source tree, one per line
  #
  # Sources: all files under _gen/, the *.py files under _devbuild/, all files
  # under _devbuild/help, and the git-tracked files minus a few exclusions.
  # Nothing is de-duplicated or sorted here; sorted-files does that.
  # The exit status is that of the final git | grep pipeline.

  #lexer-files

  find _gen/ -type f

  # TODO: move _devbuild/bin/time-helper elsewhere?
  find _devbuild/ -type f -a -name '*.py'
  find _devbuild/help -type f

  # For some reason it shows py-yajl
  # Remove binary file (probably should delete it altogether, but it's a nice
  # test of UTF-8)

  # Remove spec/ysh-string.test.sh because it contains YSH <<<, which messes up
  # the shell here doc parser.
  # TODO: ysh needs micro-syntax support

  # 'grep -E' rather than egrep: egrep is obsolescent and recent GNU grep
  # prints a warning for it.  The dots are escaped so they only match a
  # literal '.'.
  git ls-files \
    | grep -E -v 'Python-2\.7\.13|^py-yajl|rsa_travis\.enc|ysh-string\.test\.sh'

  # We also had this way of categorizing.  Should unify these line counts with
  # micro-syntax:
  #
  #   metrics/source-code.sh overview-list
}
|
59 |
|
60 | # overview-list has dupes
|
sorted-files() {
  ### Print the paths from _print-files in sorted order, without duplicates

  _print-files \
    | sort \
    | uniq
}
|
64 |
|
65 | readonly BASE_DIR=_tmp/src-tree
|
66 |
|
classify() {
  ### Classify files on stdin
  #
  # Reads one path per line and writes it to exactly one of the lists
  # $BASE_DIR/<lang>.txt, chosen by file name.  Every list is truncated up
  # front, so each of them exists afterwards even if nothing matched it.
  # Finishes by printing line counts for $BASE_DIR/*.txt on stdout.

  # The {name}> redirections on the loop store the allocated descriptor
  # numbers in these variables; keep them (and the loop variables) local.
  local cpp py shell asdl R js css md yaml txt other
  local path fd

  mkdir -p "$BASE_DIR"

  # IFS= keeps leading and trailing whitespace in a path intact.
  while IFS= read -r path; do
    case $path in
      */here-doc.test.sh|*/posix.test.sh|*/gold/complex-here-docs.sh|*/07-unterminated-here-doc.sh)
        # Plain text since they can have invalid here docs
        #
        # TODO: make a style for *.test.sh?
        fd=$txt
        ;;

      # TODO: Fix BUG in micro-syntax: $(( 1 << i )) is confused for here doc!
      demo/sparse-array.sh)
        fd=$txt
        ;;

      *.cc|*.c|*.h)
        fd=$cpp
        ;;
      *.py|*.pyi|*.pgen2)  # pgen2 uses Python lexical syntax
        fd=$py
        ;;
      *.sh|*.bash|*.osh|*.ysh|configure|install|uninstall)
        fd=$shell
        ;;
      *.asdl)
        fd=$asdl
        ;;
      *.R)
        fd=$R
        ;;
      *.js)
        fd=$js
        ;;
      *.css)
        fd=$css
        ;;
      *.md)
        fd=$md
        ;;
      *.yml)
        fd=$yaml
        ;;
      *.txt)
        fd=$txt
        ;;
      *)
        fd=$other
        ;;
    esac

    printf '%s\n' "$path" >&"$fd"
  done {cpp}>"$BASE_DIR/cpp.txt" \
       {py}>"$BASE_DIR/py.txt" \
       {shell}>"$BASE_DIR/shell.txt" \
       {asdl}>"$BASE_DIR/asdl.txt" \
       {R}>"$BASE_DIR/R.txt" \
       {js}>"$BASE_DIR/js.txt" \
       {css}>"$BASE_DIR/css.txt" \
       {md}>"$BASE_DIR/md.txt" \
       {yaml}>"$BASE_DIR/yaml.txt" \
       {txt}>"$BASE_DIR/txt.txt" \
       {other}>"$BASE_DIR/other.txt"

  # Descriptors allocated with {name}> stay open after the loop ends, so
  # close them explicitly instead of leaking eleven descriptors per call.
  exec {cpp}>&- {py}>&- {shell}>&- {asdl}>&- {R}>&- {js}>&- {css}>&- \
    {md}>&- {yaml}>&- {txt}>&- {other}>&-

  # Other
  # .mk
  # .re2c.txt - rename this one to .h
  #
  # Just leave those un-highlighted for now

  wc -l "$BASE_DIR"/*.txt
}
|
137 |
|
all-html-to-files() {
  ### Run micro_syntax over every classified list and write HTML fragments
  #
  # Args:
  #   $1 - output directory, handed to 'src_tree.py write-html-fragments'
  #
  # For each list $BASE_DIR/<lang>.txt (as written by classify), the listed
  # paths become arguments of 'micro_syntax -l <lang> -w' via xargs, and that
  # output is piped to src_tree.py.  Whatever src_tree.py prints is this
  # function's stdout.
  local out_dir=$1
  local lang

  for lang in cpp py shell asdl R js css md yaml txt other; do
    log "=== $lang ==="

    # Quoted so an output directory with spaces stays a single argument.
    xargs _tmp/micro-syntax/micro_syntax -l "$lang" -w < "$BASE_DIR/$lang.txt" \
      | doctools/src_tree.py write-html-fragments "$out_dir"
    log ''
  done
}
|
148 |
|
check-is-utf8() {
  ### Die unless isutf8 accepts every file listed in the manifest
  #
  # Args:
  #   $1 - manifest file with one path per line
  #
  # Anything 'isutf8 --list' prints goes to stdout; on failure an empty line
  # follows and die is called.
  local manifest=$1

  log '--- Checking that files are UTF-8'
  log ''

  # The redirect target is quoted: unquoted, a manifest path with spaces is
  # rejected as an ambiguous redirect.
  if ! xargs isutf8 --list < "$manifest"; then
    echo
    die "The files shown aren't UTF-8"
  fi
}
|
160 |
|
highlight() {
  ### Build micro_syntax, then write the HTML source tree
  #
  # Steps: build the tool, write the sorted manifest, classify it by file
  # type, write HTML fragments (capturing their attrs output), and finally let
  # src_tree.py write the directory listings from those attrs.

  local build_variant=opt
  #local build_variant=asan

  doctools/micro-syntax.sh build "$build_variant"
  echo

  local html_root=_tmp/src-tree-www
  mkdir -p "$BASE_DIR" "$html_root"

  local manifest=$BASE_DIR/manifest.txt
  sorted-files > "$manifest"
  wc -l "$manifest"
  echo

  # Fails if there is non UTF-8
  # Disable until moreutils is in our Soil CI images
  # check-is-utf8 $BASE_DIR/manifest.txt

  # Figure file types
  classify < "$manifest"

  local attrs_file=$BASE_DIR/attrs.txt
  time all-html-to-files "$html_root" > "$attrs_file"

  # Now write index.html dir listings
  time doctools/src_tree.py dirs "$html_root" < "$attrs_file"
}
|
189 |
|
soil-run() {
  ### Write the HTML source tree by running highlight
  #
  # NOTE(review): this comment used to say the tree starts at
  # _tmp/src-tree/index.html, but highlight writes its HTML under
  # _tmp/src-tree-www -- confirm where index.html actually ends up.

  highlight
}
|
195 |
|
cat-benchmark() {
  ### Baseline timing: cat every manifest file and count the bytes

  # 355 ms to cat the files! It takes 2.75 seconds to syntax highlight 'src_tree.py files'
  #
  # Producing 5.9 MB of text.
  time sorted-files \
    | xargs cat \
    | wc -c

  # Note: wc -l is not much slower.
}
|
204 |
|
micro-bench() {
  ### Time micro_syntax over the whole manifest, without and with -w
  #
  # ~435 ms, not bad. cat is ~355 ms, so that's only 70 ms more.

  local build_variant=opt
  #local build_variant=asan
  doctools/micro-syntax.sh build "$build_variant"

  local tool=_tmp/micro-syntax/micro_syntax

  # Every file in the manifest is processed with this single -l value.
  #local lexer_lang=cpp

  # Buggy!
  local lexer_lang=py

  # optimization:
  # lang=cpp: 11.4 MB -> 11.3 MB
  time sorted-files \
    | xargs "$tool" -l "$lexer_lang" \
    | wc --bytes

  # optimization:
  # lang=cpp: 18.5 MB -> 18.4 MB
  time sorted-files \
    | xargs "$tool" -l "$lexer_lang" -w \
    | wc --bytes
}
|
225 |
|
226 |
|
227 | #
|
228 | # Misc ways of counting files
|
229 | # TODO: unify or remove these
|
230 | #
|
231 |
|
repo() {
  ### Print the files git tracks, as listed by 'git ls-files'
  git ls-files
}
|
235 |
|
no-cpython() {
  ### Filter stdin, dropping every line that contains Python-2.7.13
  #
  # -F matches the version string literally; treated as a regex, each '.'
  # would match any character.
  grep -F -v 'Python-2.7.13'
}
|
239 |
|
compress() {
  ### Zip the files from 'repo | no-cpython' into _tmp/source-code.zip
  #
  # Prints the number of files and an ls listing of the archive afterwards.
  local archive=_tmp/source-code.zip

  # Remove any archive left over from a previous run
  rm -f -v "$archive"

  repo \
    | no-cpython \
    | xargs --verbose -- zip "$archive"
  echo

  # 1688 files in 3.6 MB, OK seems fine
  repo \
    | no-cpython \
    | wc -l

  ls -l -h "$archive"
}
|
253 |
|
extensions() {
  ### Count files by extension, least frequent first
  #
  # Input is 'repo | no-cpython' without paths containing testdata/.  The
  # "extension" is whatever follows the last '.' in the path; a path without
  # any '.' counts under its full name.
  #
  # -F is the POSIX way to set the field separator; the long
  # --field-separator option is a GNU awk extension.
  repo \
    | no-cpython \
    | grep -v 'testdata/' \
    | awk -F . '{ print $NF }' \
    | sort \
    | uniq -c \
    | sort -n
}
|
261 |
|
262 | #
|
263 | # Debug CSS
|
264 | #
|
265 |
|
css-deploy() {
  ### Copy the stylesheet and one sample page to a tmp dir on the web host
  local server=oilshell.org
  local remote_dir=$server/tmp
  local page=_tmp/configure.html

  ssh $server mkdir -p $remote_dir
  scp web/src-tree.css $server:$remote_dir

  # Drop the ../../../web/ prefix from the page's links before uploading it
  # next to the stylesheet.
  # NOTE(review): this reads from _tmp/src-tree/www, while highlight writes
  # its HTML under _tmp/src-tree-www -- confirm the input path is current.
  sed 's;../../../web/;;g' _tmp/src-tree/www/configure.html > $page
  scp $page $server:$remote_dir
}
|
273 |
|
# Run the function named by the first argument, passing the remaining
# arguments to it -- but only when this file runs under its own name, so that
# nothing is dispatched when it is loaded under a different name.
# ${0##*/} is the basename of $0; unlike an unquoted $(basename $0) it is not
# word-split when the script path contains spaces.
if [[ "${0##*/}" == 'src-tree.sh' ]]; then
  "$@"
fi
|