OILS / doctools / micro-syntax.sh View on Github | oils.pub

383 lines, 151 significant
1#!/usr/bin/env bash
2#
3# Lexing / Parsing experiment
4#
5# Usage:
6# doctools/micro-syntax.sh <function name>
7
8# TODO:
9# - Rename to micro-syntax, from micro-grammars and uchex?
10# - micro-segmenting and lexing - comments, strings, and maybe { }
11# - micro-parsing: for indent/dedent
12#
13# - use GNU long flags, test them
14
# C++
#
# - ANSI should cat all argv, and it should print line numbers
# - HTML string can append with netstrings!
# - (path, html, path, html, ...) should be sufficient, though not fully general
# - print SLOC at the top
# - COALESCE tokens to save space
22
23# Then src-tree reads this stream
24# - actually it can take the filenames directly from here
25# - it can discard the big HTML!
26
27# Later: port some kind of parser combinator for
28# - def class, etc.
29
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that contains this script.
# '&&' makes a failed 'cd' fail the whole assignment instead of silently
# recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this
35
#source build/dev-shell.sh  # 're2c' in path
# Relative path: resolved against the current working directory.
# NOTE(review): presumably this is where the 'die' and 'log' helpers used
# below come from - confirm.
source build/ninja-rules-cpp.sh
38
# Run re2c with tag support and strict warnings.
#
# Arguments:
#   $1 - input file handed to re2c
#   $2 - output file, passed via -o
my-re2c() {
  local in=$1
  local out=$2

  # Copied from build/py.sh, and added --tags
  # Paths are quoted so names containing spaces stay single arguments.
  re2c --tags -W -Wno-match-empty-string -Werror -o "$out" "$in"
}
46
# Directory (relative to the current directory) that receives the generated
# header and the compiled micro_syntax binary.
readonly BASE_DIR=_tmp/micro-syntax
48
# Generate the lexer header with re2c, then compile and strip the
# micro_syntax binary under $BASE_DIR.
#
# Arguments:
#   $1 - build variant: 'asan' (default; -O0 -fsanitize=address) or
#        'opt' (-O2).  Any other value is reported through 'die'.
# Globals:
#   BASE_DIR (read) - output directory, created if missing
build() {
  local variant=${1:-asan}

  # A local array: the flags no longer leak into the caller's scope, and each
  # one reaches g++ as its own word without relying on word splitting.
  local -a cxxflags
  case "$variant" in
    asan)
      cxxflags=(-O0 -fsanitize=address)
      ;;
    opt)
      cxxflags=(-O2)
      ;;
    *)
      die "Invalid variant $variant"
      ;;
  esac

  mkdir -p "$BASE_DIR"

  local cc=doctools/micro_syntax.cc
  local h=$BASE_DIR/micro_syntax.h
  local bin=$BASE_DIR/micro_syntax

  my-re2c doctools/micro_syntax.re2c.h "$h"

  # Note: with cc, you need gnu99 instead of c99 for fdopen() and getline()

  # g++ - otherwise virtual functions don't work!

  # Trace only the compiler invocation
  set -o xtrace
  g++ -std=c++11 -Wall -I "$BASE_DIR" "${cxxflags[@]}" \
    -o "$bin" "$cc"
  set +o xtrace

  strip -o "$bin.stripped" "$bin"

  log " CXX $cc"
}
86
# Python inputs for the lexer: plain code, quoted strings with escapes, an
# unterminated string, comments, raw strings, and triple-quoted strings.
# Multi-line cases start at column 0 so the text inside the quotes is exact.
readonly -a PY_TESTS=(
  'abc' '""'
  '"dq \" backslash \\"' '"missing '
  "'sq \\' backslash \\\\'"
  '"line\n"' '"quote \" backslash \\ "'
  '"\n"'
  'hi # comment'
  '"hi" # comment'
  '(r"raw dq")'
  "(r'raw \\' sq')"

' "L1" # first
 L2 # second'

' def f():
 """docstring
 with "quote"
 """
 pass'

" def f():
 '''docstring
 with 'quote'
 '''
 pass"

  " print(r'''hello''')"
  ' print(r"""hi there""")'

  '"hi" # comment'
)
118
# C / C++ inputs for the lexer: preprocessor lines, line and block comments,
# a char literal, line continuations, and raw string literals (including one
# that is never closed).
readonly -a CPP_TESTS=(
  '#if 0'
  'not prepreproc #ifdef 0'
  "// comment can't "
  "f(); // comment isn't "

  # Char literal in C
  "'\\''"

  'void f(); /* multi-line
 comment
 */
 void g(int x);'

  '#include "foo.h"'
  '#include <foo.h> // comment'

  '#define X 3 // comment
 int g();'

  '// hello
 #include <stdio.h>
 #define SUM(x, y) \
 (x) + \
 (y) // comment
 void f();'

  '#undef x'

  '#define F(x) x##name'

  'char* s = f(R"(one
 two
 three)");
 '

  'char* s = f(R"zzXX(hi
 world
 )zzX" (not the end)
 )zzXX");
 '

  'char* unclosed = f(R"zzXX(hi
 world
 )oops");
 '
)
166
# Shell inputs for the lexer: $'' strings, backslash escapes, '#' inside
# words versus real comments, and several here-doc forms.
# Here-doc bodies and terminators start at column 0 on purpose.
readonly -a SHELL_TESTS=(
  "echo $'multi \\n
 sq \\' line'"

  # Quoted backslash
  "echo hi \\' there"

  'echo one#two'
  'echo $(( 16#ff ))'

  '# comment'
  '### comment'

  'echo one # comment'

  'cat <<EOF
hello $world
EOF'

  'cat <<- "EOF"
$3.99
EOF '

  'cat <<- \_ACAWK
$3.99
more
_ACAWK
echo yo'

  'echo multiple << EOF1 << EOF2 > out
one
EOF1
...
two
EOF2
echo done'
)
204
# R inputs for the lexer: a trailing comment and multi-line strings in both
# quote styles.
readonly -a R_TESTS=(
  'f() # hello'
  'x = f("1
 2 \"quote\"
 3")'

  "x = f('1
 2
 3')"
)
215
# HTML inputs for the lexer: paired tags, self-closing tags, and quoted /
# unquoted attribute values.
readonly -a HTML_TESTS=(
  '<p>hi</p>'
  'hi <br/>'
  '<img src="foo"/>'
  '<a href=foo>link</a>'
)
222
# Feed each test case to the micro_syntax binary and print the result.
#
# Arguments:
#   $1  - language name, passed to the binary as '-l <lang>'
#   $2+ - test case strings; each is written to the binary's stdin
# Globals:
#   BASE_DIR (read) - directory containing the micro_syntax binary
# Outputs:
#   Per case: a '==== <case>' header, the binary's output, then a blank line.
run-cases() {
  local lang=$1
  shift

  local bin=$BASE_DIR/micro_syntax

  local s
  for s in "$@"; do
    printf '==== %s\n' "$s"
    # printf rather than echo, so a case such as '-n' is passed through as data
    printf '%s\n' "$s" | "$bin" -l "$lang"
    echo
  done
}
235
test-shell() {
  ### Build the binary, then run every SHELL_TESTS case with language 'shell'

  build  # TODO: use Ninja
  run-cases shell "${SHELL_TESTS[@]}"
}
240
test-cpp() {
  ### Build the binary, then run every CPP_TESTS case with language 'cpp'

  build
  run-cases cpp "${CPP_TESTS[@]}"
}
245
test-py() {
  ### Build the binary, then run every PY_TESTS case with language 'py'

  build
  run-cases py "${PY_TESTS[@]}"
}
250
test-R() {
  ### Build the binary, then run every R_TESTS case with language 'R'

  build
  run-cases R "${R_TESTS[@]}"
}
255
test-html() {
  ### Build the binary, then run every HTML_TESTS case with language 'html'

  build
  run-cases html "${HTML_TESTS[@]}"
}
260
261
# Build once, run every language's test cases, then exercise the binary with
# no -l flag (on the first lines of this script) and with empty input.
#
# Globals:
#   BASE_DIR (read), SHELL_TESTS, CPP_TESTS, PY_TESTS, R_TESTS, HTML_TESTS (read)
run-tests() {
  local bin=$BASE_DIR/micro_syntax

  build

  run-cases shell "${SHELL_TESTS[@]}"
  run-cases cpp "${CPP_TESTS[@]}"
  run-cases py "${PY_TESTS[@]}"
  run-cases R "${R_TESTS[@]}"
  run-cases html "${HTML_TESTS[@]}"

  # No language specified
  echo '==== No language'
  head "$0" | "$bin"
  echo

  echo '/dev/null'
  "$bin" < /dev/null
}
281
cpp-self() {
  ### Highlight the tool's own C++ sources as cpp and page the result

  build
  cat doctools/micro_syntax.{re2c.h,cc} | "$BASE_DIR/micro_syntax" -l cpp | less -r
}
286
sh-self() {
  ### Run the shell lexer on this script, passed as a file argument

  build
  #"$BASE_DIR/micro_syntax" -l shell < doctools/micro-syntax.sh | less -r

  "$BASE_DIR/micro_syntax" -l shell doctools/micro-syntax.sh
}
293
lexer-def() {
  ### Test on a hard Python file

  build
  # The file is fed on stdin; the output goes through the pager
  "$BASE_DIR/micro_syntax" -l py < frontend/lexer_def.py | less -r
}
300
git-comp() {
  ### Test on a hard shell file

  # Exposes nested double quote issue
  build
  "$BASE_DIR/micro_syntax" -l shell < testdata/completion/git | less -r
}
308
mycpp-runtime() {
  ### Highlight every mycpp/gc_str.* file as cpp and page the result

  build
  cat mycpp/gc_str.* | "$BASE_DIR/micro_syntax" -l cpp | less -r
}
313
count() {
  ### Print line counts of the sources and generated headers, then list $BASE_DIR

  wc -l doctools/micro_syntax*
  echo
  # Only the directory part is quoted, so the *.h glob still expands
  wc -l "$BASE_DIR"/*.h
  echo
  ls -l --si -h "$BASE_DIR"
}
321
# Smoke-test the command line of the micro_syntax binary: -h, the default
# invocation, -w, -t, a file argument, and the exit status for a missing file.
#
# Globals:
#   BASE_DIR (read) - directory containing the micro_syntax binary
# Note: 'set -x' is turned on partway through and is not turned off here, so
# tracing stays enabled after this function returns.
test-usage() {
  build

  local bin=$BASE_DIR/micro_syntax

  # help
  "$bin" -h

  echo 'ANSI'
  echo 'echo "hi $name"' | "$bin" -l shell
  echo

  echo 'WEB'
  echo 'echo "hi $name"' | "$bin" -l shell -w
  echo

  set -x
  echo 'TSV'
  echo 'echo "hi $name"' | "$bin" -l shell -t

  echo
  echo
  echo '"dq"' | "$bin" -l shell

  "$bin" -l shell configure | wc -l

  # A nonexistent input file must give exit status 1.  '|| status=$?' captures
  # the status without switching errexit off and on, so the caller's errexit
  # setting is left exactly as it was.
  local status=0
  "$bin" -l shell _nonexistent_ZZ || status=$?
  if test "$status" -ne 1; then
    die 'Expected status 1'
  fi
}
355
soil-run() {
  ### Run test-usage, print a blank line, then run run-tests

  test-usage
  echo

  run-tests
}
362
363### Shell Tests
364
here-doc-syntax() {
  ### Test here doc syntax with $0 sh-self

  # The files below live in _tmp/, so make sure it exists first
  mkdir -p _tmp
  echo 42 > _tmp/42.txt

  # _tmp/42.txt and - are arguments to cat!  Vim doesn't understand
  # and >_tmp/here.txt is not part of the here doc

  cat <<EOF _tmp/42.txt - >_tmp/here.txt
x
short
hello there
EOF

  cat _tmp/here.txt
}
381
# Dispatch: run the function named by the first argument, with the remaining
# arguments passed to it.  With no arguments this is a no-op.
"$@"
383