OILS / doctools / micro-syntax.sh View on Github | oils.pub

436 lines, 153 significant
1#!/usr/bin/env bash
2#
3# Lexing / Parsing experiment
4#
5# Usage:
6# doctools/micro-syntax.sh <function name>
7
8# TODO:
9# - Rename to micro-syntax, from micro-grammars and uchex?
10# - micro-segmenting and lexing - comments, strings, and maybe { }
11# - micro-parsing: for indent/dedent
12#
13# - use GNU long flags, test them
14
15# C++
16#
17# - ANSI should cat all argv, and it should print line numbers
# - HTML strings can be appended with netstrings!
19# - (path, html, path, html, ...) should be sufficient, though not fully general
20# - print SLOC at the top
21# - COALESCE tokens to save space
22
23# Then src-tree reads this stream
24# - actually it can take the filenames directly from here
25# - it can discard the big HTML!
26
27# Later: port some kind of parser combinator for
28# - def class, etc.
29
# Strict mode: fail on unset variables, on a failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above this script's directory.
# "$0" is quoted so an invocation path containing whitespace still resolves,
# and '&&' makes a failed 'cd' abort the script instead of silently recording
# the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this

#source build/dev-shell.sh  # 're2c' in path
# NOTE(review): relative path - presumably the script is run from the repo
# root; confirm.
source build/ninja-rules-cpp.sh
38
my-re2c() {
  ### Run re2c on input file $1, writing the generated code to $2.
  local in=$1
  local out=$2

  # Copied from build/py.sh, and added --tags
  # Both paths are quoted so names containing whitespace stay single arguments.
  re2c --tags -W -Wno-match-empty-string -Werror -o "$out" "$in"
}
46
# Output directory for everything build() generates.
BASE_DIR=_tmp/micro-syntax
readonly BASE_DIR
48
build() {
  ### Generate the lexer header with re2c, then compile the C++ binary.
  #
  # Args:
  #   $1: build variant, 'asan' (default) or 'opt'
  #
  # Writes micro_syntax.h, micro_syntax and micro_syntax.stripped into
  # $BASE_DIR.  Any other variant name is reported through die().
  local variant=${1:-asan}

  # A local array: the flags no longer leak into the caller's scope, and each
  # flag is passed as its own word without relying on word splitting.
  local -a cxxflags
  case $variant in
    asan)
      cxxflags=(-O0 -fsanitize=address)
      ;;
    opt)
      cxxflags=(-O2)
      ;;
    *)
      die "Invalid variant $variant"
      ;;
  esac

  mkdir -p "$BASE_DIR"

  local cc=doctools/micro_syntax.cc
  local h=$BASE_DIR/micro_syntax.h
  local bin=$BASE_DIR/micro_syntax

  my-re2c doctools/micro_syntax.re2c.h "$h"

  # Note: with cc, you need gnu99 instead of c99 for fdopen() and getline()

  # g++ - otherwise virtual functions don't work!

  # Trace only the compiler invocation so the exact command line is logged.
  set -o xtrace
  g++ -std=c++11 -Wall -I "$BASE_DIR" "${cxxflags[@]}" \
    -o "$bin" "$cc"
  set +o xtrace

  strip -o "$bin.stripped" "$bin"

  log " CXX $cc"

}
86
# Python snippets for the lexer.  test-py and run-tests hand each element to
# run-cases with language 'py'.  Everything inside the quotes, including
# newlines and leading whitespace, is part of the test input.
readonly -a PY_TESTS=(
 'abc' '""'
 '"dq \" backslash \\"' '"missing '
 "'sq \\' backslash \\\\'"
 '"line\n"' '"quote \" backslash \\ "'
 '"\n"'
 'hi # comment'
 '"hi" # comment'
 '(r"raw dq")'
 "(r'raw \\' sq')"

' "L1" # first
 L2 # second'

' def f():
 """docstring
 with "quote"
 """
 pass'

" def f():
 '''docstring
 with 'quote'
 '''
 pass"

 " print(r'''hello''')"
 ' print(r"""hi there""")'

 '"hi" # comment'
)
118
# C / C++ snippets for the lexer.  test-cpp and run-tests hand each element
# to run-cases with language 'cpp'.  Everything inside the quotes, including
# newlines and leading whitespace, is part of the test input.
readonly -a CPP_TESTS=(
 '#if 0'
 'not prepreproc #ifdef 0'
 "// comment can't "
 "f(); // comment isn't "

 # Char literal in C
 "'\\''"

 'void f(); /* multi-line
 comment
 */
 void g(int x);'

 '#include "foo.h"'
 '#include <foo.h> // comment'

 '#define X 3 // comment
 int g();'

 '// hello
 #include <stdio.h>
 #define SUM(x, y) \
 (x) + \
 (y) // comment
 void f();'

 '#undef x'

 '#define F(x) x##name'

 'char* s = f(R"(one
 two
 three)");
 '

 'char* s = f(R"zzXX(hi
 world
 )zzX" (not the end)
 )zzXX");
 '

 'char* unclosed = f(R"zzXX(hi
 world
 )oops");
 '
)
166
# Shell snippets for the lexer.  test-shell and run-tests hand each element
# to run-cases with language 'shell'.  Everything inside the quotes is part
# of the test input; the here-doc bodies and delimiters start at column 0.
readonly -a SHELL_TESTS=(
 "echo $'multi \\n
 sq \\' line'"

 # Quoted backslash
 "echo hi \\' there"

 'echo one#two'
 'echo $(( 16#ff ))'

 '# comment'
 '### comment'

 'echo one # comment'

 'cat <<EOF
hello $world
EOF'

 'cat <<- "EOF"
$3.99
EOF '

 'cat <<- \_ACAWK
$3.99
more
_ACAWK
echo yo'

 'echo multiple << EOF1 << EOF2 > out
one
EOF1
...
two
EOF2
echo done'
)
204
# R snippets for the lexer.  test-R and run-tests hand each element to
# run-cases with language 'R'.  Everything inside the quotes, including
# newlines and leading whitespace, is part of the test input.
readonly -a R_TESTS=(
 'f() # hello'
 'x = f("1
 2 \"quote\"
 3")'

 "x = f('1
 2
 3')"
)
215
# HTML snippets for the lexer.  test-html and run-tests hand each element to
# run-cases with language 'html'.  Everything inside the quotes, including
# newlines and leading whitespace, is part of the test input.
readonly -a HTML_TESTS=(
 '<p>hi &amp; </p>'
 '<li dec="dec &#123;"> dec &#123; </li>'
 '<li hex="hex &#x00ff;"> hex &#x00ff; </li>'

 '<p double="3 &lt; 4">hi </p>'
 "<p missing single='4 &gt; 3'>hi </p>"
 '<p unquoted=value missing missing double="z">'

 # Errors
 '<p !badname>'
 '<p badvalue=&>'
 'less < greater > amp & foo'
 '<p quoted="less < greater > amp & foo">'
 "<p quoted='less < greater > amp & foo'>"
 '<ul> <li>hi</li> </ul>'
 'hi <br/>'
 '<a href=foo>link</a>'

 # All of these are values
 '<a href=/>'
 '<a href=//>'
 '<a href= />'
 # Fixed
 '<a href="/">'
 '<a href="foo"/>'

 # More
 'decl <!DOCTYPE html> z'
 'decl <?xml version="1.0"?> z'
 'hello <!-- comment
 <not-a-tag> --> more <p>'
 'foo <![CDATA[ hello
 <not-a-tag> ]]> more <p>'
 'not-cdata <![cdata[ hello
 <not-a-tag> ]]> more <p>'
 '<script>if (x<y) {
 console.log("hi"); } </script> hi <p>'
 '<style>p { background-color: red;
 } </style> more <p>'

 # Attributes
 '<div missing unquoted = foo double="d" single='\'\''>hi</div>'
 )
260
# Truncated HTML inputs.  run-tests appends these to HTML_TESTS for the
# 'html' run; in test-html the reference to this array is commented out.
readonly -a HTML_ERRORS=(
 # premature EOF - TODO: how do we show errors?
 '<a' # in AttrName state
 '<a name' # in AttrValue state
 '<a name=foo'
 '<a name="double' # DQ state
 "<a name='single" # SQ state
 'a <!DOCTYPE'
 'a <!-- comm'
 'a <![CDATA[ foo ]]'
 '<script a=y> zzz'
 '<style a=y> zzz'

)
275
run-cases() {
  ### Lex each snippet with the micro_syntax binary and print the result.
  #
  # Args:
  #   $1: language name, passed to the binary's -l flag
  #   $2...: source snippets; each one is written to the binary's stdin
  #
  # For every snippet, prints a '==== <snippet>' header, then the binary's
  # output, then a blank line.
  local lang=$1
  shift

  local bin=$BASE_DIR/micro_syntax

  local s  # keep the loop variable out of the caller's scope
  for s in "$@"; do
    echo "==== $s"
    # printf rather than echo: a snippet such as '-n' must reach the lexer as
    # data instead of being taken as an echo option.
    printf '%s\n' "$s" | "$bin" -l "$lang"
    echo
  done
}
288
test-shell() {
  ### Build the binary, then lex every SHELL_TESTS snippet as 'shell'.
  build  # TODO: use Ninja

  local -a snippets=("${SHELL_TESTS[@]}")
  run-cases shell "${snippets[@]}"
}
293
test-cpp() {
  ### Build the binary, then lex every CPP_TESTS snippet as 'cpp'.
  build

  local -a snippets=("${CPP_TESTS[@]}")
  run-cases cpp "${snippets[@]}"
}
298
test-py() {
  ### Build the binary, then lex every PY_TESTS snippet as 'py'.
  build

  local -a snippets=("${PY_TESTS[@]}")
  run-cases py "${snippets[@]}"
}
303
test-R() {
  ### Build the binary, then lex every R_TESTS snippet as 'R'.
  build

  local -a snippets=("${R_TESTS[@]}")
  run-cases R "${snippets[@]}"
}
308
test-html() {
  ### Build the binary, then lex every HTML_TESTS snippet as 'html'.
  build

  local -a snippets=("${HTML_TESTS[@]}")
  run-cases html "${snippets[@]}"  #"${HTML_ERRORS[@]}"
}
313
314
run-tests() {
  ### Build once, then run every per-language test array through the lexer,
  ### followed by two runs without a -l flag.
  local bin=$BASE_DIR/micro_syntax

  build

  run-cases shell "${SHELL_TESTS[@]}"
  run-cases cpp "${CPP_TESTS[@]}"
  run-cases py "${PY_TESTS[@]}"
  run-cases R "${R_TESTS[@]}"
  run-cases html "${HTML_TESTS[@]}" "${HTML_ERRORS[@]}"

  # No language specified
  echo '==== No language'
  # "$0" and "$bin" are quoted so a script or build path containing
  # whitespace is not split into several words.
  head "$0" | "$bin"
  echo

  # Empty input
  echo '/dev/null'
  "$bin" < /dev/null
}
334
cpp-self() {
  ### Build, then page through the lexer's own C++ sources lexed as 'cpp'.
  build

  local lexer=$BASE_DIR/micro_syntax
  cat doctools/micro_syntax.re2c.h doctools/micro_syntax.cc \
    | "$lexer" -l cpp \
    | less -r
}
339
sh-self() {
  ### Build, then lex this script as 'shell', passing it as a file argument.
  build
  #$BASE_DIR/micro_syntax -l shell < doctools/micro_syntax.sh | less -r

  local lexer=$BASE_DIR/micro_syntax
  "$lexer" -l shell doctools/micro-syntax.sh
}
346
lexer-def() {
  ### Test on a hard Python file

  build

  local lexer=$BASE_DIR/micro_syntax
  "$lexer" -l py < frontend/lexer_def.py \
    | less -r
}
353
git-comp() {
  ### Test on a hard shell file

  # Exposes nested double quote issue
  build

  local lexer=$BASE_DIR/micro_syntax
  "$lexer" -l shell < testdata/completion/git \
    | less -r
}
361
mycpp-runtime() {
  ### Build, then page through every mycpp/gc_str.* file lexed as 'cpp'.
  build

  local lexer=$BASE_DIR/micro_syntax
  cat mycpp/gc_str.* \
    | "$lexer" -l cpp \
    | less -r
}
366
count() {
  ### Print line counts for the sources and generated headers, then list
  ### the contents of the build directory.
  local out_dir=$BASE_DIR

  wc -l doctools/micro_syntax*
  echo

  wc -l "$out_dir"/*.h
  echo

  ls -l --si -h "$out_dir"
}
374
test-usage() {
  ### Smoke-test the command line: -h, the runs labelled ANSI / WEB / TSV,
  ### a file argument, and the exit status for a missing input file.
  build

  local bin=$BASE_DIR/micro_syntax

  # help
  "$bin" -h

  echo 'ANSI'
  echo 'echo "hi $name"' | "$bin" -l shell
  echo

  echo 'WEB'
  echo 'echo "hi $name"' | "$bin" -l shell -w
  echo

  # NOTE(review): xtrace is switched on here and never switched off, so it
  # stays enabled after this function returns - presumably wanted for the
  # logs; confirm.
  set -x
  echo 'TSV'
  echo 'echo "hi $name"' | "$bin" -l shell -t

  echo
  echo
  echo '"dq"' | "$bin" -l shell

  "$bin" -l shell configure | wc -l

  # Capture the expected failure with '||' instead of switching errexit off
  # and back on: the caller's errexit setting is left exactly as it was.
  local status=0
  "$bin" -l shell _nonexistent_ZZ || status=$?
  if test "$status" -ne 1; then
    die 'Expected status 1'
  fi
}
408
soil-run() {
  ### Run test-usage, print a blank separator line, then run run-tests.
  test-usage
  printf '\n'

  run-tests
}
415
416### Shell Tests
417
here-doc-syntax() {
  ### Test here doc syntax with $0 sh-self
  #
  # The unusual word order on the 'cat' line below is deliberate: it is the
  # syntax this function exists to exercise, so keep it exactly as written.

  # Input file that 'cat' reads as its first argument.
  echo 42 > _tmp/42.txt

  # _tmp/42 and - are arguments to cat! Vim doesn't understand
  # and >_tmp/here.txt is not part of the here doc

  cat <<EOF _tmp/42.txt - >_tmp/here.txt
x
short
hello there
EOF

  # Show what was written: the file's contents followed by the here doc.
  cat _tmp/here.txt
}
434
435"$@"
436