1 | #!/usr/bin/env bash
2 | #
3 | # Lexing / Parsing experiment
4 | #
5 | # Usage:
6 | # doctools/micro-syntax.sh <function name>
7 |
8 | # TODO:
9 | # - Rename to micro-syntax, from micro-grammars and uchex?
10 | # - micro-segmenting and lexing - comments, strings, and maybe { }
11 | # - micro-parsing: for indent/dedent
12 | #
13 | # - use GNU long flags, test them
14 |
15 | # C++
16 | #
17 | # - ANSI should cat all argv, and it should print line numbers
# - HTML string can append with netstrings!
19 | # - (path, html, path, html, ...) should be sufficient, though not fully general
20 | # - print SLOC at the top
21 | # - COALESCE tokens to save space
22 |
23 | # Then src-tree reads this stream
24 | # - actually it can take the filenames directly from here
25 | # - it can discard the big HTML!
26 |
27 | # Later: port some kind of parser combinator for
28 | # - def class, etc.
29 |
# Strict mode: abort on unset variables, on a failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# "$0" is quoted so a checkout path containing spaces still resolves.
# '&&' rather than ';': errexit is not inherited by command substitutions by
# default, so with ';' a failed cd would silently yield the caller's cwd.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this

#source build/dev-shell.sh  # 're2c' in path
# NOTE(review): presumably where die and log come from - not visible here.
# The path is relative, so the script has to be run from the repo root.
source build/ninja-rules-cpp.sh
38 |
my-re2c() {
  ### Run re2c on an input spec, writing the generated code to a file
  #
  # Args:
  #   $1: path of the re2c input file
  #   $2: path of the file to generate
  local in=$1
  local out=$2

  # Copied from build/py.sh, and added --tags
  # Both paths are quoted so names with spaces or glob characters stay
  # single arguments.
  re2c --tags -W -Wno-match-empty-string -Werror -o "$out" "$in"
}
46 |
47 | readonly BASE_DIR=_tmp/micro-syntax
48 |
build() {
  ### Generate the lexer header with re2c and compile the micro_syntax tool
  #
  # Args:
  #   $1: build variant - 'asan' (default: -O0 -fsanitize=address) or
  #       'opt' (-O2).  Any other value is reported through die.
  #
  # Writes $BASE_DIR/micro_syntax.h, $BASE_DIR/micro_syntax and
  # $BASE_DIR/micro_syntax.stripped.
  local variant=${1:-asan}

  # An array keeps each flag a separate word without relying on unquoted
  # word splitting; 'local' stops it leaking into the caller's scope.
  local -a cxxflags=()
  case $variant in
    asan)
      cxxflags=(-O0 -fsanitize=address)
      ;;
    opt)
      cxxflags=(-O2)
      ;;
    *)
      die "Invalid variant $variant"
      ;;
  esac

  mkdir -p "$BASE_DIR"

  local cc=doctools/micro_syntax.cc
  local h=$BASE_DIR/micro_syntax.h
  local bin=$BASE_DIR/micro_syntax

  my-re2c doctools/micro_syntax.re2c.h "$h"

  # Note: with cc, you need gnu99 instead of c99 for fdopen() and getline()

  # g++ - otherwise virtual functions don't work!

  # Trace only the compiler invocation
  set -o xtrace
  g++ -std=c++11 -Wall -I "$BASE_DIR" "${cxxflags[@]}" \
    -o "$bin" "$cc"
  set +o xtrace

  strip -o "$bin.stripped" "$bin"

  log " CXX $cc"

}
86 |
# Python inputs for test-py and run-tests.  Each array element is one test
# case; run-cases pipes it to the tool with -l py.  Elements may span lines.
readonly -a PY_TESTS=(
# Plain word, empty string, escapes inside strings, an unterminated string
'abc' '""'
'"dq \" backslash \\"' '"missing '
"'sq \\' backslash \\\\'"
'"line\n"' '"quote \" backslash \\ "'
'"\n"'
# Comments, alone and after a string
'hi # comment'
'"hi" # comment'
# r-prefixed strings
'(r"raw dq")'
"(r'raw \\' sq')"

# Two lines, each with a trailing comment
' "L1" # first
L2 # second'

# Triple-quoted strings spanning lines, with the other quote style inside
' def f():
"""docstring
with "quote"
"""
pass'

" def f():
'''docstring
with 'quote'
'''
pass"

# r-prefixed triple-quoted strings
" print(r'''hello''')"
' print(r"""hi there""")'

# NOTE(review): same input as the string-plus-comment case earlier in the list
'"hi" # comment'
)
118 |
# C/C++ inputs for test-cpp and run-tests.  Each array element is one test
# case; run-cases pipes it to the tool with -l cpp.  Elements may span lines.
readonly -a CPP_TESTS=(
# '#' at the start of a line vs. in the middle of one
'#if 0'
'not prepreproc #ifdef 0'
# Apostrophes inside // comments
"// comment can't "
"f(); // comment isn't "

# Char literal in C
"'\\''"

# Block comment spanning several lines
'void f(); /* multi-line
comment
*/
void g(int x);'

# Directives, some followed by a // comment
'#include "foo.h"'
'#include <foo.h> // comment'

'#define X 3 // comment
int g();'

# #define continued across lines with trailing backslashes
'// hello
#include <stdio.h>
#define SUM(x, y) \
(x) + \
(y) // comment
void f();'

'#undef x'

'#define F(x) x##name'

# R"( ... )" strings: no delimiter, a custom delimiter with a near-miss
# terminator inside, and one whose closing delimiter never matches
'char* s = f(R"(one
two
three)");
'

'char* s = f(R"zzXX(hi
world
)zzX" (not the end)
)zzXX");
'

'char* unclosed = f(R"zzXX(hi
world
)oops");
'
)
166 |
# Shell inputs for test-shell and run-tests.  Each array element is one test
# case; run-cases pipes it to the tool with -l shell.  Elements may span lines.
readonly -a SHELL_TESTS=(
# $'...' string spanning two lines, with escapes
"echo $'multi \\n
sq \\' line'"

# Quoted backslash
"echo hi \\' there"

# '#' inside a word and inside $(( ))
'echo one#two'
'echo $(( 16#ff ))'

# '#' starting a comment
'# comment'
'### comment'

'echo one # comment'

# Here docs: plain delimiter, <<- with a quoted delimiter, <<- with a
# backslash-escaped delimiter, and two here docs on one command
'cat <<EOF
hello $world
EOF'

'cat <<- "EOF"
$3.99
EOF '

'cat <<- \_ACAWK
$3.99
more
_ACAWK
echo yo'

'echo multiple << EOF1 << EOF2 > out
one
EOF1
...
two
EOF2
echo done'
)
204 |
# R inputs for test-R and run-tests.  Each array element is one test case;
# run-cases pipes it to the tool with -l R.
readonly -a R_TESTS=(
# Trailing comment
'f() # hello'
# Double- and single-quoted strings that span several lines
'x = f("1
2 \"quote\"
3")'

"x = f('1
2
3')"
)
215 |
# HTML inputs for test-html and run-tests.  Each array element is one test
# case; run-cases pipes it to the tool with -l html.
readonly -a HTML_TESTS=(
'<p>hi</p>'
'hi <br/>'
# Attribute values, quoted and unquoted
'<img src="foo"/>'
'<a href=foo>link</a>'
)
222 |
run-cases() {
  ### Feed each snippet to the tool on stdin, printing a banner before it
  #
  # Args:
  #   $1: language name passed to the tool's -l flag
  #   $2...: snippets; each one is written to the tool followed by a newline
  local lang=$1
  shift

  local bin=$BASE_DIR/micro_syntax

  local s  # keep the loop variable out of the global scope
  for s in "$@"; do
    echo "==== $s"
    # printf rather than echo, so a snippet such as '-n' is passed through
    # as data instead of being taken as an echo option
    printf '%s\n' "$s" | "$bin" -l "$lang"
    echo
  done
}
235 |
test-shell() {
  ### Rebuild the tool, then run every SHELL_TESTS case through it
  local syntax=shell

  build  # TODO: use Ninja
  run-cases "$syntax" "${SHELL_TESTS[@]}"
}
240 |
test-cpp() {
  ### Rebuild the tool, then run every CPP_TESTS case through it
  local syntax=cpp

  build
  run-cases "$syntax" "${CPP_TESTS[@]}"
}
245 |
test-py() {
  ### Rebuild the tool, then run every PY_TESTS case through it
  local syntax=py

  build
  run-cases "$syntax" "${PY_TESTS[@]}"
}
250 |
test-R() {
  ### Rebuild the tool, then run every R_TESTS case through it
  local syntax=R

  build
  run-cases "$syntax" "${R_TESTS[@]}"
}
255 |
test-html() {
  ### Rebuild the tool, then run every HTML_TESTS case through it
  local syntax=html

  build
  run-cases "$syntax" "${HTML_TESTS[@]}"
}
260 |
261 |
run-tests() {
  ### Build once, run every language's cases, then run the tool with no flags
  local bin=$BASE_DIR/micro_syntax

  build

  run-cases shell "${SHELL_TESTS[@]}"
  run-cases cpp "${CPP_TESTS[@]}"
  run-cases py "${PY_TESTS[@]}"
  run-cases R "${R_TESTS[@]}"
  run-cases html "${HTML_TESTS[@]}"

  # No language specified
  # "$0" and "$bin" are quoted so paths with spaces stay single words
  echo '==== No language'
  head "$0" | "$bin"
  echo

  # Empty input
  echo '/dev/null'
  "$bin" < /dev/null
}
281 |
cpp-self() {
  ### Run the C++ lexer over the tool's own sources and page the output
  build
  # The binary path is quoted so a BASE_DIR with spaces stays one word
  cat doctools/micro_syntax.{re2c.h,cc} \
    | "$BASE_DIR/micro_syntax" -l cpp \
    | less -r
}
286 |
sh-self() {
  ### Run the shell lexer on doctools/micro-syntax.sh, given as a file argument
  build
  #$BASE_DIR/micro_syntax -l shell < doctools/micro_syntax.sh | less -r

  # The binary path is quoted so a BASE_DIR with spaces stays one word
  "$BASE_DIR/micro_syntax" -l shell doctools/micro-syntax.sh
}
293 |
lexer-def() {
  ### Test on a hard Python file

  build
  # The binary path is quoted so a BASE_DIR with spaces stays one word
  "$BASE_DIR/micro_syntax" -l py < frontend/lexer_def.py | less -r
}
300 |
git-comp() {
  ### Test on a hard shell file

  # Exposes nested double quote issue
  build
  # The binary path is quoted so a BASE_DIR with spaces stays one word
  "$BASE_DIR/micro_syntax" -l shell < testdata/completion/git | less -r
}
308 |
mycpp-runtime() {
  ### Run the C++ lexer over the mycpp/gc_str.* files and page the output
  build
  # The binary path is quoted so a BASE_DIR with spaces stays one word
  cat mycpp/gc_str.* \
    | "$BASE_DIR/micro_syntax" -l cpp \
    | less -r
}
313 |
count() {
  ### Print line counts for the sources and generated headers, then list
  ### the contents of $BASE_DIR
  wc -l doctools/micro_syntax*
  echo
  # Only the directory part is quoted, so the *.h glob still expands
  wc -l "$BASE_DIR"/*.h
  echo
  ls -l --si -h "$BASE_DIR"
}
321 |
test-usage() {
  ### Smoke-test the command line: -h, the runs labelled ANSI, WEB (-w) and
  ### TSV (-t), a file argument, and the exit status for a missing file
  build

  local bin=$BASE_DIR/micro_syntax

  # help
  "$bin" -h

  echo 'ANSI'
  echo 'echo "hi $name"' | "$bin" -l shell
  echo

  echo 'WEB'
  echo 'echo "hi $name"' | "$bin" -l shell -w
  echo

  set -x
  echo 'TSV'
  echo 'echo "hi $name"' | "$bin" -l shell -t

  echo
  echo
  echo '"dq"' | "$bin" -l shell

  "$bin" -l shell configure | wc -l

  # A missing input file must exit with status 1.  '|| status=$?' records
  # the failure without having to switch errexit off and back on.
  local status=0
  "$bin" -l shell _nonexistent_ZZ || status=$?
  if test "$status" -ne 1; then
    die 'Expected status 1'
  fi

  # Pair the 'set -x' above so tracing does not leak into the caller
  set +x
}
355 |
soil-run() {
  ### Run the usage checks, print a blank separator line, then run the
  ### per-language cases
  test-usage
  printf '\n'

  run-tests
}
362 |
363 | ### Shell Tests
364 |
here-doc-syntax() {
### Test here doc syntax with $0 sh-self
#
# The unusual layout below is the point: this function is input for the
# shell lexer, so the here doc is deliberately written in an awkward form.

# NOTE(review): assumes _tmp/ already exists - nothing here creates it
echo 42 > _tmp/42.txt

# _tmp/42 and - are arguments to cat! Vim doesn't understand
# and >_tmp/here.txt is not part of the here doc

# cat is given _tmp/42.txt and then - (stdin, which is the here doc); its
# output is redirected to _tmp/here.txt
cat <<EOF _tmp/42.txt - >_tmp/here.txt
x
short
hello there
EOF

# Show what was written
cat _tmp/here.txt
}
381 |
382 | "$@"
383 |