1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # data_lang/htm8-test.sh
|
5 |
|
# Location of the OSH shell library.  The default applies only when LIB_OSH is
# unset, so callers may override it from the environment.
: "${LIB_OSH=stdlib/osh}"

# Quote the expansions so an overridden LIB_OSH containing spaces or glob
# characters is still treated as a single path.
source "$LIB_OSH/bash-strict.sh"  # presumably enables strict mode - confirm
source "$LIB_OSH/task-five.sh"    # provides task-five, invoked at end of file
|
9 |
|
10 | # parse with lazylex/html.py, or data_lang/htm8.py
|
11 |
|
# Print the path of each *.html file found under the deployed site checkout,
# one path per line.
site-files() {
  local deploy_dir=../../oilshell/oilshell.org__deploy

  find "$deploy_dir" -name '*.html'
}
|
15 |
|
16 | # Issues with lazylex/html.py
|
17 | #
|
18 | # - Token ID is annoying to express in Python
|
19 | # - re.DOTALL for newlines
|
20 | # - can we change that with [.\n]*?
|
21 | # - nongreedy match for --> and ?>
|
22 |
|
23 |
|
# Run wc -l over every HTML file listed by site-files.
#
# 1.5 M lines of HTML - takes 3 xargs invocations!
#
# TODO:
# - test that it lexes
# - test that tags are balanced
test-site() {
  # site-files emits one path per line.  Plain xargs would split those paths on
  # spaces and interpret quotes/backslashes, so turn the newlines into NULs and
  # let xargs -0 pass each path through as a single, literal argument.
  site-files | tr '\n' '\0' | xargs -0 wc -l
}
|
33 |
|
# Placeholder task: only prints a reminder that the .wwz download is not
# implemented yet.
test-wwz() {
  printf '%s\n' 'TODO: download .wwz from CI'
}
|
37 |
|
38 | task-five "$@"
|