OILS / data_lang / htm8-test.sh View on Github | oils.pub

38 lines, 13 significant
1#!/usr/bin/env bash
2#
3# Usage:
4# data_lang/htm8-test.sh
5
6: ${LIB_OSH=stdlib/osh}
7source $LIB_OSH/bash-strict.sh
8source $LIB_OSH/task-five.sh
9
10# parse with lazylex/html.py, or data_lang/htm8.py
11
site-files() {
  # Print the path of each *.html file found under
  # ../../oilshell/oilshell.org__deploy, one path per line.  The directory is
  # resolved relative to the current working directory.
  local deploy_dir=../../oilshell/oilshell.org__deploy

  find "$deploy_dir" -name '*.html'
}
15
16# Issues with lazylex/html.py
17#
18# - Token ID is annoying to express in Python
19# - re.DOTALL for newlines
20# - can we change that with [.\n]*?
21# - nongreedy match for --> and ?>
22
23
test-site() {
  # Count the lines of every file that site-files lists.
  #
  # The site is about 1.5 M lines of HTML, and the file list is long enough
  # that xargs needs 3 invocations of wc to get through it.
  #
  # TODO:
  # - test that it lexes
  # - test that tags are balanced

  site-files \
    | xargs wc -l
}
33
test-wwz() {
  # Placeholder: only prints a reminder; nothing is downloaded or checked yet.
  printf '%s\n' 'TODO: download .wwz from CI'
}
37
38task-five "$@"