1 |
|
module htm8
{
  # Token IDs.
  # NOTE(review): variants are listed in exactly their original order, in
  # case generated code depends on that order -- confirm before reordering.
  h8_id =
      Decl

    # Each *Begin variant in this group (CommentBegin, ProcessingBegin,
    # CDataBegin) is a "pseudo-token", and is not visible.
    | Comment
    | CommentBegin
    | Processing
    | ProcessingBegin
    | CData
    | CDataBegin

    | StartTag
    | StartEndTag
    | EndTag

    | DecChar
    | HexChar
    | CharEntity

    | RawData
    | HtmlCData

    | BadAmpersand
    | BadGreaterThan
    | BadLessThan

    | Invalid
    | EndOfStream
    # Cosmetic option: the type is called h8_id rather than h8_id_e.
    generate [no_namespace_suffix]

  # IDs used within a tag -- presumably its name, attribute names, and the
  # different forms of attribute value (TODO confirm against the tag lexer).
  h8_tag_id =
      TagName
    | AttrName
    | UnquotedValue
    | QuotedValue
    | MissingValue
    generate [no_namespace_suffix]

  # Outcome of looking for an attribute name.
  attr_name =
      Ok              # the name was found
    | Eof
    | Error(int pos)  # LexError

  # Forms of attribute value, each shown with an example tag.
  attr_value =
      Missing   # <a missing>
    | Empty     # <a empty= >
    | Unquoted  # <a unquoted=1 >
    | Quoted    # <a quoted="1" >
    generate [no_namespace_suffix]

  # Sketch of an alternative API (not enabled).  It is maybe more natural,
  # but has more allocations:
  #
  #   tag_lexer.Read()
  #
  #   # Unquoted, Quoted, Empty, Missing
  #   (int tag_name_start, int tag_name_end, attr_value)
  #
  #   attr_value =
  #       Missing                 # <a missing> - tag_name_end adds =""
  #
  #     | Empty (int equals_pos)  # <a empty=>
  #
  #       # <a unquoted=foo>
  #       # the first one has end_pos 0, and can be h8_id.ZeroPosition?
  #     | Unquoted (List[Tuple[h8_id, end_pos]] tokens)
  #
  #       # <a quoted="foo">
  #     | Quoted (List[Tuple[h8_id, end_pos]] tokens)
  #
  #       # Rather than raise an exception, we should have this for bad data
  #     | Invalid(int pos)

}
|
68 |
|