doctools/micro_syntax.re2c.h

OILS / doctools / micro_syntax.re2c.h View on Github | oils.pub

855 lines, 663 significant

1	#ifndef MICRO_SYNTAX_H
2	#define MICRO_SYNTAX_H
3
4	#include <assert.h>
5	#include <string.h> // strlen()
6
7	#include <vector>
8
// Token kinds.  Token::id holds one of these values; the Match()
// specializations in this file assign them through the TOK() / TOK_MODE()
// macros.  Several kinds are only produced or resolved by later passes, as
// noted on the individual enumerators.
enum class Id {
  // Common to nearly all languages
  Comm,
  MaybeComment,  // for shell, resolved in a fix-up pass

  WS,

  Name,  // Keyword or Identifier
  Str,   // "" and Python r""
         // '' and Python r''
         // ''' """
         // body of here docs

  Other,  // any other text
  Unknown,

  // C++
  DelimStrBegin,  // for C++ R"zzz(hello)zzz"
  DelimStrEnd,
  Re2c,  // re2c code block

  MaybePreproc,    // resolved to PreprocCommand/PreprocOther in fix-up pass
  PreprocCommand,  // resolved #define
  PreprocOther,    // any other text
  LineCont,        // backslash at end of line, for #define continuation

  // Braces for C++ block structure.  Could be done in second pass after
  // removing comments/strings?
  LBrace,
  RBrace,

  // Shell
  HereBegin,
  HereEnd,

  // Html
  TagNameLeft,   // start <a> or <br id=foo />
  SelfClose,     // />
  TagNameRight,  // >
  EndTag,        // </a>
  CharEscape,    // character reference: &name; &#123; &#x1f;
  AttrName,      // foo=
  BadAmpersand,
  BadLessThan,
  BadGreaterThan,
  // Reused: Str Other

  // Zero-width token to detect #ifdef and Python INDENT/DEDENT
  // StartLine,

  // These are special zero-width tokens for Python
  // Indent,
  // Dedent,
  // Maintain our own stack!
  // https://stackoverflow.com/questions/40960123/how-exactly-a-dedent-token-is-generated-in-python
};
65
66	struct Token {
67	Token() : id(Id::Unknown), end_col(0), submatch_start(0), submatch_end(0) {
68	}
69	Token(Id id, int end_col)
70	: id(id), end_col(end_col), submatch_start(0), submatch_end(0) {
71	}
72
73	Id id;
74	int end_col; // offset from char* line
75	int submatch_start; // ditto
76	int submatch_end; // ditto
77	};
78
79	// Lexer and Matcher are specialized on py_mode_e, cpp_mode_e, ...
80
// Per-line lexer state.  T is one of the *_mode_e enums (py_mode_e,
// cpp_mode_e, ...) and must have an enumerator named Outer.
template <typename T>
class Lexer {
 public:
  // Starts in T::Outer, positioned at the beginning of `line`.
  Lexer(char* line) : line_mode(T::Outer) {
    SetLine(line);
  }

  // Points the lexer at a new line.  line_mode is deliberately left alone so
  // that multi-line constructs carry over from one line to the next.
  void SetLine(char* line) {
    p_current = line_ = line;
  }

  const char* line_;      // start of the current line
  const char* p_current;  // cursor; points into line_
  T line_mode;            // current mode, starts with Outer
};
96
// Token matcher, specialized once per language mode enum (text_mode_e,
// py_mode_e, cpp_mode_e, ...).  Only the explicit specializations of Match()
// are defined; there is no generic implementation.
template <typename T>
class Matcher {
 public:
  // Returns whether EOL was hit.  Mutates lexer state, and fills in tok out
  // param.
  bool Match(Lexer<T>* lexer, Token* tok);
};
104
105	// Macros for semantic actions
106
// These macros are meant to be used inside re2c semantic actions, within a
// loop, where `tok` (pointer to the token being filled in) and `lexer`
// (pointer to the lexer state) are in scope.  Arguments are parenthesized so
// that arbitrary expressions can be passed safely.

// Sets the token kind and leaves the enclosing loop.
#define TOK(k)   \
  tok->id = (k); \
  break;

// Sets the token kind, switches the lexer to mode m, and leaves the enclosing
// loop.
#define TOK_MODE(k, m)     \
  tok->id = (k);           \
  lexer->line_mode = (m);  \
  break;

// Records the submatch [s, e) as offsets from the start of the line.
// Must call TOK*() after this
#define SUBMATCH(s, e)                                          \
  tok->submatch_start = static_cast<int>((s) - lexer->line_);  \
  tok->submatch_end = static_cast<int>((e) - lexer->line_);
120
// Regex definitions shared between languages

/*!re2c
  re2c:yyfill:enable = 0;
  re2c:define:YYCTYPE = char;
  re2c:define:YYCURSOR = p;

  nul = [\x00];
  not_nul = [^\x00];

  // Whitespace is needed for SLOC, to tell if a line is entirely blank
  whitespace = [ \t\r\n]*;
  space_required = [ \t\r\n]+;

  identifier = [_a-zA-Z][_a-zA-Z0-9]*;

  // Python and C++ have "" strings
  // C++ char literals are similar, e.g. '\''
  // We are not more precise

  // Body of a quoted string: any run of either
  //   - a char that is not NUL, the closing quote, or a backslash, or
  //   - a backslash followed by any non-NUL char (an escape)
  sq_middle = ( [^\x00'\\] | "\\" not_nul )*;
  dq_middle = ( [^\x00"\\] | "\\" not_nul )*;

  sq_string = ['] sq_middle ['];
  dq_string = ["] dq_middle ["];

  // Shell and Python have # comments
  pound_comment = "#" not_nul*;

  // YSH and Python have ''' """
  triple_sq = "'''";
  triple_dq = ["]["]["];
*/
154
// Lexer modes for plain text.  There is only one.
enum class text_mode_e {
  Outer,  // default
};
158
159	// Returns whether EOL was hit
160	template <>
161	bool Matcher<text_mode_e>::Match(Lexer<text_mode_e>* lexer, Token* tok) {
162	const char* p = lexer->p_current; // mutated by re2c
163
164	while (true) {
165	/*!re2c
166	nul { return true; }
167
168	// whitespace at start of line
169	whitespace { TOK(Id::WS); }
170
171	// This rule consumes trailing whitespace, but
172	// it's OK. We're counting significant lines, not
173	// highlighting.
174	[^\x00]+ { TOK(Id::Other); }
175
176	* { TOK(Id::Other); }
177
178	*/
179	}
180
181	tok->end_col = p - lexer->line_;
182	lexer->p_current = p;
183	return false;
184	}
185
// Lexer modes for ASDL.  There is only one.
enum class asdl_mode_e {
  Outer,
};
189
190	// Returns whether EOL was hit
191	template <>
192	bool Matcher<asdl_mode_e>::Match(Lexer<asdl_mode_e>* lexer, Token* tok) {
193	const char* p = lexer->p_current; // mutated by re2c
194
195	switch (lexer->line_mode) {
196	case asdl_mode_e::Outer:
197	while (true) {
198	/*!re2c
199	nul { return true; }
200
201	whitespace { TOK(Id::WS); }
202
203	identifier { TOK(Id::Name); }
204
205	pound_comment { TOK(Id::Comm); }
206
207	// Not the start of a comment, identifier
208	[^\x00#_a-zA-Z]+ { TOK(Id::Other); }
209
210	// e.g. unclosed quote like "foo
211	* { TOK(Id::Unknown); }
212
213	*/
214	}
215	break;
216	}
217
218	tok->end_col = p - lexer->line_;
219	lexer->p_current = p;
220	return false;
221	}
222
// Lexer modes for Python.  The Multi* modes are entered at an opening triple
// quote and left at the matching closing triple quote.
enum class py_mode_e {
  Outer,    // default
  MultiSQ,  // inside '''
  MultiDQ,  // inside """
};
228
229	// Returns whether EOL was hit
230	template <>
231	bool Matcher<py_mode_e>::Match(Lexer<py_mode_e>* lexer, Token* tok) {
232	const char* p = lexer->p_current; // mutated by re2c
233	const char* YYMARKER = p;
234
235	switch (lexer->line_mode) {
236	case py_mode_e::Outer:
237	while (true) {
238	/*!re2c
239	nul { return true; }
240
241	whitespace { TOK(Id::WS); }
242
243	identifier { TOK(Id::Name); }
244
245	[r]? sq_string { TOK(Id::Str); }
246	[r]? dq_string { TOK(Id::Str); }
247
248	// optional raw prefix
249	[r]? triple_sq { TOK_MODE(Id::Str, py_mode_e::MultiSQ); }
250	[r]? triple_dq { TOK_MODE(Id::Str, py_mode_e::MultiDQ); }
251
252	pound_comment { TOK(Id::Comm); }
253
254	// Not the start of a string, comment, identifier
255	[^\x00"'#_a-zA-Z]+ { TOK(Id::Other); }
256
257	// e.g. unclosed quote like "foo
258	* { TOK(Id::Unknown); }
259
260	*/
261	}
262	break;
263
264	case py_mode_e::MultiSQ:
265	while (true) {
266	/*!re2c
267	nul { return true; }
268
269	triple_sq { TOK_MODE(Id::Str, py_mode_e::Outer); }
270
271	[^\x00']* { TOK(Id::Str); }
272
273	* { TOK(Id::Str); }
274
275	*/
276	}
277	break;
278
279	case py_mode_e::MultiDQ:
280	while (true) {
281	/*!re2c
282	nul { return true; }
283
284	triple_dq { TOK_MODE(Id::Str, py_mode_e::Outer); }
285
286	[^\x00"]* { TOK(Id::Str); }
287
288	* { TOK(Id::Str); }
289
290	*/
291	}
292	break;
293	}
294
295	tok->end_col = p - lexer->line_;
296	lexer->p_current = p;
297	return false;
298	}
299
// Lexer modes for C++.  The non-Outer modes track constructs that can span
// several lines.
enum class cpp_mode_e {
  Outer,     // default
  Comm,      // inside /* */ comment
  DelimStr,  // R"zz(string literal)zz"
  Re2c,      // /* !re2c
};
306
307	// Returns whether EOL was hit
308	template <>
309	bool Matcher<cpp_mode_e>::Match(Lexer<cpp_mode_e>* lexer, Token* tok) {
310	const char* p = lexer->p_current; // mutated by re2c
311	const char* YYMARKER = p;
312	const char s, e; // submatch extraction
313
314	// Autogenerated tag variables used by the lexer to track tag values.
315	/!stags:re2c format = 'const char @@;\n'; */
316
317	switch (lexer->line_mode) {
318	case cpp_mode_e::Outer:
319
320	while (true) {
321	/*!re2c
322	nul { return true; }
323
324	whitespace { TOK(Id::WS); }
325
326	"{" { TOK(Id::LBrace); }
327	"}" { TOK(Id::RBrace); }
328
329	identifier { TOK(Id::Name); }
330
331	// approximation for C++ char literals
332	sq_string { TOK(Id::Str); }
333	dq_string { TOK(Id::Str); }
334
335	// Not the start of a string, comment, identifier
336	[^\x00"'/_a-zA-Z{}]+ { TOK(Id::Other); }
337
338	"//" not_nul* { TOK(Id::Comm); }
339
340	// Treat re2c as preprocessor block
341	"/" "*!re2c" { TOK_MODE(Id::Re2c, cpp_mode_e::Re2c); }
342
343	"/" "*" { TOK_MODE(Id::Comm, cpp_mode_e::Comm); }
344
345	// Not sure what the rules are for R"zz(hello)zz". Make it similar to
346	// here docs.
347	cpp_delim_str = [_a-zA-Z]*;
348
349	"R" ["] @s cpp_delim_str @e "(" {
350	SUBMATCH(s, e);
351	TOK_MODE(Id::DelimStrBegin, cpp_mode_e::DelimStr);
352	}
353
354	// e.g. unclosed quote like "foo
355	* { TOK(Id::Unknown); }
356
357	*/
358	}
359	break;
360
361	case cpp_mode_e::Comm:
362	// Search until next */
363	while (true) {
364	/*!re2c
365	nul { return true; }
366
367	"*" "/" { TOK_MODE(Id::Comm, cpp_mode_e::Outer); }
368
369	[^\x00] { TOK(Id::Comm); }
370
371	* { TOK(Id::Comm); }
372
373	*/
374	}
375	break;
376
377	case cpp_mode_e::Re2c:
378	// Search until next */
379	while (true) {
380	/*!re2c
381	nul { return true; }
382
383	"*" "/" { TOK_MODE(Id::Re2c, cpp_mode_e::Outer); }
384
385	[^\x00] { TOK(Id::Re2c); }
386
387	* { TOK(Id::Re2c); }
388
389	*/
390	}
391	break;
392
393	case cpp_mode_e::DelimStr:
394	// Search until next */
395	while (true) {
396	/*!re2c
397	nul { return true; }
398
399	")" @s cpp_delim_str @e ["] {
400	SUBMATCH(s, e);
401	TOK(Id::DelimStrEnd);
402
403	// Caller is responsible for checking the extracted delimiter, and
404	// setting mode back to Cpp::Outer!
405	}
406
407	[^\x00)]* { TOK(Id::Str); }
408
409	* { TOK(Id::Str); }
410
411	*/
412	}
413	break;
414	}
415
416	tok->end_col = p - lexer->line_;
417	lexer->p_current = p;
418	return false;
419	}
420
421	class Hook {
422	public:
423	// Return true if this is a preprocessor line, and fill in tokens
424	// Caller should check last token for whether there is a continuation line.
425	virtual void TryPreprocess(char* line, std::vector<Token>* tokens) {
426	;
427	}
428	virtual ~Hook() {
429	}
430	};
431
// Lexer modes for preprocessor lines.  There is only one.
enum class pp_mode_e {
  Outer,
};
435
436	// Returns whether EOL was hit
437	template <>
438	bool Matcher<pp_mode_e>::Match(Lexer<pp_mode_e>* lexer, Token* tok) {
439	const char* p = lexer->p_current; // mutated by re2c
440	const char* YYMARKER = p;
441
442	switch (lexer->line_mode) {
443	case pp_mode_e::Outer:
444	while (true) {
445	/*!re2c
446	nul { return true; }
447
448	// Resolved in fix-up pass
449	// #include #define etc. only valid at the
450	// beginning
451	[ \t]* "#" [a-z]+ { TOK(Id::MaybePreproc); }
452
453	// C-style comments can end these lines
454	"//" not_nul* { TOK(Id::Comm); }
455
456	[\\] [\n] { TOK(Id::LineCont); }
457
458	// A line could be all whitespace, then \ at the
459	// end. And it's not significant
460	whitespace { TOK(Id::WS); }
461
462	// Not the start of a command, comment, or line
463	// continuation
464	[^\x00#/\\]+ { TOK(Id::PreprocOther); }
465
466	* { TOK(Id::PreprocOther); }
467
468	*/
469	}
470	break;
471	}
472
473	tok->end_col = p - lexer->line_;
474	lexer->p_current = p;
475	return false;
476	}
477
478	class CppHook : public Hook {
479	public:
480	virtual void TryPreprocess(char* line, std::vector<Token>* tokens);
481	};
482
// Lexer modes for R.  Quoted strings may span lines, so each quote style has
// its own mode.
enum class R_mode_e {
  Outer,  // default

  SQ,  // inside multi-line ''
  DQ,  // inside multi-line ""
};
489
490	// Returns whether EOL was hit
491	template <>
492	bool Matcher<R_mode_e>::Match(Lexer<R_mode_e>* lexer, Token* tok) {
493	const char* p = lexer->p_current; // mutated by re2c
494	const char* YYMARKER = p;
495
496	switch (lexer->line_mode) {
497	case R_mode_e::Outer:
498	while (true) {
499	/*!re2c
500	nul { return true; }
501
502	whitespace { TOK(Id::WS); }
503
504	pound_comment { TOK(Id::Comm); }
505
506	identifier { TOK(Id::Name); }
507
508	// Not the start of a string, escaped, comment, identifier
509	[^\x00"'#_a-zA-Z]+ { TOK(Id::Other); }
510
511	['] { TOK_MODE(Id::Str, R_mode_e::SQ); }
512	["] { TOK_MODE(Id::Str, R_mode_e::DQ); }
513
514	* { TOK(Id::Unknown); }
515
516	*/
517	}
518	break;
519
520	case R_mode_e::SQ:
521	while (true) {
522	/*!re2c
523	nul { return true; }
524
525	['] { TOK_MODE(Id::Str, R_mode_e::Outer); }
526
527	sq_middle { TOK(Id::Str); }
528
529	* { TOK(Id::Str); }
530
531	*/
532	}
533	break;
534
535	case R_mode_e::DQ:
536	while (true) {
537	/*!re2c
538	nul { return true; }
539
540	["] { TOK_MODE(Id::Str, R_mode_e::Outer); }
541
542	dq_middle { TOK(Id::Str); }
543
544	* { TOK(Id::Str); }
545
546	*/
547	}
548	break;
549	}
550
551	tok->end_col = p - lexer->line_;
552	lexer->p_current = p;
553	return false;
554	}
555
556	// Problem with shell: nested double quotes!!!
557	// We probably discourage this in YSH
558
// Lexer modes for shell.  The quote modes track strings that may span lines.
// The Ysh* modes are declared but not handled by Matcher<sh_mode_e>::Match(),
// which asserts if it sees one of them.
enum class sh_mode_e {
  Outer,  // default

  SQ,        // inside multi-line ''
  DollarSQ,  // inside multi-line $''
  DQ,        // inside multi-line ""

  // We could have a separate thing for this
  YshSQ,  // inside '''
  YshDQ,  // inside """
  YshJ,   // inside j"""
};
571
572	// Returns whether EOL was hit
573
574	// Submatch docs:
575	// https://re2c.org/manual/manual_c.html#submatch-extraction
576
577	template <>
578	bool Matcher<sh_mode_e>::Match(Lexer<sh_mode_e>* lexer, Token* tok) {
579	const char* p = lexer->p_current; // mutated by re2c
580	const char* YYMARKER = p;
581	const char s, e; // submatch extraction
582
583	// Autogenerated tag variables used by the lexer to track tag values.
584	/!stags:re2c format = 'const char @@;\n'; */
585
586	switch (lexer->line_mode) {
587	case sh_mode_e::Outer:
588	while (true) {
589	/*!re2c
590	nul { return true; }
591
592	whitespace { TOK(Id::WS); }
593
594	// Resolved in fix-up pass
595	pound_comment { TOK(Id::MaybeComment); }
596
597	// not that relevant for shell
598	identifier { TOK(Id::Name); }
599
600	// Not the start of a string, escaped, comment, identifier, here doc
601	[^\x00"'$#_a-zA-Z\\<]+ { TOK(Id::Other); }
602
603	// echo is like a string
604	"\\" . { TOK(Id::Str); }
605
606	['] { TOK_MODE(Id::Str, sh_mode_e::SQ); }
607	["] { TOK_MODE(Id::Str, sh_mode_e::DQ); }
608	"$'" { TOK_MODE(Id::Str, sh_mode_e::DollarSQ); }
609
610	// <<- is another syntax
611	here_op = "<<" [-]? [ \t]*;
612	h_delim = [_a-zA-Z][_a-zA-Z0-9]*;
613
614	// unquoted or quoted
615	here_op @s h_delim @e { SUBMATCH(s, e); TOK(Id::HereBegin); }
616	here_op ['] @s h_delim @e ['] { SUBMATCH(s, e); TOK(Id::HereBegin); }
617	here_op ["] @s h_delim @e ["] { SUBMATCH(s, e); TOK(Id::HereBegin); }
618	here_op "\\" @s h_delim @e { SUBMATCH(s, e); TOK(Id::HereBegin); }
619
620	// NOT Unknown, as in Python
621	* { TOK(Id::Other); }
622
623	*/
624	}
625	break;
626
627	case sh_mode_e::SQ:
628	// Search until next ' unconditionally
629	while (true) {
630	/*!re2c
631	nul { return true; }
632
633	['] { TOK_MODE(Id::Str, sh_mode_e::Outer); }
634
635	[^\x00']* { TOK(Id::Str); }
636
637	* { TOK(Id::Str); }
638
639	*/
640	}
641	break;
642
643	case sh_mode_e::DQ:
644	// Search until next " that's not preceded by "
645	while (true) {
646	/*!re2c
647	nul { return true; }
648
649	["] { TOK_MODE(Id::Str, sh_mode_e::Outer); }
650
651	dq_middle { TOK(Id::Str); }
652
653	* { TOK(Id::Str); }
654
655	*/
656	}
657	break;
658
659	case sh_mode_e::DollarSQ:
660	// Search until next ' that's not preceded by "
661	while (true) {
662	/*!re2c
663	nul { return true; }
664
665	['] { TOK_MODE(Id::Str, sh_mode_e::Outer); }
666
667	sq_middle { TOK(Id::Str); }
668
669	* { TOK(Id::Str); }
670
671	*/
672	}
673	break;
674	case sh_mode_e::YshSQ:
675	case sh_mode_e::YshDQ:
676	case sh_mode_e::YshJ:
677	assert(0);
678	}
679
680	tok->end_col = p - lexer->line_;
681	lexer->p_current = p;
682	return false;
683	}
684
// Lexer modes for HTML.  Matcher<html_mode_e>::Match() moves between them as
// it sees tag starts, attribute names, '=' and quotes.
enum class html_mode_e {
  Outer,      // <NAME enters the TAG state
  AttrName,   // NAME=" NAME=' NAME= NAME
  AttrValue,  // NAME=" NAME=' NAME=
  SQ,         // respects Chars, can contain "
  DQ,         // respects Chars, can contain '
};
692
693	// LeftStartTag -> RightStartTag <a href=/ >
694	// LeftStartTag -> SelfClose <br id=foo />
695
696	// Returns whether EOL was hit
697	template <>
698	bool Matcher<html_mode_e>::Match(Lexer<html_mode_e>* lexer, Token* tok) {
699	const char* p = lexer->p_current; // mutated by re2c
700	const char* YYMARKER = p;
701
702	/*!re2c
703	// Common definitions
704
705	// Like _NAME_RE in HTM8
706	name = [a-zA-Z][a-zA-Z0-9:_-]* ;
707
708	// TODO: check this pattern
709	char_name = '&' [a-zA-Z][a-zA-Z0-9]* ';' ;
710	char_dec = '&#' [0-9]+ ';' ;
711	char_hex = '&#x' [0-9a-fA-F]+ ';' ;
712	*/
713
714	switch (lexer->line_mode) {
715	case html_mode_e::Outer:
716	while (true) {
717	/*!re2c
718	// accepted EOF
719	nul { return true; }
720
721	char_name { TOK(Id::CharEscape); }
722	char_dec { TOK(Id::CharEscape); }
723	char_hex { TOK(Id::CharEscape); }
724
725	'&' { TOK(Id::BadAmpersand); }
726	'>' { TOK(Id::BadGreaterThan); }
727	'<' { TOK(Id::BadLessThan); }
728
729	'</' name '>' { TOK(Id::EndTag); }
730
731	'<' name {
732	TOK_MODE(Id::TagNameLeft, html_mode_e::AttrName);
733	// TODO: <script> <style> - special logic for strstr()
734	}
735
736	'<!' [^\x00>]* '>' { TOK(Id::Str); }
737
738	// TODO: use strstr() to end these?
739	// Problem: they all need their own mode, just like cpp_mode_e::Comm
740	// html_mode_e::{Comm,Processing,CData,Script,Style}
741	'<!--' { TOK(Id::Str); }
742	'<?' { TOK(Id::Str); }
743	'<![CDATA[' { TOK(Id::Str); }
744
745
746	// Like RawData
747	* { TOK(Id::Other); }
748
749	*/
750	}
751	break;
752	case html_mode_e::AttrName:
753	while (true) {
754	/*!re2c
755	nul { return true; } // TODO: error
756
757	'>' { TOK_MODE(Id::TagNameRight, html_mode_e::Outer); }
758	'/>' { TOK_MODE(Id::SelfClose, html_mode_e::Outer); }
759
760	space_required name {
761	// <a missing> - stay in the AttrName mode
762	TOK(Id::AttrName);
763	}
764
765	space_required name whitespace '=' whitespace {
766	// NAME= NAME=' NAME=" - expecting a value
767	TOK_MODE(Id::AttrName, html_mode_e::AttrValue);
768	}
769
770	* { TOK(Id::Unknown); }
771	*/
772	}
773	break;
774	case html_mode_e::AttrValue:
775	while (true) {
776	/*!re2c
777	nul { return true; } // TODO: error
778
779	'"' { TOK_MODE(Id::Str, html_mode_e::DQ); }
780	"'" { TOK_MODE(Id::Str, html_mode_e::SQ); }
781
782	// Unquoted value - a single token
783	unquoted_value = [^\x00 \r\n\t<>&"']+ ;
784
785	unquoted_value { TOK_MODE(Id::Str, html_mode_e::AttrName); }
786
787	* { TOK(Id::Unknown); }
788	*/
789	}
790	break;
791
792	case html_mode_e::DQ:
793	while (true) {
794	/*!re2c
795	nul { return true; } // TODO: error
796	char_name { TOK(Id::CharEscape); }
797	char_dec { TOK(Id::CharEscape); }
798	char_hex { TOK(Id::CharEscape); }
799
800	// we would only need these for translation to XML, not
801	// highlighting?
802	'&' { TOK(Id::BadAmpersand); }
803	'>' { TOK(Id::BadGreaterThan); }
804	'<' { TOK(Id::BadLessThan); }
805
806	'"' { TOK_MODE(Id::Str, html_mode_e::AttrName); }
807	* { TOK(Id::Str); }
808	*/
809	}
810	break;
811	case html_mode_e::SQ:
812	while (true) {
813	/*!re2c
814	nul { return true; } // TODO: error
815	char_name { TOK(Id::CharEscape); }
816	char_dec { TOK(Id::CharEscape); }
817	char_hex { TOK(Id::CharEscape); }
818
819	// we would only need these for translation to XML, not
820	// highlighting?
821	'&' { TOK(Id::BadAmpersand); }
822	'>' { TOK(Id::BadGreaterThan); }
823	'<' { TOK(Id::BadLessThan); }
824	"'" { TOK_MODE(Id::Str, html_mode_e::AttrName); }
825
826	* { TOK(Id::Str); }
827	*/
828	}
829	break;
830	default:
831	assert(0);
832	}
833
834	tok->end_col = p - lexer->line_;
835	lexer->p_current = p;
836	return false;
837	}
838
839	// TODO:
840	// - Lua / Rust-style multi-line strings, with matching delimiters e.g. r###"
841	// - same as C++ raw string, I think
842	// - similar to here docs, but less complex
843	//
844	// Inherent problems with "micro segmentation":
845	//
846	// - Nested double quotes in shell. echo "hi ${name:-"default"}"
847	// - This means that lexing is dependent on parsing: does the second
848	// double quote close the first one, or does it start a nested string?
849	// - lexing is non-recursive, parsing is recursive
850
// Shell comments depend on operator chars, so a # does not always start one:
//   echo one # comment
//   echo $(( 16#ff ))
854
855	#endif // MICRO_SYNTAX_H