summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Laurent Bercot <ska-skaware@skarnet.org> 2022-05-26 12:46:37 +0000
committer: Laurent Bercot <ska@appnovation.com> 2022-05-26 12:46:37 +0000
commit: 7b631e14a70f6c2e8dcabd2713422bc585af2703 (patch)
tree: 3b5d8592acc340a5d79bb6dbeaed4b719662d689
parent: db5bf6b28e46da0305d13383201e3dfbf2e80178 (diff)
download: execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.tar.xz
Rewrite el_parse.c, document the transition table
Signed-off-by: Laurent Bercot <ska@appnovation.com>
-rw-r--r-- doc/execlineb.html 8
-rw-r--r-- src/libexecline/PARSING.txt 99
-rw-r--r-- src/libexecline/el_parse.c 95
3 files changed, 157 insertions, 45 deletions
diff --git a/doc/execlineb.html b/doc/execlineb.html
index fe4f0d3..f9d3e4d 100644
--- a/doc/execlineb.html
+++ b/doc/execlineb.html
@@ -133,10 +133,11 @@ newlines disappear completely. </li>
<li> <tt>\0x<em>ab</em></tt> sequences are recognized in quoted strings
and evaluate to ASCII hexadecimal number <em>ab</em>. </li>
<li> <tt>\0<em>abc</em></tt> sequences are recognized in quoted strings
-and evaluate to ASCII octal number <em>abc</em>. </li>
+and evaluate to ASCII octal number <em>abc</em>. <em>abc</em> must not
+be greater than <em>377</em>, and must not evaluate to 0. </li>
<li> <tt>\<em>abc</em></tt> sequences are recognized in quoted strings
and evaluate to ASCII decimal number <em>abc</em>. <em>a</em> must not
-be zero. </li>
+be zero. <em>abc</em> must not be greater than 255, and must not evaluate to 0. </li>
<li> A comment starts with a <tt>#</tt> and ends with the line. Comments
are not recognized inside quoted strings. </li>
<li> Anything else is an unquoted string, that can evaluate to
@@ -144,6 +145,9 @@ zero or more words. </li>
<li> Any character can be escaped in unquoted strings by prepending
it with a backslash. It works the same way in quoted strings, except
for the special sequences described above. </li>
+ <li> As a special case, an unquoted backslash at the end of a line, or at
+the end of the input, is ignored. This is to make it easier to copy
+execline fragments from a shell script. </li>
</ul>
<p>
diff --git a/src/libexecline/PARSING.txt b/src/libexecline/PARSING.txt
new file mode 100644
index 0000000..b84c0ff
--- /dev/null
+++ b/src/libexecline/PARSING.txt
@@ -0,0 +1,99 @@
+el_parse.c:
+
+class   | 0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
+st\ev   | \0      space   #       "       newline \       normal  abf     1-7     8-9     0       nrtv    x       A-Fcde  {       }
+
+START   |                         n                       n p     n p     n p     n p     n p     n p     n p     n p     n p     n p
+00      | END     START   COMMENT Q       START   Q1      W       W       W       W       W       W       W       W       OPENB   CLOSEB
+
+COMMENT |
+01      | END     COMMENT COMMENT COMMENT START   COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT
+
+OPENB   |         {       p               {               p       p       p       p       p       p       p       p       p       p
+02      | X       START   W       Q       START   Q1      W       W       W       W       W       W       W       W       W       W
+
+CLOSEB  | } 0     } 0     p               } 0             p       p       p       p       p       p       p       p       p       p
+03      | END     START   W       Q       START   Q1      W       W       W       W       W       W       W       W       W       W
+
+W       | 0       0       p               0               p       p       p       p       p       p       p       p       p       p
+04      | END     START   W       Q       START   Q2      W       W       W       W       W       W       W       W       W       W
+
+Q1      |         n p     n p     n p             n p     n p     n p     n p     n p     n p     n p     n p     n p     n p     n p
+05      | END     W       W       W       START   W       W       W       W       W       W       W       W       W       W       W
+
+Q2      |         p       p       p       p       p       p       p       p       p       p       p       p       p       p       p
+06      | X       W       W       W       W       W       W       W       W       W       W       W       W       W       W       W
+
+Q3      |         p       p       p               p       p       c       m p b   m p b   b       c       p       p       p       p
+07      | X       Q       Q       Q       Q       Q       Q       Q       DEC1    DEC1    OCT     Q       Q       Q       Q       Q
+
+Q       |         p       p               p               p       p       p       p       p       p       p       p       p       p
+08      | X       Q       Q       W       Q       Q3      Q       Q       Q       Q       Q       Q       Q       Q       Q       Q
+
+OCT     |                                                                 m p             m p             b
+09      | X       X       X       X       X       X       X       X       OCT1    X       OCT1    X       HEX     X       X       X
+
+OCT1    | s 0     s p     s p     s       s p     s       s p     s p     p       s p     p       s p     s p     s p     s p     s p
+0a      | END     Q       Q       W       Q       Q3      Q       Q       OCT2    Q       OCT2    Q       Q       Q       Q       Q
+
+OCT2    | s 0     s p     s p     s       s p     s       s p     s p     p       s p     p       s p     s p     s p     s p     s p
+0b      | END     Q       Q       W       Q       Q3      Q       Q       ENDNUM  Q       ENDNUM  Q       Q       Q       Q       Q
+
+DEC1    | s 0     s p     s p     s       s p     s       s p     s p     p       p       p       s p     s p     s p     s p     s p
+0c      | END     Q       Q       W       Q       Q3      Q       Q       DEC2    DEC2    DEC2    Q       Q       Q       Q       Q
+
+DEC2    | s 0     s p     s p     s       s p     s       s p     s p     p       p       p       s p     s p     s p     s p     s p
+0d      | END     Q       Q       W       Q       Q3      Q       Q       ENDNUM  ENDNUM  ENDNUM  Q       Q       Q       Q       Q
+
+HEX     |                                                         m p     m p     m p     m p                     m p
+0e      | X       X       X       X       X       X       X       HEX1    HEX1    HEX1    HEX1    X       X       HEX1    X       X
+
+HEX1    | s 0     s p     s p     s       s p     s       s p     p       p       p       p       s p     s p     p       s p     s p
+0f      | END     Q       Q       W       Q       Q3      Q       ENDNUM  ENDNUM  ENDNUM  ENDNUM  Q       Q       ENDNUM  Q       Q
+
+ENDNUM  | s 0     s p     s p     s       s p     s       s p     s p     s p     s p     s p     s p     s p     s p     s p     s p
+10      | END     Q       Q       W       Q       Q3      Q       Q       Q       Q       Q       Q       Q       Q       Q       Q
+
+END
+11
+
+X
+12
+
+
+States
+
+START: in whitespace; initial state
+COMMENT: in a comment line
+OPENB: after a raw {
+CLOSEB: after a raw }
+W: in an unquoted word
+Q1: after a backslash in whitespace
+Q2: after a backslash in an unquoted word
+Q3: after a backslash in a quoted string
+Q: in a quoted string
+OCT: after \0 in a quoted string
+OCT1: after \0a in a quoted string
+OCT2: after \0ab in a quoted string
+DEC1: after \a in a quoted string
+DEC2: after \ab in a quoted string
+HEX: after \0x in a quoted string
+HEX1: after \0xa in a quoted string
+ENDNUM: after \0abc, \abc or \0xab in a quoted string
+END: success
+X: syntax error
+
+
+Actions
+
+8000 s scan integer from mark to cur
+4000 m set mark
+2000 n add blevel spaces
+1000 { inc blevel + unpush blevel
+0800 } dec blevel + unpush 2
+0400 p add cur to word
+0200 c add control char (depending on cur) to word
+0100 0 end word
+0080 b switch base according to cur
+0040 unused
+0020 unused
diff --git a/src/libexecline/el_parse.c b/src/libexecline/el_parse.c
index b45b776..6ef7ce9 100644
--- a/src/libexecline/el_parse.c
+++ b/src/libexecline/el_parse.c
@@ -1,88 +1,97 @@
/* ISC license. */
-#include <sys/types.h>
+#include <stddef.h>
#include <stdint.h>
-#include <skalibs/types.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <skalibs/uint64.h>
#include <skalibs/bytestr.h>
#include <skalibs/stralloc.h>
-#include <skalibs/djbunix.h>
+
#include <execline/execline.h>
int el_parse (stralloc *sa, el_chargen_func_ref next, void *source)
{
static unsigned char const class[256] = "`aaaaaaaaadaaaaaaaaaaaaaaaaaaaaaafcbffffffffffffjhhhhhhhiifffffffmmmmmmfffffffffffffffffffffeffffggmmmgfffffffkfffkfkfkflffnfoffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" ;
- static uint16_t const table[16][16] =
+ static uint16_t const table[17][16] =
{
- { 0x0011, 0x4011, 0x0010, 0x0010, 0x0010, 0x0011, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x4091 },
- { 0x0000, 0x4000, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 },
- { 0x0005, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
- { 0x0203, 0x0003, 0x8001, 0x0001, 0x8003, 0x0005, 0x0010, 0x0401, 0x0401, 0x0401, 0x0401, 0x0010, 0x0401, 0x0401, 0x0003, 0x0003 },
- { 0x0000, 0x4000, 0x8001, 0x8003, 0x0003, 0x0000, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 },
- { 0x0202, 0x0002, 0x8001, 0x0004, 0x8003, 0x0005, 0x0010, 0x0404, 0x0404, 0x0404, 0x0404, 0x0010, 0x0404, 0x0404, 0x0002, 0x0002 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x0010, 0x8403, 0x8403, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x1006, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x100b, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
- { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
- { 0x820e, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
- { 0x820f, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }
+ { 0x0011, 0x0000, 0x0001, 0x2008, 0x0000, 0x0005, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2402, 0x2403 },
+ { 0x0011, 0x0001, 0x0001, 0x0001, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 },
+ { 0x0012, 0x1000, 0x0404, 0x0008, 0x1000, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+ { 0x0911, 0x0900, 0x0404, 0x0008, 0x0900, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+ { 0x0111, 0x0100, 0x0404, 0x0008, 0x0100, 0x0006, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+ { 0x0011, 0x2404, 0x2404, 0x2404, 0x0000, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404 },
+ { 0x0012, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+ { 0x0012, 0x0408, 0x0408, 0x0408, 0x0008, 0x0408, 0x0408, 0x0208, 0x448c, 0x448c, 0x0089, 0x0208, 0x0408, 0x0408, 0x0408, 0x0408 },
+ { 0x0012, 0x0408, 0x0408, 0x0004, 0x0408, 0x0007, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408 },
+ { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440a, 0x0012, 0x440a, 0x0012, 0x008e, 0x0012, 0x0012, 0x0012 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040b, 0x8408, 0x040b, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x8408, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040d, 0x040d, 0x040d, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+ { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440f, 0x440f, 0x440f, 0x440f, 0x0012, 0x0012, 0x440f, 0x0012, 0x0012 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x0410, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x0410, 0x8408, 0x8408 },
+ { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }
} ;
size_t mark = 0 ;
- int n = 0 ;
- unsigned int blevel = 0 ;
- unsigned char state = 0, base = 10 ;
+ unsigned int n = 0, blevel = 0 ;
+ uint8_t state = 0, base = 10 ;
- while (state < 0x10)
+ while (state < 0x11)
{
uint16_t c ;
unsigned char cur ;
if (!(*next)(&cur, source)) return -1 ;
- c = table[class[cur]-'`'][state] ;
+ c = table[state][class[cur]-'`'] ;
state = c & 0x1F ;
- if (c & 0x0400)
+ if (c & 0x8000U)
{
- unsigned int z ;
+ uint64_t u ;
if (!stralloc_0(sa)) return -1 ;
sa->len = mark ;
- uint_scan_base(sa->s + sa->len, &z, base) ;
- sa->s[sa->len++] = (unsigned char)z ;
+ uint64_scan_base(sa->s + sa->len, &u, base) ;
+ if (!u || u > 0xff) return -2 ;
+ sa->s[sa->len++] = (unsigned char)u ;
}
- if (c & 0x0800) mark = sa->len ;
- if (c & 0x0200)
+ if (c & 0x4000U) mark = sa->len ;
+ if (c & 0x2000U)
{
- char tilde = EXECLINE_BLOCK_QUOTE_CHAR ;
unsigned int i = blevel ;
- if (!stralloc_readyplus(sa, i<<1)) return -1 ;
- while (i--) stralloc_catb(sa, &tilde, 1) ;
+ if (!stralloc_readyplus(sa, i<<2)) return -1 ;
+ while (i--) sa->s[sa->len++] = ' ' ;
}
- if (c & 0x0100) sa->len -= ++blevel ;
- if (c & 0x0080)
+ if (c & 0x1000U) sa->len -= ++blevel ;
+ if (c & 0x0800U)
{
if (!blevel--) return -4 ;
- sa->s[--sa->len-1] = EXECLINE_BLOCK_END_CHAR ;
- if (!EXECLINE_BLOCK_END_CHAR) sa->len-- ;
+ sa->len -= 2 ;
}
- if (c & 0x8000) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ;
- if (c & 0x2000)
+ if (c & 0x0400) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ;
+ if (c & 0x0200)
{
char x = 7 + byte_chr("abtnvfr", 7, cur) ;
if (!stralloc_catb(sa, &x, 1)) return -1 ;
}
- if (c & 0x4000) if (n++, !stralloc_0(sa)) return -1 ;
- if (c & 0x1000)
+ if (c & 0x0100)
+ {
+ if (n++ >= INT_MAX) return (errno = E2BIG, -1) ;
+ if (!stralloc_0(sa)) return -1 ;
+ }
+ if (c & 0x0080)
+ {
switch (cur)
{
case 'x' : base = 16 ; break ;
case '0' : base = 8 ; break ;
default : base = 10 ;
}
+ }
}
- if (state == 0x10) return -2 ;
+
+ if (state > 0x11) return -2 ;
if (blevel) return -3 ;
return n ;
}