Completely redesigned meta-arg syntax.
This commit is contained in:
parent
737ebc5afe
commit
579ee12924
152
proc2.c
152
proc2.c
|
@ -55,31 +55,31 @@
|
||||||
** comment character when it begins to scan the second line.
|
** comment character when it begins to scan the second line.
|
||||||
**
|
**
|
||||||
** Arguments are parsed from the second line as follows:
|
** Arguments are parsed from the second line as follows:
|
||||||
** Arguments are white-space separated. The only special character is \,
|
** - The only special chars are space, tab, newline, and \.
|
||||||
** the knock-down character. \nnn, for three octal digits n, reads as the
|
** - Every space char terminates an argument.
|
||||||
** char whose ASCII code is nnn. \n is newline. \ followed by anything else
|
** Multiple spaces therefore introduce empty-string arguments.
|
||||||
** is just that character -- including \, space, tab, and newline. It is an
|
** - A newline terminates the argument list, and will also terminate a
|
||||||
** error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
|
** non-empty argument (but a newline following a space does not introduce
|
||||||
** "3Q" -- it's an error.
|
** a final "" argument; it only terminates the argument list).
|
||||||
**
|
** - Tab is not allowed.
|
||||||
** The argument line is terminated by newline or end-of-file.
|
** This is to prevent you from being screwed by thinking you had several
|
||||||
|
** spaces where you really had a tab, and vice-versa.
|
||||||
|
** - The only other special character is \, the knock-down character.
|
||||||
|
** \ escapes \, space, tab, and newline, turning off their special
|
||||||
|
** functions. The ANSI C escape sequences, such as \n and \t are
|
||||||
|
** supported; these also produce argument-constituents -- \n doesn't act
|
||||||
|
** like a terminating newline. \nnn for *exactly* three octal digits reads
|
||||||
|
** as the char whose ASCII code is nnn. It is an error if \ is followed by
|
||||||
|
** just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
|
||||||
|
** constituent chars. \ followed by other chars is not allowed (so we can
|
||||||
|
** extend the escape-code space later if we like).
|
||||||
**
|
**
|
||||||
** Nul bytes & empty strings -- completeness at all costs:
|
** You have to construct these line-2 arg lines carefully. For example,
|
||||||
** Not that it is very useful, but how does one get empty arguments ("")
|
** beware of trailing spaces at the end of the line. They'll give you
|
||||||
** with this syntax? Well, ASCII nuls are taken to terminate arguments
|
** extra trailing empty-string args.
|
||||||
** -- this is a fairly deeply-embedded property of UNIX. Each nul
|
|
||||||
** encountered on the argument line immediately terminates the current
|
|
||||||
** argument. So, three nuls surrounded by whitespace produces 3 empty
|
|
||||||
** arguments in series. This nul termination happens after \nnn processing,
|
|
||||||
** so you can use a line like
|
|
||||||
** #!/bin/interpreter \
|
|
||||||
** foo \000bar \000\000baz\000 quux
|
|
||||||
** to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
|
|
||||||
** The rule is: a run of whitespace terminates an argument,
|
|
||||||
** but *each* individual nul terminates an argument.
|
|
||||||
**
|
**
|
||||||
** \ followed by a nul is an error (it's not possible to knock-down nul
|
** You should also beware of including nul bytes into your arguments, since
|
||||||
** in UNIX).
|
** C's pathetic excuse for a string data-type will lose if you try this.
|
||||||
**
|
**
|
||||||
**
|
**
|
||||||
** Another way to get this sort of multiple-argument functionality, with
|
** Another way to get this sort of multiple-argument functionality, with
|
||||||
|
@ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
|
||||||
return realloc(vec, len*elt_size);
|
return realloc(vec, len*elt_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */
|
/* The do ... while(0) is a trick to make this macro accept a terminating
|
||||||
|
** semicolon.
|
||||||
|
*/
|
||||||
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
|
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
|
||||||
{elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
|
do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
|
||||||
index, sizeof(elt_t)); \
|
index, sizeof(elt_t)); \
|
||||||
if(mgv_tmp) vec = mgv_tmp; else goto lose;}
|
if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0);
|
||||||
|
|
||||||
|
|
||||||
/* process_meta_arg(fname, av)
|
/* process_meta_arg(fname, av)
|
||||||
|
@ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
|
||||||
** argument following the \ switch, i.e., the <fname> argument.
|
** argument following the \ switch, i.e., the <fname> argument.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static char* read_arg(FILE*, int*);
|
static char* read_arg(FILE*);
|
||||||
|
|
||||||
char **process_meta_arg(char **av)
|
char **process_meta_arg(char **av)
|
||||||
{
|
{
|
||||||
char **argv, *arg, **ap;
|
char **argv, *arg, **ap;
|
||||||
|
int c;
|
||||||
FILE *script;
|
FILE *script;
|
||||||
int error_code; /* So ugly. */
|
|
||||||
char *fname;
|
char *fname;
|
||||||
int av_len;
|
int av_len;
|
||||||
int argv_i=0, argv_len=100;
|
int argv_i=0, argv_len=100;
|
||||||
|
@ -188,13 +190,15 @@ char **process_meta_arg(char **av)
|
||||||
argv = Malloc(char*, argv_len);
|
argv = Malloc(char*, argv_len);
|
||||||
if( !argv ) goto lose3;
|
if( !argv ) goto lose3;
|
||||||
|
|
||||||
while( (arg=read_arg(script, &error_code)) ) {
|
while( EOF != (c=getc(script)) && '\n' != c ) {
|
||||||
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1)
|
char *arg;
|
||||||
|
ungetc(c,script);
|
||||||
|
arg = read_arg(script);
|
||||||
|
if( !arg ) goto lose2;
|
||||||
|
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1);
|
||||||
argv[argv_i++] = arg;
|
argv[argv_i++] = arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( error_code ) goto lose2;
|
|
||||||
|
|
||||||
for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */
|
for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */
|
||||||
|
|
||||||
/* Precisely re-size argv. */
|
/* Precisely re-size argv. */
|
||||||
|
@ -218,64 +222,77 @@ char **process_meta_arg(char **av)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *read_arg(FILE *f, int *status_ptr)
|
/* Read in one arg and its terminating space.
|
||||||
|
** If arg is terminated by a newline, leave the newline in
|
||||||
|
** the stream so the outer loop can see it. Return a newly-allocated
|
||||||
|
** string containing the arg; NULL if there's an error.
|
||||||
|
*/
|
||||||
|
static char *read_arg(FILE *f)
|
||||||
{
|
{
|
||||||
char *buf, *tmp;
|
char *buf, *tmp;
|
||||||
int buflen, i;
|
int buflen, i;
|
||||||
int c;
|
|
||||||
|
|
||||||
*status_ptr = 0;
|
|
||||||
|
|
||||||
/* Skip whitespace. */
|
|
||||||
while( EOF != (c=getc(f)) )
|
|
||||||
if( c=='\n' ) return NULL;
|
|
||||||
else if( !isspace(c) )
|
|
||||||
{ungetc(c,f); break;}
|
|
||||||
|
|
||||||
if( c == EOF ) return NULL;
|
|
||||||
|
|
||||||
/* Allocate a buffer for the arg. */
|
/* Allocate a buffer for the arg. */
|
||||||
i = 0;
|
i = 0;
|
||||||
buflen=20;
|
buflen=20;
|
||||||
if( !(buf = Malloc(char, buflen)) ) {
|
if( !(buf = Malloc(char, buflen)) ) return NULL;
|
||||||
*status_ptr = -1;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Read in the arg. */
|
/* Read in the arg. */
|
||||||
while( EOF != (c=getc(f)) && !isspace(c) ) {
|
while(1) {
|
||||||
|
int c = getc(f);
|
||||||
|
|
||||||
|
if( c == EOF || c == ' ' ) break;
|
||||||
|
if( c == '\n' ) {ungetc(c, f); break;}
|
||||||
|
|
||||||
/* Do knock-down processing. */
|
/* Do knock-down processing. */
|
||||||
if( c == '\\' ) {
|
if( c == '\\' ) {
|
||||||
int c1, c2, c3;
|
int c1, c2, c3;
|
||||||
if( EOF == (c1 = getc(f)) ) goto lose;
|
switch (c1=getc(f)) {
|
||||||
if( isodigit(c1) ) {
|
case EOF:
|
||||||
|
goto lose;
|
||||||
|
|
||||||
|
/* \nnn octal escape. */
|
||||||
|
case '0': case '1':
|
||||||
|
case '2': case '3':
|
||||||
|
case '4': case '5':
|
||||||
|
case '6': case '7':
|
||||||
if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
|
if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
|
||||||
if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
|
if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
|
||||||
c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
|
c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ANSI C escapes. */
|
||||||
|
case 'n': c='\n'; break;
|
||||||
|
case 'r': c='\r'; break;
|
||||||
|
case 't': c='\t'; break;
|
||||||
|
case 'b': c='\b'; break;
|
||||||
|
|
||||||
|
/* Simple knock-down: \, space, tab, newline. */
|
||||||
|
case '\\': case ' ':
|
||||||
|
case '\t': case '\n':
|
||||||
|
c=c1; break;
|
||||||
|
|
||||||
|
/* Nothing else allowed. */
|
||||||
|
default: goto lose;
|
||||||
}
|
}
|
||||||
else if( c1 == 'n' ) c='\n';
|
|
||||||
else c=c1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Maybe_Grow_Vec(buf, buflen, i, char, lose)
|
/* No tab allowed. */
|
||||||
|
else if( c == '\t' ) goto lose;
|
||||||
|
|
||||||
|
Maybe_Grow_Vec(buf, buflen, i, char, lose);
|
||||||
buf[i++] = c;
|
buf[i++] = c;
|
||||||
if( c == '\0' ) break; /* nul terminates args. */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */
|
/* Null terminate the arg. */
|
||||||
|
Maybe_Grow_Vec(buf, buflen, i, char, lose);
|
||||||
/* Null terminate the arg if it hasn't been done already. */
|
buf[i++] = '\0';
|
||||||
if( c != '\0' ) {
|
|
||||||
Maybe_Grow_Vec(buf, buflen, i, char, lose)
|
|
||||||
buf[i++] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Precisely re-size buf and return. */
|
/* Precisely re-size buf and return. */
|
||||||
if( tmp=Realloc(char,buf,i) ) return tmp;
|
if( tmp=Realloc(char,buf,i) ) return tmp;
|
||||||
|
|
||||||
lose:
|
lose:
|
||||||
Free(buf);
|
Free(buf);
|
||||||
*status_ptr = -1;
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,11 +371,8 @@ main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
args_done:
|
args_done:
|
||||||
if( *argv ) fputs(*argv++, stdout);
|
if( *argv ) printf("\"%s\"", *argv++);
|
||||||
while( *argv ) {
|
while( *argv ) printf(" \"%s\"", *argv++);
|
||||||
putchar(' ');
|
|
||||||
fputs(*argv++, stdout);
|
|
||||||
}
|
|
||||||
if( !n_flag ) putchar('\n');
|
if( !n_flag ) putchar('\n');
|
||||||
}
|
}
|
||||||
#endif /* 0 */
|
#endif /* 0 */
|
||||||
|
|
|
@ -5,32 +5,28 @@
|
||||||
|
|
||||||
;;; Syntax of the line 2 argument line:
|
;;; Syntax of the line 2 argument line:
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; Arguments are white-space separated. The only special character is \,
|
;;; - The only special chars are space, tab, newline, and \.
|
||||||
;;; the knock-down character. \nnn, for three octal digits n, reads as the
|
;;; - Every space char terminates an argument.
|
||||||
;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else
|
;;; Multiple spaces therefore introduce empty-string arguments.
|
||||||
;;; is just that character -- including \, space, tab, and newline. It is an
|
;;; - A newline terminates the argument list, and will also terminate a
|
||||||
;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
|
;;; non-empty argument (but a newline following a space does not introduce
|
||||||
;;; "3Q" -- it's an error. A backslash-encoded char is always an argument
|
;;; a final "" argument; it only terminates the argument list).
|
||||||
;;; constituent unless it is the nul char (\000).
|
;;; - Tab is not allowed.
|
||||||
;;;
|
;;; This is to prevent you from being screwed by thinking you had several
|
||||||
;;; The argument line is terminated by newline or end-of-file.
|
;;; spaces where you really had a tab, and vice-versa.
|
||||||
;;;
|
;;; - The only other special character is \, the knock-down character.
|
||||||
;;; Nul bytes & empty strings -- completeness at all costs:
|
;;; \ escapes \, space, tab, and newline, turning off their special
|
||||||
;;; Not that it is very useful, but how does one get empty arguments ("")
|
;;; functions. The ANSI C escape sequences, such as \n and \t are
|
||||||
;;; with this syntax? Well, ASCII nuls are taken to terminate arguments
|
;;; supported; these also produce argument-constituents -- \n doesn't act
|
||||||
;;; -- this is a fairly deeply-embedded property of UNIX. Each nul
|
;;; like a terminating newline. \nnn for *exactly* three octal digits reads
|
||||||
;;; encountered on the argument line immediately terminates the current
|
;;; as the char whose ASCII code is nnn. It is an error if \ is followed by
|
||||||
;;; argument. So, three nuls surrounded by whitespace produces 3 empty
|
;;; just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
|
||||||
;;; arguments in series. This nul termination happens after \nnn processing,
|
;;; constituent chars. \ followed by other chars is not allowed (so we can
|
||||||
;;; so you can use a line like
|
;;; extend the escape-code space later if we like).
|
||||||
;;; #!/bin/interpreter \
|
;;;
|
||||||
;;; foo \000bar \000\000baz\000 quux
|
;;; You have to construct these line-2 arg lines carefully. For example,
|
||||||
;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
|
;;; beware of trailing spaces at the end of the line. They'll give you
|
||||||
;;; The rule is: a run of whitespace terminates an argument,
|
;;; extra trailing empty-string args.
|
||||||
;;; but *each* individual nul terminates an argument.
|
|
||||||
;;;
|
|
||||||
;;; \ followed by a nul is an error (it's not possible to knock-down nul
|
|
||||||
;;; in UNIX).
|
|
||||||
|
|
||||||
;;; (meta-arg-process-arglist args)
|
;;; (meta-arg-process-arglist args)
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
@ -69,7 +65,6 @@
|
||||||
|
|
||||||
(define (read-secondary-args port)
|
(define (read-secondary-args port)
|
||||||
(let lp ((args '()))
|
(let lp ((args '()))
|
||||||
(skip-char-set char-set:meta-arg-separators port)
|
|
||||||
(let ((c (peek-char port)))
|
(let ((c (peek-char port)))
|
||||||
(if (or (eof-object? c) (char=? c #\newline))
|
(if (or (eof-object? c) (char=? c #\newline))
|
||||||
(reverse args)
|
(reverse args)
|
||||||
|
@ -81,32 +76,38 @@
|
||||||
(define (read-secondary-arg port)
|
(define (read-secondary-arg port)
|
||||||
(let lp ((chars '()))
|
(let lp ((chars '()))
|
||||||
(let ((c (peek-char port)))
|
(let ((c (peek-char port)))
|
||||||
(cond ((or (eof-object? c)
|
(cond ((or (eof-object? c) (char=? c #\newline))
|
||||||
(char-set-contains? char-set:whitespace c))
|
|
||||||
(apply string (reverse chars))) ; Leave C in stream.
|
|
||||||
|
|
||||||
((char=? c ascii/nul)
|
|
||||||
(read-char port) ; Consume C.
|
|
||||||
(apply string (reverse chars)))
|
(apply string (reverse chars)))
|
||||||
|
|
||||||
((char=? c #\\)
|
((char=? c #\space)
|
||||||
(read-char port)
|
(read-char port)
|
||||||
(let ((c (read-backslash-sequence port)))
|
(apply string (reverse chars)))
|
||||||
(if (char=? c ascii/nul)
|
|
||||||
(apply string (reverse chars))
|
((char=? c tab)
|
||||||
(lp (cons c chars)))))
|
(error "Illegal tab character in meta-arg argument line."))
|
||||||
|
|
||||||
|
(else (lp (cons ((cond ((char=? c #\\)
|
||||||
|
(read-char port)
|
||||||
|
read-backslash-sequence)
|
||||||
|
(else read-char))
|
||||||
|
port)
|
||||||
|
chars)))))))
|
||||||
|
|
||||||
(else (lp (cons (read-char port) chars)))))))
|
|
||||||
|
|
||||||
(define (read-backslash-sequence port)
|
(define (read-backslash-sequence port)
|
||||||
(let ((c1 (read-char port))
|
(let ((c1 (read-char port))
|
||||||
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg"))))
|
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line"))))
|
||||||
(cond ((eof-object? c1) (eof-lose))
|
(cond ((eof-object? c1) (eof-lose))
|
||||||
|
|
||||||
|
;; This would be better handled by a char-map abstraction.
|
||||||
((char=? c1 #\n) #\newline)
|
((char=? c1 #\n) #\newline)
|
||||||
|
((char=? c1 #\r) carriage-return)
|
||||||
|
((char=? c1 #\t) tab)
|
||||||
|
((char=? c1 #\b) backspace)
|
||||||
|
;; ...whatever. Look up complete table.
|
||||||
|
|
||||||
((char=? c1 ascii/nul)
|
;; \, space, tab, newline.
|
||||||
(error "Cannot backslash nul byte in meta-arg"))
|
((char-set-contains? char-set:simple-knockdown c1) c1)
|
||||||
|
|
||||||
((char-set-contains? char-set:octal-digits c1)
|
((char-set-contains? char-set:octal-digits c1)
|
||||||
(let ((c2 (read-char port)))
|
(let ((c2 (read-char port)))
|
||||||
|
@ -117,13 +118,17 @@
|
||||||
(* 8 (+ (octet->int c2)
|
(* 8 (+ (octet->int c2)
|
||||||
(* 8 (octet->int c1)))))))))))
|
(* 8 (octet->int c1)))))))))))
|
||||||
|
|
||||||
|
|
||||||
(else c1))))
|
(else (error "Illegal \\ escape sequence in meta-arg argument line."
|
||||||
|
c1)))))
|
||||||
|
|
||||||
(define (octet->int c) (- (char->ascii c) (char->ascii #\0)))
|
(define (octet->int c) (- (char->ascii c) (char->ascii #\0)))
|
||||||
|
|
||||||
(define ascii/nul (ascii->char 0))
|
|
||||||
|
|
||||||
(define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))
|
(define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))
|
||||||
|
|
||||||
(define char-set:meta-arg-separators (string->char-set " \t"))
|
(define char-set:simple-knockdown (string->char-set "\\ \n\t"))
|
||||||
|
|
||||||
|
;;; Yechh.
|
||||||
|
(define tab (ascii->char 9))
|
||||||
|
(define carriage-return (ascii->char 13))
|
||||||
|
(define backspace (ascii->char 8))
|
||||||
|
|
Loading…
Reference in New Issue