Completely redesigned meta-arg syntax.
This commit is contained in:
parent
737ebc5afe
commit
579ee12924
152
proc2.c
152
proc2.c
|
@ -55,31 +55,31 @@
|
|||
** comment character when it begins to scan the second line.
|
||||
**
|
||||
** Arguments are parsed from the second line as follows:
|
||||
** Arguments are white-space separated. The only special character is \,
|
||||
** the knock-down character. \nnn, for three octal digits n, reads as the
|
||||
** char whose ASCII code is nnn. \n is newline. \ followed by anything else
|
||||
** is just that character -- including \, space, tab, and newline. It is an
|
||||
** error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
|
||||
** "3Q" -- it's an error.
|
||||
**
|
||||
** The argument line is terminated by newline or end-of-file.
|
||||
** - The only special chars are space, tab, newline, and \.
|
||||
** - Every space char terminates an argument.
|
||||
** Multiple spaces therefore introduce empty-string arguments.
|
||||
** - A newline terminates the argument list, and will also terminate a
|
||||
** non-empty argument (but a newline following a space does not introduce
|
||||
** a final "" argument; it only terminates the argument list).
|
||||
** - Tab is not allowed.
|
||||
** This is to prevent you from being screwed by thinking you had several
|
||||
** spaces where you really had a tab, and vice-versa.
|
||||
** - The only other special character is \, the knock-down character.
|
||||
** \ escapes \, space, tab, and newline, turning off their special
|
||||
** functions. The ANSI C escape sequences, such as \n and \t are
|
||||
** supported; these also produce argument-constituents -- \n doesn't act
|
||||
** like a terminating newline. \nnn for *exactly* three octal digits reads
|
||||
** as the char whose ASCII code is nnn. It is an error if \ is followed by
|
||||
** just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
|
||||
** constituent chars. \ followed by other chars is not allowed (so we can
|
||||
** extend the escape-code space later if we like).
|
||||
**
|
||||
** Nul bytes & empty strings -- completeness at all costs:
|
||||
** Not that it is very useful, but how does one get empty arguments ("")
|
||||
** with this syntax? Well, ASCII nuls are taken to terminate arguments
|
||||
** -- this is a fairly deeply-embedded property of UNIX. Each nul
|
||||
** encountered on the argument line immediately terminates the current
|
||||
** argument. So, three nuls surrounded by whitespace produces 3 empty
|
||||
** arguments in series. This nul termination happens after \nnn processing,
|
||||
** so you can use a line like
|
||||
** #!/bin/interpreter \
|
||||
** foo \000bar \000\000baz\000 quux
|
||||
** to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
|
||||
** The rule is: a run of whitespace terminates an argument,
|
||||
** but *each* individual nul terminates an argument.
|
||||
** You have to construct these line-2 arg lines carefully. For example,
|
||||
** beware of trailing spaces at the end of the line. They'll give you
|
||||
** extra trailing empty-string args.
|
||||
**
|
||||
** \ followed by a nul is an error (it's not possible to knock-down nul
|
||||
** in UNIX).
|
||||
** You should also beware of including nul bytes into your arguments, since
|
||||
** C's pathetic excuse for a string data-type will lose if you try this.
|
||||
**
|
||||
**
|
||||
** Another way to get this sort of multiple-argument functionality, with
|
||||
|
@ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
|
|||
return realloc(vec, len*elt_size);
|
||||
}
|
||||
|
||||
/* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */
|
||||
/* The do ... while(0) is a trick to make this macro accept a terminating
|
||||
** semicolon.
|
||||
*/
|
||||
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
|
||||
{elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
|
||||
index, sizeof(elt_t)); \
|
||||
if(mgv_tmp) vec = mgv_tmp; else goto lose;}
|
||||
do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
|
||||
index, sizeof(elt_t)); \
|
||||
if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0);
|
||||
|
||||
|
||||
/* process_meta_arg(fname, av)
|
||||
|
@ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
|
|||
** argument following the \ switch, i.e., the <fname> argument.
|
||||
*/
|
||||
|
||||
static char* read_arg(FILE*, int*);
|
||||
static char* read_arg(FILE*);
|
||||
|
||||
char **process_meta_arg(char **av)
|
||||
{
|
||||
char **argv, *arg, **ap;
|
||||
int c;
|
||||
FILE *script;
|
||||
int error_code; /* So ugly. */
|
||||
char *fname;
|
||||
int av_len;
|
||||
int argv_i=0, argv_len=100;
|
||||
|
@ -188,13 +190,15 @@ char **process_meta_arg(char **av)
|
|||
argv = Malloc(char*, argv_len);
|
||||
if( !argv ) goto lose3;
|
||||
|
||||
while( (arg=read_arg(script, &error_code)) ) {
|
||||
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1)
|
||||
while( EOF != (c=getc(script)) && '\n' != c ) {
|
||||
char *arg;
|
||||
ungetc(c,script);
|
||||
arg = read_arg(script);
|
||||
if( !arg ) goto lose2;
|
||||
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1);
|
||||
argv[argv_i++] = arg;
|
||||
}
|
||||
|
||||
if( error_code ) goto lose2;
|
||||
|
||||
for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */
|
||||
|
||||
/* Precisely re-size argv. */
|
||||
|
@ -218,64 +222,77 @@ char **process_meta_arg(char **av)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static char *read_arg(FILE *f, int *status_ptr)
|
||||
/* Read in one arg and its terminating space.
|
||||
** If arg is terminated by a newline, leave the newline in
|
||||
** the stream so the outer loop can see it. Return a newly-allocated
|
||||
** string containing the arg; NULL if there's an error.
|
||||
*/
|
||||
static char *read_arg(FILE *f)
|
||||
{
|
||||
char *buf, *tmp;
|
||||
int buflen, i;
|
||||
int c;
|
||||
|
||||
*status_ptr = 0;
|
||||
|
||||
/* Skip whitespace. */
|
||||
while( EOF != (c=getc(f)) )
|
||||
if( c=='\n' ) return NULL;
|
||||
else if( !isspace(c) )
|
||||
{ungetc(c,f); break;}
|
||||
|
||||
if( c == EOF ) return NULL;
|
||||
|
||||
/* Allocate a buffer for the arg. */
|
||||
i = 0;
|
||||
buflen=20;
|
||||
if( !(buf = Malloc(char, buflen)) ) {
|
||||
*status_ptr = -1;
|
||||
return NULL;
|
||||
}
|
||||
if( !(buf = Malloc(char, buflen)) ) return NULL;
|
||||
|
||||
/* Read in the arg. */
|
||||
while( EOF != (c=getc(f)) && !isspace(c) ) {
|
||||
while(1) {
|
||||
int c = getc(f);
|
||||
|
||||
if( c == EOF || c == ' ' ) break;
|
||||
if( c == '\n' ) {ungetc(c, f); break;}
|
||||
|
||||
/* Do knock-down processing. */
|
||||
if( c == '\\' ) {
|
||||
int c1, c2, c3;
|
||||
if( EOF == (c1 = getc(f)) ) goto lose;
|
||||
if( isodigit(c1) ) {
|
||||
switch (c1=getc(f)) {
|
||||
case EOF:
|
||||
goto lose;
|
||||
|
||||
/* \nnn octal escape. */
|
||||
case '0': case '1':
|
||||
case '2': case '3':
|
||||
case '4': case '5':
|
||||
case '6': case '7':
|
||||
if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
|
||||
if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
|
||||
c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
|
||||
break;
|
||||
|
||||
/* ANSI C escapes. */
|
||||
case 'n': c='\n'; break;
|
||||
case 'r': c='\r'; break;
|
||||
case 't': c='\t'; break;
|
||||
case 'b': c='\b'; break;
|
||||
|
||||
/* Simple knock-down: \, space, tab, newline. */
|
||||
case '\\': case ' ':
|
||||
case '\t': case '\n':
|
||||
c=c1; break;
|
||||
|
||||
/* Nothing else allowed. */
|
||||
default: goto lose;
|
||||
}
|
||||
else if( c1 == 'n' ) c='\n';
|
||||
else c=c1;
|
||||
}
|
||||
|
||||
Maybe_Grow_Vec(buf, buflen, i, char, lose)
|
||||
/* No tab allowed. */
|
||||
else if( c == '\t' ) goto lose;
|
||||
|
||||
Maybe_Grow_Vec(buf, buflen, i, char, lose);
|
||||
buf[i++] = c;
|
||||
if( c == '\0' ) break; /* nul terminates args. */
|
||||
}
|
||||
|
||||
if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */
|
||||
|
||||
/* Null terminate the arg if it hasn't been done already. */
|
||||
if( c != '\0' ) {
|
||||
Maybe_Grow_Vec(buf, buflen, i, char, lose)
|
||||
buf[i++] = '\0';
|
||||
}
|
||||
/* Null terminate the arg. */
|
||||
Maybe_Grow_Vec(buf, buflen, i, char, lose);
|
||||
buf[i++] = '\0';
|
||||
|
||||
/* Precisely re-size buf and return. */
|
||||
if( tmp=Realloc(char,buf,i) ) return tmp;
|
||||
|
||||
lose:
|
||||
Free(buf);
|
||||
*status_ptr = -1;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -354,11 +371,8 @@ main(int argc, char **argv)
|
|||
}
|
||||
|
||||
args_done:
|
||||
if( *argv ) fputs(*argv++, stdout);
|
||||
while( *argv ) {
|
||||
putchar(' ');
|
||||
fputs(*argv++, stdout);
|
||||
}
|
||||
if( *argv ) printf("\"%s\"", *argv++);
|
||||
while( *argv ) printf(" \"%s\"", *argv++);
|
||||
if( !n_flag ) putchar('\n');
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
|
|
@ -5,32 +5,28 @@
|
|||
|
||||
;;; Syntax of the line 2 argument line:
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Arguments are white-space separated. The only special character is \,
|
||||
;;; the knock-down character. \nnn, for three octal digits n, reads as the
|
||||
;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else
|
||||
;;; is just that character -- including \, space, tab, and newline. It is an
|
||||
;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
|
||||
;;; "3Q" -- it's an error. A backslash-encoded char is always an argument
|
||||
;;; constituent unless it is the nul char (\000).
|
||||
;;;
|
||||
;;; The argument line is terminated by newline or end-of-file.
|
||||
;;;
|
||||
;;; Nul bytes & empty strings -- completeness at all costs:
|
||||
;;; Not that it is very useful, but how does one get empty arguments ("")
|
||||
;;; with this syntax? Well, ASCII nuls are taken to terminate arguments
|
||||
;;; -- this is a fairly deeply-embedded property of UNIX. Each nul
|
||||
;;; encountered on the argument line immediately terminates the current
|
||||
;;; argument. So, three nuls surrounded by whitespace produces 3 empty
|
||||
;;; arguments in series. This nul termination happens after \nnn processing,
|
||||
;;; so you can use a line like
|
||||
;;; #!/bin/interpreter \
|
||||
;;; foo \000bar \000\000baz\000 quux
|
||||
;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
|
||||
;;; The rule is: a run of whitespace terminates an argument,
|
||||
;;; but *each* individual nul terminates an argument.
|
||||
;;;
|
||||
;;; \ followed by a nul is an error (it's not possible to knock-down nul
|
||||
;;; in UNIX).
|
||||
;;; - The only special chars are space, tab, newline, and \.
|
||||
;;; - Every space char terminates an argument.
|
||||
;;; Multiple spaces therefore introduce empty-string arguments.
|
||||
;;; - A newline terminates the argument list, and will also terminate a
|
||||
;;; non-empty argument (but a newline following a space does not introduce
|
||||
;;; a final "" argument; it only terminates the argument list).
|
||||
;;; - Tab is not allowed.
|
||||
;;; This is to prevent you from being screwed by thinking you had several
|
||||
;;; spaces where you really had a tab, and vice-versa.
|
||||
;;; - The only other special character is \, the knock-down character.
|
||||
;;; \ escapes \, space, tab, and newline, turning off their special
|
||||
;;; functions. The ANSI C escape sequences, such as \n and \t are
|
||||
;;; supported; these also produce argument-constituents -- \n doesn't act
|
||||
;;; like a terminating newline. \nnn for *exactly* three octal digits reads
|
||||
;;; as the char whose ASCII code is nnn. It is an error if \ is followed by
|
||||
;;; just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
|
||||
;;; constituent chars. \ followed by other chars is not allowed (so we can
|
||||
;;; extend the escape-code space later if we like).
|
||||
;;;
|
||||
;;; You have to construct these line-2 arg lines carefully. For example,
|
||||
;;; beware of trailing spaces at the end of the line. They'll give you
|
||||
;;; extra trailing empty-string args.
|
||||
|
||||
;;; (meta-arg-process-arglist args)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
@ -69,7 +65,6 @@
|
|||
|
||||
(define (read-secondary-args port)
|
||||
(let lp ((args '()))
|
||||
(skip-char-set char-set:meta-arg-separators port)
|
||||
(let ((c (peek-char port)))
|
||||
(if (or (eof-object? c) (char=? c #\newline))
|
||||
(reverse args)
|
||||
|
@ -81,32 +76,38 @@
|
|||
(define (read-secondary-arg port)
|
||||
(let lp ((chars '()))
|
||||
(let ((c (peek-char port)))
|
||||
(cond ((or (eof-object? c)
|
||||
(char-set-contains? char-set:whitespace c))
|
||||
(apply string (reverse chars))) ; Leave C in stream.
|
||||
|
||||
((char=? c ascii/nul)
|
||||
(read-char port) ; Consume C.
|
||||
(cond ((or (eof-object? c) (char=? c #\newline))
|
||||
(apply string (reverse chars)))
|
||||
|
||||
((char=? c #\\)
|
||||
((char=? c #\space)
|
||||
(read-char port)
|
||||
(let ((c (read-backslash-sequence port)))
|
||||
(if (char=? c ascii/nul)
|
||||
(apply string (reverse chars))
|
||||
(lp (cons c chars)))))
|
||||
(apply string (reverse chars)))
|
||||
|
||||
((char=? c tab)
|
||||
(error "Illegal tab character in meta-arg argument line."))
|
||||
|
||||
(else (lp (cons ((cond ((char=? c #\\)
|
||||
(read-char port)
|
||||
read-backslash-sequence)
|
||||
(else read-char))
|
||||
port)
|
||||
chars)))))))
|
||||
|
||||
(else (lp (cons (read-char port) chars)))))))
|
||||
|
||||
(define (read-backslash-sequence port)
|
||||
(let ((c1 (read-char port))
|
||||
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg"))))
|
||||
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line"))))
|
||||
(cond ((eof-object? c1) (eof-lose))
|
||||
|
||||
;; This would be better handled by a char-map abstraction.
|
||||
((char=? c1 #\n) #\newline)
|
||||
((char=? c1 #\r) carriage-return)
|
||||
((char=? c1 #\t) tab)
|
||||
((char=? c1 #\b) backspace)
|
||||
;; ...whatever. Look up complete table.
|
||||
|
||||
((char=? c1 ascii/nul)
|
||||
(error "Cannot backslash nul byte in meta-arg"))
|
||||
;; \, space, tab, newline.
|
||||
((char-set-contains? char-set:simple-knockdown c1) c1)
|
||||
|
||||
((char-set-contains? char-set:octal-digits c1)
|
||||
(let ((c2 (read-char port)))
|
||||
|
@ -117,13 +118,17 @@
|
|||
(* 8 (+ (octet->int c2)
|
||||
(* 8 (octet->int c1)))))))))))
|
||||
|
||||
|
||||
(else c1))))
|
||||
|
||||
(else (error "Illegal \\ escape sequence in meta-arg argument line."
|
||||
c1)))))
|
||||
|
||||
(define (octet->int c) (- (char->ascii c) (char->ascii #\0)))
|
||||
|
||||
(define ascii/nul (ascii->char 0))
|
||||
|
||||
(define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))
|
||||
|
||||
(define char-set:meta-arg-separators (string->char-set " \t"))
|
||||
(define char-set:simple-knockdown (string->char-set "\\ \n\t"))
|
||||
|
||||
;;; Yechh.
|
||||
(define tab (ascii->char 9))
|
||||
(define carriage-return (ascii->char 13))
|
||||
(define backspace (ascii->char 8))
|
||||
|
|
Loading…
Reference in New Issue