Completely redesigned meta-arg syntax.

This commit is contained in:
shivers 1995-10-29 11:43:42 +00:00
parent 737ebc5afe
commit 579ee12924
2 changed files with 135 additions and 116 deletions

152
proc2.c
View File

@ -55,31 +55,31 @@
** comment character when it begins to scan the second line. ** comment character when it begins to scan the second line.
** **
** Arguments are parsed from the second line as follows: ** Arguments are parsed from the second line as follows:
** Arguments are white-space separated. The only special character is \, ** - The only special chars are space, tab, newline, and \.
** the knock-down character. \nnn, for three octal digits n, reads as the ** - Every space char terminates an argument.
** char whose ASCII code is nnn. \n is newline. \ followed by anything else ** Multiple spaces therefore introduce empty-string arguments.
** is just that character -- including \, space, tab, and newline. It is an ** - A newline terminates the argument list, and will also terminate a
** error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean ** non-empty argument (but a newline following a space does not introduce
** "3Q" -- it's an error. ** a final "" argument; it only terminates the argument list).
** ** - Tab is not allowed.
** The argument line is terminated by newline or end-of-file. ** This is to prevent you from being screwed by thinking you had several
** spaces where you really had a tab, and vice-versa.
** - The only other special character is \, the knock-down character.
** \ escapes \, space, tab, and newline, turning off their special
** functions. The ANSI C escape sequences, such as \n and \t are
** supported; these also produce argument-constituents -- \n doesn't act
** like a terminating newline. \nnn for *exactly* three octal digits reads
** as the char whose ASCII code is nnn. It is an error if \ is followed by
** just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
** constituent chars. \ followed by other chars is not allowed (so we can
** extend the escape-code space later if we like).
** **
** Nul bytes & empty strings -- completeness at all costs: ** You have to construct these line-2 arg lines carefully. For example,
** Not that it is very useful, but how does one get empty arguments ("") ** beware of trailing spaces at the end of the line. They'll give you
** with this syntax? Well, ASCII nuls are taken to terminate arguments ** extra trailing empty-string args.
** -- this is a fairly deeply-embedded property of UNIX. Each nul
** encountered on the argument line immediately terminates the current
** argument. So, three nuls surrounded by whitespace produces 3 empty
** arguments in series. This nul termination happens after \nnn processing,
** so you can use a line like
** #!/bin/interpreter \
** foo \000bar \000\000baz\000 quux
** to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
** The rule is: a run of whitespace terminates an argument,
** but *each* individual nul terminates an argument.
** **
** \ followed by a nul is an error (it's not possible to knock-down nul ** You should also beware of including nul bytes into your arguments, since
** in UNIX). ** C's pathetic excuse for a string data-type will lose if you try this.
** **
** **
** Another way to get this sort of multiple-argument functionality, with ** Another way to get this sort of multiple-argument functionality, with
@ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
return realloc(vec, len*elt_size); return realloc(vec, len*elt_size);
} }
/* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */ /* The do ... while(0) is a trick to make this macro accept a terminating
** semicolon.
*/
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \ #define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
{elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
index, sizeof(elt_t)); \ index, sizeof(elt_t)); \
if(mgv_tmp) vec = mgv_tmp; else goto lose;} if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0);
/* process_meta_arg(fname, av) /* process_meta_arg(fname, av)
@ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
** argument following the \ switch, i.e., the <fname> argument. ** argument following the \ switch, i.e., the <fname> argument.
*/ */
static char* read_arg(FILE*, int*); static char* read_arg(FILE*);
char **process_meta_arg(char **av) char **process_meta_arg(char **av)
{ {
char **argv, *arg, **ap; char **argv, *arg, **ap;
int c;
FILE *script; FILE *script;
int error_code; /* So ugly. */
char *fname; char *fname;
int av_len; int av_len;
int argv_i=0, argv_len=100; int argv_i=0, argv_len=100;
@ -188,13 +190,15 @@ char **process_meta_arg(char **av)
argv = Malloc(char*, argv_len); argv = Malloc(char*, argv_len);
if( !argv ) goto lose3; if( !argv ) goto lose3;
while( (arg=read_arg(script, &error_code)) ) { while( EOF != (c=getc(script)) && '\n' != c ) {
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1) char *arg;
ungetc(c,script);
arg = read_arg(script);
if( !arg ) goto lose2;
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1);
argv[argv_i++] = arg; argv[argv_i++] = arg;
} }
if( error_code ) goto lose2;
for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */ for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */
/* Precisely re-size argv. */ /* Precisely re-size argv. */
@ -218,64 +222,77 @@ char **process_meta_arg(char **av)
return NULL; return NULL;
} }
static char *read_arg(FILE *f, int *status_ptr) /* Read in one arg and its terminating space.
** If arg is terminated by a newline, leave the newline in
** the stream so the outer loop can see it. Return a newly-allocated
** string containing the arg; NULL if there's an error.
*/
static char *read_arg(FILE *f)
{ {
char *buf, *tmp; char *buf, *tmp;
int buflen, i; int buflen, i;
int c;
*status_ptr = 0;
/* Skip whitespace. */
while( EOF != (c=getc(f)) )
if( c=='\n' ) return NULL;
else if( !isspace(c) )
{ungetc(c,f); break;}
if( c == EOF ) return NULL;
/* Allocate a buffer for the arg. */ /* Allocate a buffer for the arg. */
i = 0; i = 0;
buflen=20; buflen=20;
if( !(buf = Malloc(char, buflen)) ) { if( !(buf = Malloc(char, buflen)) ) return NULL;
*status_ptr = -1;
return NULL;
}
/* Read in the arg. */ /* Read in the arg. */
while( EOF != (c=getc(f)) && !isspace(c) ) { while(1) {
int c = getc(f);
if( c == EOF || c == ' ' ) break;
if( c == '\n' ) {ungetc(c, f); break;}
/* Do knock-down processing. */ /* Do knock-down processing. */
if( c == '\\' ) { if( c == '\\' ) {
int c1, c2, c3; int c1, c2, c3;
if( EOF == (c1 = getc(f)) ) goto lose; switch (c1=getc(f)) {
if( isodigit(c1) ) { case EOF:
goto lose;
/* \nnn octal escape. */
case '0': case '1':
case '2': case '3':
case '4': case '5':
case '6': case '7':
if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose; if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose; if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0'); c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
break;
/* ANSI C escapes. */
case 'n': c='\n'; break;
case 'r': c='\r'; break;
case 't': c='\t'; break;
case 'b': c='\b'; break;
/* Simple knock-down: \, space, tab, newline. */
case '\\': case ' ':
case '\t': case '\n':
c=c1; break;
/* Nothing else allowed. */
default: goto lose;
} }
else if( c1 == 'n' ) c='\n';
else c=c1;
} }
Maybe_Grow_Vec(buf, buflen, i, char, lose) /* No tab allowed. */
else if( c == '\t' ) goto lose;
Maybe_Grow_Vec(buf, buflen, i, char, lose);
buf[i++] = c; buf[i++] = c;
if( c == '\0' ) break; /* nul terminates args. */
} }
if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */ /* Null terminate the arg. */
Maybe_Grow_Vec(buf, buflen, i, char, lose);
/* Null terminate the arg if it hasn't been done already. */ buf[i++] = '\0';
if( c != '\0' ) {
Maybe_Grow_Vec(buf, buflen, i, char, lose)
buf[i++] = '\0';
}
/* Precisely re-size buf and return. */ /* Precisely re-size buf and return. */
if( tmp=Realloc(char,buf,i) ) return tmp; if( tmp=Realloc(char,buf,i) ) return tmp;
lose: lose:
Free(buf); Free(buf);
*status_ptr = -1;
return NULL; return NULL;
} }
@ -354,11 +371,8 @@ main(int argc, char **argv)
} }
args_done: args_done:
if( *argv ) fputs(*argv++, stdout); if( *argv ) printf("\"%s\"", *argv++);
while( *argv ) { while( *argv ) printf(" \"%s\"", *argv++);
putchar(' ');
fputs(*argv++, stdout);
}
if( !n_flag ) putchar('\n'); if( !n_flag ) putchar('\n');
} }
#endif /* 0 */ #endif /* 0 */

View File

@ -5,32 +5,28 @@
;;; Syntax of the line 2 argument line: ;;; Syntax of the line 2 argument line:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Arguments are white-space separated. The only special character is \, ;;; - The only special chars are space, tab, newline, and \.
;;; the knock-down character. \nnn, for three octal digits n, reads as the ;;; - Every space char terminates an argument.
;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else ;;; Multiple spaces therefore introduce empty-string arguments.
;;; is just that character -- including \, space, tab, and newline. It is an ;;; - A newline terminates the argument list, and will also terminate a
;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean ;;; non-empty argument (but a newline following a space does not introduce
;;; "3Q" -- it's an error. A backslash-encoded char is always an argument ;;; a final "" argument; it only terminates the argument list).
;;; constituent unless it is the nul char (\000). ;;; - Tab is not allowed.
;;; ;;; This is to prevent you from being screwed by thinking you had several
;;; The argument line is terminated by newline or end-of-file. ;;; spaces where you really had a tab, and vice-versa.
;;; ;;; - The only other special character is \, the knock-down character.
;;; Nul bytes & empty strings -- completeness at all costs: ;;; \ escapes \, space, tab, and newline, turning off their special
;;; Not that it is very useful, but how does one get empty arguments ("") ;;; functions. The ANSI C escape sequences, such as \n and \t are
;;; with this syntax? Well, ASCII nuls are taken to terminate arguments ;;; supported; these also produce argument-constituents -- \n doesn't act
;;; -- this is a fairly deeply-embedded property of UNIX. Each nul ;;; like a terminating newline. \nnn for *exactly* three octal digits reads
;;; encountered on the argument line immediately terminates the current ;;; as the char whose ASCII code is nnn. It is an error if \ is followed by
;;; argument. So, three nuls surrounded by whitespace produces 3 empty ;;; just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
;;; arguments in series. This nul termination happens after \nnn processing, ;;; constituent chars. \ followed by other chars is not allowed (so we can
;;; so you can use a line like ;;; extend the escape-code space later if we like).
;;; #!/bin/interpreter \ ;;;
;;; foo \000bar \000\000baz\000 quux ;;; You have to construct these line-2 arg lines carefully. For example,
;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). ;;; beware of trailing spaces at the end of the line. They'll give you
;;; The rule is: a run of whitespace terminates an argument, ;;; extra trailing empty-string args.
;;; but *each* individual nul terminates an argument.
;;;
;;; \ followed by a nul is an error (it's not possible to knock-down nul
;;; in UNIX).
;;; (meta-arg-process-arglist args) ;;; (meta-arg-process-arglist args)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -69,7 +65,6 @@
(define (read-secondary-args port) (define (read-secondary-args port)
(let lp ((args '())) (let lp ((args '()))
(skip-char-set char-set:meta-arg-separators port)
(let ((c (peek-char port))) (let ((c (peek-char port)))
(if (or (eof-object? c) (char=? c #\newline)) (if (or (eof-object? c) (char=? c #\newline))
(reverse args) (reverse args)
@ -81,32 +76,38 @@
(define (read-secondary-arg port) (define (read-secondary-arg port)
(let lp ((chars '())) (let lp ((chars '()))
(let ((c (peek-char port))) (let ((c (peek-char port)))
(cond ((or (eof-object? c) (cond ((or (eof-object? c) (char=? c #\newline))
(char-set-contains? char-set:whitespace c))
(apply string (reverse chars))) ; Leave C in stream.
((char=? c ascii/nul)
(read-char port) ; Consume C.
(apply string (reverse chars))) (apply string (reverse chars)))
((char=? c #\\) ((char=? c #\space)
(read-char port) (read-char port)
(let ((c (read-backslash-sequence port))) (apply string (reverse chars)))
(if (char=? c ascii/nul)
(apply string (reverse chars)) ((char=? c tab)
(lp (cons c chars))))) (error "Illegal tab character in meta-arg argument line."))
(else (lp (cons ((cond ((char=? c #\\)
(read-char port)
read-backslash-sequence)
(else read-char))
port)
chars)))))))
(else (lp (cons (read-char port) chars)))))))
(define (read-backslash-sequence port) (define (read-backslash-sequence port)
(let ((c1 (read-char port)) (let ((c1 (read-char port))
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg")))) (eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line"))))
(cond ((eof-object? c1) (eof-lose)) (cond ((eof-object? c1) (eof-lose))
;; This would be better handled by a char-map abstraction.
((char=? c1 #\n) #\newline) ((char=? c1 #\n) #\newline)
((char=? c1 #\r) carriage-return)
((char=? c1 #\t) tab)
((char=? c1 #\b) backspace)
;; ...whatever. Look up complete table.
((char=? c1 ascii/nul) ;; \, space, tab, newline.
(error "Cannot backslash nul byte in meta-arg")) ((char-set-contains? char-set:simple-knockdown c1) c1)
((char-set-contains? char-set:octal-digits c1) ((char-set-contains? char-set:octal-digits c1)
(let ((c2 (read-char port))) (let ((c2 (read-char port)))
@ -117,13 +118,17 @@
(* 8 (+ (octet->int c2) (* 8 (+ (octet->int c2)
(* 8 (octet->int c1))))))))))) (* 8 (octet->int c1)))))))))))
(else c1)))) (else (error "Illegal \\ escape sequence in meta-arg argument line."
c1)))))
(define (octet->int c) (- (char->ascii c) (char->ascii #\0))) (define (octet->int c) (- (char->ascii c) (char->ascii #\0)))
(define ascii/nul (ascii->char 0))
(define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)) (define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))
(define char-set:meta-arg-separators (string->char-set " \t")) (define char-set:simple-knockdown (string->char-set "\\ \n\t"))
;;; Yechh.
(define tab (ascii->char 9))
(define carriage-return (ascii->char 13))
(define backspace (ascii->char 8))