Completely redesigned meta-arg syntax.

This commit is contained in:
shivers 1995-10-29 11:43:42 +00:00
parent 737ebc5afe
commit 579ee12924
2 changed files with 135 additions and 116 deletions

148
proc2.c
View File

@ -55,31 +55,31 @@
** comment character when it begins to scan the second line.
**
** Arguments are parsed from the second line as follows:
** Arguments are white-space separated. The only special character is \,
** the knock-down character. \nnn, for three octal digits n, reads as the
** char whose ASCII code is nnn. \n is newline. \ followed by anything else
** is just that character -- including \, space, tab, and newline. It is an
** error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
** "3Q" -- it's an error.
** - The only special chars are space, tab, newline, and \.
** - Every space char terminates an argument.
** Multiple spaces therefore introduce empty-string arguments.
** - A newline terminates the argument list, and will also terminate a
** non-empty argument (but a newline following a space does not introduce
** a final "" argument; it only terminates the argument list).
** - Tab is not allowed.
** This is to prevent you from being screwed by thinking you had several
** spaces where you really had a tab, and vice-versa.
** - The only other special character is \, the knock-down character.
** \ escapes \, space, tab, and newline, turning off their special
** functions. The ANSI C escape sequences, such as \n and \t, are
** supported; these also produce argument-constituents -- \n doesn't act
** like a terminating newline. \nnn for *exactly* three octal digits reads
** as the char whose ASCII code is nnn. It is an error if \ is followed by
** just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
** constituent chars. \ followed by other chars is not allowed (so we can
** extend the escape-code space later if we like).
**
** The argument line is terminated by newline or end-of-file.
** You have to construct these line-2 arg lines carefully. For example,
** beware of trailing spaces at the end of the line. They'll give you
** extra trailing empty-string args.
**
** Nul bytes & empty strings -- completeness at all costs:
** Not that it is very useful, but how does one get empty arguments ("")
** with this syntax? Well, ASCII nuls are taken to terminate arguments
** -- this is a fairly deeply-embedded property of UNIX. Each nul
** encountered on the argument line immediately terminates the current
** argument. So, three nuls surrounded by whitespace produces 3 empty
** arguments in series. This nul termination happens after \nnn processing,
** so you can use a line like
** #!/bin/interpreter \
** foo \000bar \000\000baz\000 quux
** to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
** The rule is: a run of whitespace terminates an argument,
** but *each* individual nul terminates an argument.
**
** \ followed by a nul is an error (it's not possible to knock-down nul
** in UNIX).
** You should also beware of including nul bytes into your arguments, since
** C's pathetic excuse for a string data-type will lose if you try this.
**
**
** Another way to get this sort of multiple-argument functionality, with
@ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
return realloc(vec, len*elt_size);
}
/* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */
/* The do ... while(0) is a trick to make this macro accept a terminating
** semicolon.
*/
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
{elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \
index, sizeof(elt_t)); \
if(mgv_tmp) vec = mgv_tmp; else goto lose;}
if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0);
/* process_meta_arg(fname, av)
@ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
** argument following the \ switch, i.e., the <fname> argument.
*/
static char* read_arg(FILE*, int*);
static char* read_arg(FILE*);
char **process_meta_arg(char **av)
{
char **argv, *arg, **ap;
int c;
FILE *script;
int error_code; /* So ugly. */
char *fname;
int av_len;
int argv_i=0, argv_len=100;
@ -188,13 +190,15 @@ char **process_meta_arg(char **av)
argv = Malloc(char*, argv_len);
if( !argv ) goto lose3;
while( (arg=read_arg(script, &error_code)) ) {
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1)
while( EOF != (c=getc(script)) && '\n' != c ) {
char *arg;
ungetc(c,script);
arg = read_arg(script);
if( !arg ) goto lose2;
Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1);
argv[argv_i++] = arg;
}
if( error_code ) goto lose2;
for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */
/* Precisely re-size argv. */
@ -218,64 +222,77 @@ char **process_meta_arg(char **av)
return NULL;
}
static char *read_arg(FILE *f, int *status_ptr)
/* Read in one arg and its terminating space.
** If arg is terminated by a newline, leave the newline in
** the stream so the outer loop can see it. Return a newly-allocated
** string containing the arg; NULL if there's an error.
*/
static char *read_arg(FILE *f)
{
char *buf, *tmp;
int buflen, i;
int c;
*status_ptr = 0;
/* Skip whitespace. */
while( EOF != (c=getc(f)) )
if( c=='\n' ) return NULL;
else if( !isspace(c) )
{ungetc(c,f); break;}
if( c == EOF ) return NULL;
/* Allocate a buffer for the arg. */
i = 0;
buflen=20;
if( !(buf = Malloc(char, buflen)) ) {
*status_ptr = -1;
return NULL;
}
if( !(buf = Malloc(char, buflen)) ) return NULL;
/* Read in the arg. */
while( EOF != (c=getc(f)) && !isspace(c) ) {
while(1) {
int c = getc(f);
if( c == EOF || c == ' ' ) break;
if( c == '\n' ) {ungetc(c, f); break;}
/* Do knock-down processing. */
if( c == '\\' ) {
int c1, c2, c3;
if( EOF == (c1 = getc(f)) ) goto lose;
if( isodigit(c1) ) {
switch (c1=getc(f)) {
case EOF:
goto lose;
/* \nnn octal escape. */
case '0': case '1':
case '2': case '3':
case '4': case '5':
case '6': case '7':
if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
break;
/* ANSI C escapes. */
case 'n': c='\n'; break;
case 'r': c='\r'; break;
case 't': c='\t'; break;
case 'b': c='\b'; break;
/* Simple knock-down: \, space, tab, newline. */
case '\\': case ' ':
case '\t': case '\n':
c=c1; break;
/* Nothing else allowed. */
default: goto lose;
}
else if( c1 == 'n' ) c='\n';
else c=c1;
}
Maybe_Grow_Vec(buf, buflen, i, char, lose)
/* No tab allowed. */
else if( c == '\t' ) goto lose;
Maybe_Grow_Vec(buf, buflen, i, char, lose);
buf[i++] = c;
if( c == '\0' ) break; /* nul terminates args. */
}
if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */
/* Null terminate the arg if it hasn't been done already. */
if( c != '\0' ) {
Maybe_Grow_Vec(buf, buflen, i, char, lose)
/* Null terminate the arg. */
Maybe_Grow_Vec(buf, buflen, i, char, lose);
buf[i++] = '\0';
}
/* Precisely re-size buf and return. */
if( tmp=Realloc(char,buf,i) ) return tmp;
lose:
Free(buf);
*status_ptr = -1;
return NULL;
}
@ -354,11 +371,8 @@ main(int argc, char **argv)
}
args_done:
if( *argv ) fputs(*argv++, stdout);
while( *argv ) {
putchar(' ');
fputs(*argv++, stdout);
}
if( *argv ) printf("\"%s\"", *argv++);
while( *argv ) printf(" \"%s\"", *argv++);
if( !n_flag ) putchar('\n');
}
#endif /* 0 */

View File

@ -5,32 +5,28 @@
;;; Syntax of the line 2 argument line:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Arguments are white-space separated. The only special character is \,
;;; the knock-down character. \nnn, for three octal digits n, reads as the
;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else
;;; is just that character -- including \, space, tab, and newline. It is an
;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
;;; "3Q" -- it's an error. A backslash-encoded char is always an argument
;;; constituent unless it is the nul char (\000).
;;; - The only special chars are space, tab, newline, and \.
;;; - Every space char terminates an argument.
;;; Multiple spaces therefore introduce empty-string arguments.
;;; - A newline terminates the argument list, and will also terminate a
;;; non-empty argument (but a newline following a space does not introduce
;;; a final "" argument; it only terminates the argument list).
;;; - Tab is not allowed.
;;; This is to prevent you from being screwed by thinking you had several
;;; spaces where you really had a tab, and vice-versa.
;;; - The only other special character is \, the knock-down character.
;;; \ escapes \, space, tab, and newline, turning off their special
;;; functions. The ANSI C escape sequences, such as \n and \t, are
;;; supported; these also produce argument-constituents -- \n doesn't act
;;; like a terminating newline. \nnn for *exactly* three octal digits reads
;;; as the char whose ASCII code is nnn. It is an error if \ is followed by
;;; just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always
;;; constituent chars. \ followed by other chars is not allowed (so we can
;;; extend the escape-code space later if we like).
;;;
;;; The argument line is terminated by newline or end-of-file.
;;;
;;; Nul bytes & empty strings -- completeness at all costs:
;;; Not that it is very useful, but how does one get empty arguments ("")
;;; with this syntax? Well, ASCII nuls are taken to terminate arguments
;;; -- this is a fairly deeply-embedded property of UNIX. Each nul
;;; encountered on the argument line immediately terminates the current
;;; argument. So, three nuls surrounded by whitespace produces 3 empty
;;; arguments in series. This nul termination happens after \nnn processing,
;;; so you can use a line like
;;; #!/bin/interpreter \
;;; foo \000bar \000\000baz\000 quux
;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
;;; The rule is: a run of whitespace terminates an argument,
;;; but *each* individual nul terminates an argument.
;;;
;;; \ followed by a nul is an error (it's not possible to knock-down nul
;;; in UNIX).
;;; You have to construct these line-2 arg lines carefully. For example,
;;; beware of trailing spaces at the end of the line. They'll give you
;;; extra trailing empty-string args.
;;; (meta-arg-process-arglist args)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -69,7 +65,6 @@
(define (read-secondary-args port)
(let lp ((args '()))
(skip-char-set char-set:meta-arg-separators port)
(let ((c (peek-char port)))
(if (or (eof-object? c) (char=? c #\newline))
(reverse args)
@ -81,32 +76,38 @@
(define (read-secondary-arg port)
(let lp ((chars '()))
(let ((c (peek-char port)))
(cond ((or (eof-object? c)
(char-set-contains? char-set:whitespace c))
(apply string (reverse chars))) ; Leave C in stream.
((char=? c ascii/nul)
(read-char port) ; Consume C.
(cond ((or (eof-object? c) (char=? c #\newline))
(apply string (reverse chars)))
((char=? c #\\)
((char=? c #\space)
(read-char port)
(let ((c (read-backslash-sequence port)))
(if (char=? c ascii/nul)
(apply string (reverse chars))
(lp (cons c chars)))))
(apply string (reverse chars)))
((char=? c tab)
(error "Illegal tab character in meta-arg argument line."))
(else (lp (cons ((cond ((char=? c #\\)
(read-char port)
read-backslash-sequence)
(else read-char))
port)
chars)))))))
(else (lp (cons (read-char port) chars)))))))
(define (read-backslash-sequence port)
(let ((c1 (read-char port))
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg"))))
(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line"))))
(cond ((eof-object? c1) (eof-lose))
;; This would be better handled by a char-map abstraction.
((char=? c1 #\n) #\newline)
((char=? c1 #\r) carriage-return)
((char=? c1 #\t) tab)
((char=? c1 #\b) backspace)
;; ...whatever. Look up complete table.
((char=? c1 ascii/nul)
(error "Cannot backslash nul byte in meta-arg"))
;; \, space, tab, newline.
((char-set-contains? char-set:simple-knockdown c1) c1)
((char-set-contains? char-set:octal-digits c1)
(let ((c2 (read-char port)))
@ -118,12 +119,16 @@
(* 8 (octet->int c1)))))))))))
(else c1))))
(else (error "Illegal \\ escape sequence in meta-arg argument line."
c1)))))
(define (octet->int c) (- (char->ascii c) (char->ascii #\0)))
(define ascii/nul (ascii->char 0))
(define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))
(define char-set:meta-arg-separators (string->char-set " \t"))
(define char-set:simple-knockdown (string->char-set "\\ \n\t"))
;;; Yechh.
(define tab (ascii->char 9))
(define carriage-return (ascii->char 13))
(define backspace (ascii->char 8))