diff --git a/proc2.c b/proc2.c index ea12d0d..f38b944 100644 --- a/proc2.c +++ b/proc2.c @@ -55,31 +55,31 @@ ** comment character when it begins to scan the second line. ** ** Arguments are parsed from the second line as follows: -** Arguments are white-space separated. The only special character is \, -** the knock-down character. \nnn, for three octal digits n, reads as the -** char whose ASCII code is nnn. \n is newline. \ followed by anything else -** is just that character -- including \, space, tab, and newline. It is an -** error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean -** "3Q" -- it's an error. -** -** The argument line is terminated by newline or end-of-file. +** - The only special chars are space, tab, newline, and \. +** - Every space char terminates an argument. +** Multiple spaces therefore introduce empty-string arguments. +** - A newline terminates the argument list, and will also terminate a +** non-empty argument (but a newline following a space does not introduce +** a final "" argument; it only terminates the argument list). +** - Tab is not allowed. +** This is to prevent you from being screwed by thinking you had several +** spaces where you really had a tab, and vice-versa. +** - The only other special character is \, the knock-down character. +** \ escapes \, space, tab, and newline, turning off their special +** functions. The ANSI C escape sequences, such as \n and \t are +** supported; these also produce argument-constituents -- \n doesn't act +** like a terminating newline. \nnn for *exactly* three octal digits reads +** as the char whose ASCII code is nnn. It is an error if \ is followed by +** just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always +** constituent chars. \ followed by other chars is not allowed (so we can +** extend the escape-code space later if we like). 
** -** Nul bytes & empty strings -- completeness at all costs: -** Not that it is very useful, but how does one get empty arguments ("") -** with this syntax? Well, ASCII nuls are taken to terminate arguments -** -- this is a fairly deeply-embedded property of UNIX. Each nul -** encountered on the argument line immediately terminates the current -** argument. So, three nuls surrounded by whitespace produces 3 empty -** arguments in series. This nul termination happens after \nnn processing, -** so you can use a line like -** #!/bin/interpreter \ -** foo \000bar \000\000baz\000 quux -** to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). -** The rule is: a run of whitespace terminates an argument, -** but *each* individual nul terminates an argument. +** You have to construct these line-2 arg lines carefully. For example, +** beware of trailing spaces at the end of the line. They'll give you +** extra trailing empty-string args. ** -** \ followed by a nul is an error (it's not possible to knock-down nul -** in UNIX). +** You should also beware of including nul bytes into your arguments, since +** C's pathetic excuse for a string data-type will lose if you try this. ** ** ** Another way to get this sort of multiple-argument functionality, with @@ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) return realloc(vec, len*elt_size); } -/* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */ +/* The do ... while(0) is a trick to make this macro accept a terminating +** semicolon. 
+*/ #define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \ - {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ - index, sizeof(elt_t)); \ - if(mgv_tmp) vec = mgv_tmp; else goto lose;} + do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ + index, sizeof(elt_t)); \ + if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0); /* process_meta_arg(fname, av) @@ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) ** argument following the \ switch, i.e., the argument. */ -static char* read_arg(FILE*, int*); +static char* read_arg(FILE*); char **process_meta_arg(char **av) { char **argv, *arg, **ap; + int c; FILE *script; - int error_code; /* So ugly. */ char *fname; int av_len; int argv_i=0, argv_len=100; @@ -188,13 +190,15 @@ char **process_meta_arg(char **av) argv = Malloc(char*, argv_len); if( !argv ) goto lose3; - while( (arg=read_arg(script, &error_code)) ) { - Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1) + while( EOF != (c=getc(script)) && '\n' != c ) { + char *arg; + ungetc(c,script); + arg = read_arg(script); + if( !arg ) goto lose2; + Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1); argv[argv_i++] = arg; } - if( error_code ) goto lose2; - for(av_len=0; av[av_len]; av_len++); /* Compute length of av. */ /* Precisely re-size argv. */ @@ -218,64 +222,77 @@ char **process_meta_arg(char **av) return NULL; } -static char *read_arg(FILE *f, int *status_ptr) +/* Read in one arg and its terminating space. +** If arg is terminated by a newline, leave the newline in +** the stream so the outer loop can see it. Return a newly-allocated +** string containing the arg; NULL if there's an error. +*/ +static char *read_arg(FILE *f) { char *buf, *tmp; int buflen, i; - int c; - - *status_ptr = 0; - - /* Skip whitespace. */ - while( EOF != (c=getc(f)) ) - if( c=='\n' ) return NULL; - else if( !isspace(c) ) - {ungetc(c,f); break;} - - if( c == EOF ) return NULL; /* Allocate a buffer for the arg. 
*/ i = 0; buflen=20; - if( !(buf = Malloc(char, buflen)) ) { - *status_ptr = -1; - return NULL; - } + if( !(buf = Malloc(char, buflen)) ) return NULL; /* Read in the arg. */ - while( EOF != (c=getc(f)) && !isspace(c) ) { + while(1) { + int c = getc(f); + + if( c == EOF || c == ' ' ) break; + if( c == '\n' ) {ungetc(c, f); break;} + /* Do knock-down processing. */ if( c == '\\' ) { int c1, c2, c3; - if( EOF == (c1 = getc(f)) ) goto lose; - if( isodigit(c1) ) { + switch (c1=getc(f)) { + case EOF: + goto lose; + + /* \nnn octal escape. */ + case '0': case '1': + case '2': case '3': + case '4': case '5': + case '6': case '7': if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose; if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose; c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0'); + break; + + /* ANSI C escapes. */ + case 'n': c='\n'; break; + case 'r': c='\r'; break; + case 't': c='\t'; break; + case 'b': c='\b'; break; + + /* Simple knock-down: \, space, tab, newline. */ + case '\\': case ' ': + case '\t': case '\n': + c=c1; break; + + /* Nothing else allowed. */ + default: goto lose; } - else if( c1 == 'n' ) c='\n'; - else c=c1; } - Maybe_Grow_Vec(buf, buflen, i, char, lose) + /* No tab allowed. */ + else if( c == '\t' ) goto lose; + + Maybe_Grow_Vec(buf, buflen, i, char, lose); buf[i++] = c; - if( c == '\0' ) break; /* nul terminates args. */ } - if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */ - - /* Null terminate the arg if it hasn't been done already. */ - if( c != '\0' ) { - Maybe_Grow_Vec(buf, buflen, i, char, lose) - buf[i++] = '\0'; - } + /* Null terminate the arg. */ + Maybe_Grow_Vec(buf, buflen, i, char, lose); + buf[i++] = '\0'; /* Precisely re-size buf and return. 
*/ if( tmp=Realloc(char,buf,i) ) return tmp; lose: Free(buf); - *status_ptr = -1; return NULL; } @@ -354,11 +371,8 @@ main(int argc, char **argv) } args_done: - if( *argv ) fputs(*argv++, stdout); - while( *argv ) { - putchar(' '); - fputs(*argv++, stdout); - } + if( *argv ) printf("\"%s\"", *argv++); + while( *argv ) printf(" \"%s\"", *argv++); if( !n_flag ) putchar('\n'); } #endif /* 0 */ diff --git a/scsh/meta-arg.scm b/scsh/meta-arg.scm index 8f701f7..7e07058 100644 --- a/scsh/meta-arg.scm +++ b/scsh/meta-arg.scm @@ -5,32 +5,28 @@ ;;; Syntax of the line 2 argument line: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Arguments are white-space separated. The only special character is \, -;;; the knock-down character. \nnn, for three octal digits n, reads as the -;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else -;;; is just that character -- including \, space, tab, and newline. It is an -;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean -;;; "3Q" -- it's an error. A backslash-encoded char is always an argument -;;; constituent unless it is the nul char (\000). -;;; -;;; The argument line is terminated by newline or end-of-file. -;;; -;;; Nul bytes & empty strings -- completeness at all costs: -;;; Not that it is very useful, but how does one get empty arguments ("") -;;; with this syntax? Well, ASCII nuls are taken to terminate arguments -;;; -- this is a fairly deeply-embedded property of UNIX. Each nul -;;; encountered on the argument line immediately terminates the current -;;; argument. So, three nuls surrounded by whitespace produces 3 empty -;;; arguments in series. This nul termination happens after \nnn processing, -;;; so you can use a line like -;;; #!/bin/interpreter \ -;;; foo \000bar \000\000baz\000 quux -;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). 
-;;; The rule is: a run of whitespace terminates an argument, -;;; but *each* individual nul terminates an argument. -;;; -;;; \ followed by a nul is an error (it's not possible to knock-down nul -;;; in UNIX). +;;; - The only special chars are space, tab, newline, and \. +;;; - Every space char terminates an argument. +;;; Multiple spaces therefore introduce empty-string arguments. +;;; - A newline terminates the argument list, and will also terminate a +;;; non-empty argument (but a newline following a space does not introduce +;;; a final "" argument; it only terminates the argument list). +;;; - Tab is not allowed. +;;; This is to prevent you from being screwed by thinking you had several +;;; spaces where you really had a tab, and vice-versa. +;;; - The only other special character is \, the knock-down character. +;;; \ escapes \, space, tab, and newline, turning off their special +;;; functions. The ANSI C escape sequences, such as \n and \t are +;;; supported; these also produce argument-constituents -- \n doesn't act +;;; like a terminating newline. \nnn for *exactly* three octal digits reads +;;; as the char whose ASCII code is nnn. It is an error if \ is followed by +;;; just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always +;;; constituent chars. \ followed by other chars is not allowed (so we can +;;; extend the escape-code space later if we like). +;;; +;;; You have to construct these line-2 arg lines carefully. For example, +;;; beware of trailing spaces at the end of the line. They'll give you +;;; extra trailing empty-string args. ;;; (meta-arg-process-arglist args) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -69,7 +65,6 @@ (define (read-secondary-args port) (let lp ((args '())) - (skip-char-set char-set:meta-arg-separators port) (let ((c (peek-char port))) (if (or (eof-object? c) (char=? 
c #\newline)) (reverse args) @@ -81,32 +76,38 @@ (define (read-secondary-arg port) (let lp ((chars '())) (let ((c (peek-char port))) - (cond ((or (eof-object? c) - (char-set-contains? char-set:whitespace c)) - (apply string (reverse chars))) ; Leave C in stream. - - ((char=? c ascii/nul) - (read-char port) ; Consume C. + (cond ((or (eof-object? c) (char=? c #\newline)) (apply string (reverse chars))) - ((char=? c #\\) + ((char=? c #\space) (read-char port) - (let ((c (read-backslash-sequence port))) - (if (char=? c ascii/nul) - (apply string (reverse chars)) - (lp (cons c chars))))) + (apply string (reverse chars))) + + ((char=? c tab) + (error "Illegal tab character in meta-arg argument line.")) + + (else (lp (cons ((cond ((char=? c #\\) + (read-char port) + read-backslash-sequence) + (else read-char)) + port) + chars))))))) - (else (lp (cons (read-char port) chars))))))) (define (read-backslash-sequence port) (let ((c1 (read-char port)) - (eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg")))) + (eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line")))) (cond ((eof-object? c1) (eof-lose)) + ;; This would be better handled by a char-map abstraction. ((char=? c1 #\n) #\newline) + ((char=? c1 #\r) carriage-return) + ((char=? c1 #\t) tab) + ((char=? c1 #\b) backspace) + ;; ...whatever. Look up complete table. - ((char=? c1 ascii/nul) - (error "Cannot backslash nul byte in meta-arg")) + ;; \, space, tab, newline. + ((char-set-contains? char-set:simple-knockdown c1) c1) ((char-set-contains? char-set:octal-digits c1) (let ((c2 (read-char port))) @@ -117,13 +118,17 @@ (* 8 (+ (octet->int c2) (* 8 (octet->int c1))))))))))) - - (else c1)))) + + (else (error "Illegal \\ escape sequence in meta-arg argument line." 
+ c1))))) (define (octet->int c) (- (char->ascii c) (char->ascii #\0))) -(define ascii/nul (ascii->char 0)) - (define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)) -(define char-set:meta-arg-separators (string->char-set " \t")) +(define char-set:simple-knockdown (string->char-set "\\ \n\t")) + +;;; Yechh. +(define tab (ascii->char 9)) +(define carriage-return (ascii->char 13)) +(define backspace (ascii->char 8))