Completely redesigned meta-arg syntax.
This commit is contained in:
		
							parent
							
								
									737ebc5afe
								
							
						
					
					
						commit
						579ee12924
					
				
							
								
								
									
										152
									
								
								proc2.c
								
								
								
								
							
							
						
						
									
										152
									
								
								proc2.c
								
								
								
								
							|  | @ -55,31 +55,31 @@ | |||
| ** comment character when it begins to scan the second line. | ||||
| ** | ||||
| ** Arguments are parsed from the second line as follows: | ||||
| **   Arguments are white-space separated. The only special character is \, | ||||
| **   the knock-down character. \nnn, for three octal digits n, reads as the | ||||
| **   char whose ASCII code is nnn. \n is newline. \ followed by anything else | ||||
| **   is just that character -- including \, space, tab, and newline. It is an | ||||
| **   error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean | ||||
| **   "3Q" -- it's an error.  | ||||
| **  | ||||
| **   The argument line is terminated by newline or end-of-file. | ||||
| ** - The only special chars are space, tab, newline, and \. | ||||
| ** - Every space char terminates an argument.  | ||||
| **   Multiple spaces therefore introduce empty-string arguments. | ||||
| ** - A newline terminates the argument list, and will also terminate a | ||||
| **   non-empty argument (but a newline following a space does not introduce | ||||
| **   a final "" argument; it only terminates the argument list). | ||||
| ** - Tab is not allowed. | ||||
| **   This is to prevent you from being screwed by thinking you had several | ||||
| **   spaces where you really had a tab, and vice-versa. | ||||
| ** - The only other special character is \, the knock-down character.  | ||||
| **   \ escapes \, space, tab, and newline, turning off their special  | ||||
| **   functions. The ANSI C escape sequences, such as \n and \t are  | ||||
| **   supported; these also produce argument-constituents -- \n doesn't act  | ||||
| **   like a terminating newline. \nnn for *exactly* three octal digits reads  | ||||
| **   as the char whose ASCII code is nnn. It is an error if \ is followed by  | ||||
| **   just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always  | ||||
| **   constituent chars. \ followed by other chars is not allowed (so we can | ||||
| **   extend the escape-code space later if we like). | ||||
| ** | ||||
| **   Nul bytes & empty strings -- completeness at all costs: | ||||
| **   Not that it is very useful, but how does one get empty arguments ("") | ||||
| **   with this syntax? Well, ASCII nuls are taken to terminate arguments | ||||
| **   -- this is a fairly deeply-embedded property of UNIX. Each nul | ||||
| **   encountered on the argument line immediately terminates the current | ||||
| **   argument. So, three nuls surrounded by whitespace produces 3 empty | ||||
| **   arguments in series. This nul termination happens after \nnn processing, | ||||
| **   so you can use a line like | ||||
| **       #!/bin/interpreter \ | ||||
| **       foo \000bar \000\000baz\000 quux | ||||
| **   to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). | ||||
| **   The rule is: a run of whitespace terminates an argument, | ||||
| **   but *each* individual nul terminates an argument. | ||||
| ** You have to construct these line-2 arg lines carefully. For example, | ||||
| ** beware of trailing spaces at the end of the line. They'll give you | ||||
| ** extra trailing empty-string args. | ||||
| ** | ||||
| **   \ followed by a nul is an error (it's not possible to knock-down nul | ||||
| **   in UNIX). | ||||
| ** You should also beware of including nul bytes into your arguments, since | ||||
| ** C's pathetic excuse for a string data-type will lose if you try this. | ||||
| ** | ||||
| ** | ||||
| ** Another way to get this sort of multiple-argument functionality, with | ||||
|  | @ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) | |||
|     return realloc(vec, len*elt_size); | ||||
|     } | ||||
| 
 | ||||
| /* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */ | ||||
| /* The do ... while(0) is a trick to make this macro accept a terminating
 | ||||
| ** semicolon. | ||||
| */ | ||||
| #define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \ | ||||
|     {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ | ||||
| 					    index, sizeof(elt_t)); \ | ||||
|      if(mgv_tmp) vec = mgv_tmp; else goto lose;} | ||||
|     do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ | ||||
| 					       index, sizeof(elt_t)); \ | ||||
| 	if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0); | ||||
| 
 | ||||
| 
 | ||||
| /* process_meta_arg(fname, av)
 | ||||
|  | @ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) | |||
| ** argument following the \ switch, i.e., the <fname> argument. | ||||
| */ | ||||
| 
 | ||||
| static char* read_arg(FILE*, int*); | ||||
| static char* read_arg(FILE*); | ||||
| 
 | ||||
| char **process_meta_arg(char **av) | ||||
| { | ||||
|     char **argv, *arg, **ap; | ||||
|     int c; | ||||
|     FILE *script; | ||||
|     int error_code; /* So ugly. */ | ||||
|     char *fname; | ||||
|     int av_len; | ||||
|     int argv_i=0, argv_len=100; | ||||
|  | @ -188,13 +190,15 @@ char **process_meta_arg(char **av) | |||
|     argv = Malloc(char*, argv_len); | ||||
|     if( !argv ) goto lose3; | ||||
| 
 | ||||
|     while( (arg=read_arg(script, &error_code)) ) { | ||||
| 	Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1) | ||||
|     while( EOF != (c=getc(script)) && '\n' != c ) { | ||||
| 	char *arg; | ||||
| 	ungetc(c,script); | ||||
| 	arg = read_arg(script); | ||||
| 	if( !arg ) goto lose2; | ||||
| 	Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1); | ||||
| 	argv[argv_i++] = arg; | ||||
| 	} | ||||
| 
 | ||||
|     if( error_code ) goto lose2; | ||||
| 
 | ||||
|     for(av_len=0; av[av_len]; av_len++);	/* Compute length of av. */ | ||||
| 
 | ||||
|     /* Precisely re-size argv. */ | ||||
|  | @ -218,64 +222,77 @@ char **process_meta_arg(char **av) | |||
|     return NULL; | ||||
|     } | ||||
| 
 | ||||
| static char *read_arg(FILE *f, int *status_ptr) | ||||
| /* Read in one arg and its terminating space.
 | ||||
| ** If arg is terminated by a newline, leave the newline in | ||||
| ** the stream so the outer loop can see it. Return a newly-allocated | ||||
| ** string containing the arg; NULL if there's an error. | ||||
| */ | ||||
| static char *read_arg(FILE *f) | ||||
| { | ||||
|     char *buf, *tmp; | ||||
|     int buflen, i; | ||||
|     int c; | ||||
| 
 | ||||
|     *status_ptr = 0; | ||||
| 
 | ||||
|     /* Skip whitespace. */ | ||||
|     while( EOF != (c=getc(f)) ) | ||||
| 	if( c=='\n' ) return NULL; | ||||
| 	else if( !isspace(c) ) | ||||
| 	    {ungetc(c,f); break;} | ||||
| 	 | ||||
|     if( c == EOF ) return NULL; | ||||
| 
 | ||||
|     /* Allocate a buffer for the arg. */ | ||||
|     i = 0; | ||||
|     buflen=20; | ||||
|     if( !(buf = Malloc(char, buflen)) ) { | ||||
| 	*status_ptr = -1; | ||||
| 	return NULL; | ||||
| 	} | ||||
|     if( !(buf = Malloc(char, buflen)) ) return NULL; | ||||
| 
 | ||||
|     /* Read in the arg. */ | ||||
|     while( EOF != (c=getc(f)) && !isspace(c) ) { | ||||
|     while(1) { | ||||
| 	int c = getc(f); | ||||
| 
 | ||||
| 	if( c == EOF || c == ' ' ) break; | ||||
| 	if( c == '\n' ) {ungetc(c, f); break;} | ||||
| 
 | ||||
| 	/* Do knock-down processing. */ | ||||
| 	if( c == '\\' ) { | ||||
| 	    int c1, c2, c3; | ||||
| 	    if( EOF == (c1 = getc(f)) ) goto lose; | ||||
| 	    if( isodigit(c1) ) { | ||||
| 	    switch (c1=getc(f)) { | ||||
| 	      case EOF: | ||||
| 		goto lose; | ||||
| 
 | ||||
| 		/* \nnn octal escape. */ | ||||
| 	      case '0':		case '1': | ||||
| 	      case '2':		case '3': | ||||
| 	      case '4':		case '5': | ||||
| 	      case '6':		case '7': | ||||
| 		if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose; | ||||
| 		if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose; | ||||
| 		c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0'); | ||||
| 		break; | ||||
| 
 | ||||
| 		/* ANSI C escapes. */ | ||||
| 	      case 'n':	c='\n'; break; | ||||
| 	      case 'r':	c='\r'; break; | ||||
| 	      case 't':	c='\t'; break; | ||||
| 	      case 'b':	c='\b'; break; | ||||
| 
 | ||||
| 		/* Simple knock-down: \, space, tab, newline. */ | ||||
| 	      case '\\':	case ' ': | ||||
| 	      case '\t':	case '\n': | ||||
| 		c=c1; break; | ||||
| 
 | ||||
| 		/* Nothing else allowed. */ | ||||
| 	      default: goto lose; | ||||
| 		} | ||||
| 	    else if( c1 == 'n' ) c='\n'; | ||||
| 	    else c=c1; | ||||
| 	    } | ||||
| 
 | ||||
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose) | ||||
| 	/* No tab allowed. */ | ||||
| 	else if( c == '\t' ) goto lose; | ||||
| 
 | ||||
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose); | ||||
| 	buf[i++] = c; | ||||
| 	if( c == '\0' ) break; /* nul terminates args. */ | ||||
| 	} | ||||
| 
 | ||||
|     if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */ | ||||
| 
 | ||||
|     /* Null terminate the arg if it hasn't been done already. */ | ||||
|     if( c != '\0' ) { | ||||
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose) | ||||
| 	buf[i++] = '\0'; | ||||
| 	} | ||||
|     /* Null terminate the arg. */ | ||||
|     Maybe_Grow_Vec(buf, buflen, i, char, lose); | ||||
|     buf[i++] = '\0'; | ||||
| 
 | ||||
|     /* Precisely re-size buf and return. */ | ||||
|     if( tmp=Realloc(char,buf,i) ) return tmp; | ||||
| 
 | ||||
|   lose: | ||||
|     Free(buf); | ||||
|     *status_ptr = -1; | ||||
|     return NULL; | ||||
|     } | ||||
| 
 | ||||
|  | @ -354,11 +371,8 @@ main(int argc, char **argv) | |||
| 	} | ||||
| 
 | ||||
|   args_done: | ||||
|     if( *argv ) fputs(*argv++, stdout); | ||||
|     while( *argv ) { | ||||
| 	putchar(' '); | ||||
| 	fputs(*argv++, stdout); | ||||
| 	} | ||||
|     if( *argv ) printf("\"%s\"", *argv++); | ||||
|     while( *argv ) printf(" \"%s\"", *argv++); | ||||
|     if( !n_flag ) putchar('\n'); | ||||
|     } | ||||
| #endif /* 0 */ | ||||
|  |  | |||
|  | @ -5,32 +5,28 @@ | |||
| 
 | ||||
| ;;; Syntax of the line 2 argument line: | ||||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||||
| ;;; Arguments are white-space separated. The only special character is \, | ||||
| ;;; the knock-down character. \nnn, for three octal digits n, reads as the | ||||
| ;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else | ||||
| ;;; is just that character -- including \, space, tab, and newline. It is an | ||||
| ;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean | ||||
| ;;; "3Q" -- it's an error. A backslash-encoded char is always an argument | ||||
| ;;; constituent unless it is the nul char (\000). | ||||
| ;;;  | ||||
| ;;; The argument line is terminated by newline or end-of-file. | ||||
| ;;;  | ||||
| ;;; Nul bytes & empty strings -- completeness at all costs: | ||||
| ;;; Not that it is very useful, but how does one get empty arguments ("") | ||||
| ;;; with this syntax? Well, ASCII nuls are taken to terminate arguments | ||||
| ;;; -- this is a fairly deeply-embedded property of UNIX. Each nul | ||||
| ;;; encountered on the argument line immediately terminates the current | ||||
| ;;; argument. So, three nuls surrounded by whitespace produces 3 empty | ||||
| ;;; arguments in series. This nul termination happens after \nnn processing, | ||||
| ;;; so you can use a line like | ||||
| ;;;     #!/bin/interpreter \ | ||||
| ;;;     foo \000bar \000\000baz\000 quux | ||||
| ;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). | ||||
| ;;; The rule is: a run of whitespace terminates an argument, | ||||
| ;;; but *each* individual nul terminates an argument. | ||||
| ;;;  | ||||
| ;;; \ followed by a nul is an error (it's not possible to knock-down nul | ||||
| ;;; in UNIX). | ||||
| ;;; - The only special chars are space, tab, newline, and \. | ||||
| ;;; - Every space char terminates an argument.  | ||||
| ;;;   Multiple spaces therefore introduce empty-string arguments. | ||||
| ;;; - A newline terminates the argument list, and will also terminate a | ||||
| ;;;   non-empty argument (but a newline following a space does not introduce | ||||
| ;;;   a final "" argument; it only terminates the argument list). | ||||
| ;;; - Tab is not allowed. | ||||
| ;;;   This is to prevent you from being screwed by thinking you had several | ||||
| ;;;   spaces where you really had a tab, and vice-versa. | ||||
| ;;; - The only other special character is \, the knock-down character.  | ||||
| ;;;   \ escapes \, space, tab, and newline, turning off their special  | ||||
| ;;;   functions. The ANSI C escape sequences, such as \n and \t are  | ||||
| ;;;   supported; these also produce argument-constituents -- \n doesn't act  | ||||
| ;;;   like a terminating newline. \nnn for *exactly* three octal digits reads  | ||||
| ;;;   as the char whose ASCII code is nnn. It is an error if \ is followed by  | ||||
| ;;;   just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always  | ||||
| ;;;   constituent chars. \ followed by other chars is not allowed (so we can | ||||
| ;;;   extend the escape-code space later if we like). | ||||
| ;;; | ||||
| ;;; You have to construct these line-2 arg lines carefully. For example, | ||||
| ;;; beware of trailing spaces at the end of the line. They'll give you | ||||
| ;;; extra trailing empty-string args. | ||||
| 
 | ||||
| ;;; (meta-arg-process-arglist args) | ||||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||||
|  | @ -69,7 +65,6 @@ | |||
| 
 | ||||
| (define (read-secondary-args port) | ||||
|   (let lp ((args '())) | ||||
|     (skip-char-set char-set:meta-arg-separators port) | ||||
|     (let ((c (peek-char port))) | ||||
|       (if (or (eof-object? c) (char=? c #\newline)) | ||||
| 	  (reverse args) | ||||
|  | @ -81,32 +76,38 @@ | |||
| (define (read-secondary-arg port) | ||||
|   (let lp ((chars '())) | ||||
|     (let ((c (peek-char port))) | ||||
|       (cond ((or (eof-object? c) | ||||
| 		 (char-set-contains? char-set:whitespace c)) | ||||
| 	     (apply string (reverse chars)))		; Leave C in stream. | ||||
| 
 | ||||
| 	    ((char=? c ascii/nul) | ||||
| 	     (read-char port)				; Consume C. | ||||
|       (cond ((or (eof-object? c) (char=? c #\newline)) | ||||
| 	     (apply string (reverse chars))) | ||||
| 
 | ||||
| 	    ((char=? c #\\) | ||||
| 	    ((char=? c #\space) | ||||
| 	     (read-char port) | ||||
| 	     (let ((c (read-backslash-sequence port))) | ||||
| 	       (if (char=? c ascii/nul) | ||||
| 		   (apply string (reverse chars)) | ||||
| 		   (lp (cons c chars))))) | ||||
| 	     (apply string (reverse chars))) | ||||
| 
 | ||||
| 	    ((char=? c tab) | ||||
| 	     (error "Illegal tab character in meta-arg argument line.")) | ||||
| 
 | ||||
| 	    (else (lp (cons ((cond ((char=? c #\\) | ||||
| 				    (read-char port) | ||||
| 				    read-backslash-sequence) | ||||
| 				   (else read-char)) | ||||
| 			     port) | ||||
| 			    chars))))))) | ||||
| 
 | ||||
| 	    (else (lp (cons (read-char port) chars))))))) | ||||
| 
 | ||||
| (define (read-backslash-sequence port) | ||||
|   (let ((c1 (read-char port)) | ||||
| 	(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg")))) | ||||
| 	(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line")))) | ||||
|     (cond ((eof-object? c1) (eof-lose)) | ||||
| 
 | ||||
| 	  ;; This would be better handled by a char-map abstraction. | ||||
| 	  ((char=? c1 #\n) #\newline) | ||||
| 	  ((char=? c1 #\r) carriage-return) | ||||
| 	  ((char=? c1 #\t) tab) | ||||
| 	  ((char=? c1 #\b) backspace) | ||||
| 	  ;; ...whatever. Look up complete table. | ||||
| 
 | ||||
| 	  ((char=? c1 ascii/nul) | ||||
| 	   (error "Cannot backslash nul byte in meta-arg")) | ||||
| 	  ;; \, space, tab, newline. | ||||
| 	  ((char-set-contains? char-set:simple-knockdown c1) c1) | ||||
| 
 | ||||
| 	  ((char-set-contains? char-set:octal-digits c1) | ||||
| 	   (let ((c2 (read-char port))) | ||||
|  | @ -117,13 +118,17 @@ | |||
| 				       (* 8 (+ (octet->int c2) | ||||
| 					       (* 8 (octet->int c1))))))))))) | ||||
| 		    | ||||
| 
 | ||||
| 	  (else c1)))) | ||||
| 	   | ||||
| 	  (else (error "Illegal \\ escape sequence in meta-arg argument line." | ||||
| 		       c1))))) | ||||
| 
 | ||||
| (define (octet->int c) (- (char->ascii c) (char->ascii #\0))) | ||||
| 
 | ||||
| (define ascii/nul (ascii->char 0)) | ||||
| 
 | ||||
| (define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)) | ||||
| 
 | ||||
| (define char-set:meta-arg-separators (string->char-set " \t")) | ||||
| (define char-set:simple-knockdown (string->char-set "\\ \n\t")) | ||||
| 
 | ||||
| ;;; Yechh. | ||||
| (define tab (ascii->char 9)) | ||||
| (define carriage-return (ascii->char 13)) | ||||
| (define backspace (ascii->char 8)) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 shivers
						shivers