Completely redesigned meta-arg syntax.
This commit is contained in:
		
							parent
							
								
									737ebc5afe
								
							
						
					
					
						commit
						579ee12924
					
				
							
								
								
									
										152
									
								
								proc2.c
								
								
								
								
							
							
						
						
									
										152
									
								
								proc2.c
								
								
								
								
							|  | @ -55,31 +55,31 @@ | ||||||
| ** comment character when it begins to scan the second line. | ** comment character when it begins to scan the second line. | ||||||
| ** | ** | ||||||
| ** Arguments are parsed from the second line as follows: | ** Arguments are parsed from the second line as follows: | ||||||
| **   Arguments are white-space separated. The only special character is \, | ** - The only special chars are space, tab, newline, and \. | ||||||
| **   the knock-down character. \nnn, for three octal digits n, reads as the | ** - Every space char terminates an argument.  | ||||||
| **   char whose ASCII code is nnn. \n is newline. \ followed by anything else | **   Multiple spaces therefore introduce empty-string arguments. | ||||||
| **   is just that character -- including \, space, tab, and newline. It is an | ** - A newline terminates the argument list, and will also terminate a | ||||||
| **   error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean | **   non-empty argument (but a newline following a space does not introduce | ||||||
| **   "3Q" -- it's an error.  | **   a final "" argument; it only terminates the argument list). | ||||||
| **  | ** - Tab is not allowed. | ||||||
| **   The argument line is terminated by newline or end-of-file. | **   This is to prevent you from being screwed by thinking you had several | ||||||
|  | **   spaces where you really had a tab, and vice-versa. | ||||||
|  | ** - The only other special character is \, the knock-down character.  | ||||||
|  | **   \ escapes \, space, tab, and newline, turning off their special  | ||||||
|  | **   functions. The ANSI C escape sequences, such as \n and \t are  | ||||||
|  | **   supported; these also produce argument-constituents -- \n doesn't act  | ||||||
|  | **   like a terminating newline. \nnn for *exactly* three octal digits reads  | ||||||
|  | **   as the char whose ASCII code is nnn. It is an error if \ is followed by  | ||||||
|  | **   just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always  | ||||||
|  | **   constituent chars. \ followed by other chars is not allowed (so we can | ||||||
|  | **   extend the escape-code space later if we like). | ||||||
| ** | ** | ||||||
| **   Nul bytes & empty strings -- completeness at all costs: | ** You have to construct these line-2 arg lines carefully. For example, | ||||||
| **   Not that it is very useful, but how does one get empty arguments ("") | ** beware of trailing spaces at the end of the line. They'll give you | ||||||
| **   with this syntax? Well, ASCII nuls are taken to terminate arguments | ** extra trailing empty-string args. | ||||||
| **   -- this is a fairly deeply-embedded property of UNIX. Each nul |  | ||||||
| **   encountered on the argument line immediately terminates the current |  | ||||||
| **   argument. So, three nuls surrounded by whitespace produces 3 empty |  | ||||||
| **   arguments in series. This nul termination happens after \nnn processing, |  | ||||||
| **   so you can use a line like |  | ||||||
| **       #!/bin/interpreter \ |  | ||||||
| **       foo \000bar \000\000baz\000 quux |  | ||||||
| **   to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). |  | ||||||
| **   The rule is: a run of whitespace terminates an argument, |  | ||||||
| **   but *each* individual nul terminates an argument. |  | ||||||
| ** | ** | ||||||
| **   \ followed by a nul is an error (it's not possible to knock-down nul | ** You should also beware of including nul bytes into your arguments, since | ||||||
| **   in UNIX). | ** C's pathetic excuse for a string data-type will lose if you try this. | ||||||
| ** | ** | ||||||
| ** | ** | ||||||
| ** Another way to get this sort of multiple-argument functionality, with | ** Another way to get this sort of multiple-argument functionality, with | ||||||
|  | @ -148,11 +148,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) | ||||||
|     return realloc(vec, len*elt_size); |     return realloc(vec, len*elt_size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| /* This is a stmt, so no semicolon. The vec parameter better not be mgv_tmp! */ | /* The do ... while(0) is a trick to make this macro accept a terminating
 | ||||||
|  | ** semicolon. | ||||||
|  | */ | ||||||
| #define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \ | #define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \ | ||||||
|     {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ |     do {elt_t *mgv_tmp =(elt_t*)maybe_grow_vec((void*)vec, &size, \ | ||||||
| 					    index, sizeof(elt_t)); \ | 					       index, sizeof(elt_t)); \ | ||||||
|      if(mgv_tmp) vec = mgv_tmp; else goto lose;} | 	if(mgv_tmp) vec = mgv_tmp; else goto lose;} while (0); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /* process_meta_arg(fname, av)
 | /* process_meta_arg(fname, av)
 | ||||||
|  | @ -165,13 +167,13 @@ static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size) | ||||||
| ** argument following the \ switch, i.e., the <fname> argument. | ** argument following the \ switch, i.e., the <fname> argument. | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static char* read_arg(FILE*, int*); | static char* read_arg(FILE*); | ||||||
| 
 | 
 | ||||||
| char **process_meta_arg(char **av) | char **process_meta_arg(char **av) | ||||||
| { | { | ||||||
|     char **argv, *arg, **ap; |     char **argv, *arg, **ap; | ||||||
|  |     int c; | ||||||
|     FILE *script; |     FILE *script; | ||||||
|     int error_code; /* So ugly. */ |  | ||||||
|     char *fname; |     char *fname; | ||||||
|     int av_len; |     int av_len; | ||||||
|     int argv_i=0, argv_len=100; |     int argv_i=0, argv_len=100; | ||||||
|  | @ -188,13 +190,15 @@ char **process_meta_arg(char **av) | ||||||
|     argv = Malloc(char*, argv_len); |     argv = Malloc(char*, argv_len); | ||||||
|     if( !argv ) goto lose3; |     if( !argv ) goto lose3; | ||||||
| 
 | 
 | ||||||
|     while( (arg=read_arg(script, &error_code)) ) { |     while( EOF != (c=getc(script)) && '\n' != c ) { | ||||||
| 	Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1) | 	char *arg; | ||||||
|  | 	ungetc(c,script); | ||||||
|  | 	arg = read_arg(script); | ||||||
|  | 	if( !arg ) goto lose2; | ||||||
|  | 	Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1); | ||||||
| 	argv[argv_i++] = arg; | 	argv[argv_i++] = arg; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|     if( error_code ) goto lose2; |  | ||||||
| 
 |  | ||||||
|     for(av_len=0; av[av_len]; av_len++);	/* Compute length of av. */ |     for(av_len=0; av[av_len]; av_len++);	/* Compute length of av. */ | ||||||
| 
 | 
 | ||||||
|     /* Precisely re-size argv. */ |     /* Precisely re-size argv. */ | ||||||
|  | @ -218,64 +222,77 @@ char **process_meta_arg(char **av) | ||||||
|     return NULL; |     return NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| static char *read_arg(FILE *f, int *status_ptr) | /* Read in one arg and its terminating space.
 | ||||||
|  | ** If arg is terminated by a newline, leave the newline in | ||||||
|  | ** the stream so the outer loop can see it. Return a newly-allocated | ||||||
|  | ** string containing the arg; NULL if there's an error. | ||||||
|  | */ | ||||||
|  | static char *read_arg(FILE *f) | ||||||
| { | { | ||||||
|     char *buf, *tmp; |     char *buf, *tmp; | ||||||
|     int buflen, i; |     int buflen, i; | ||||||
|     int c; |  | ||||||
| 
 |  | ||||||
|     *status_ptr = 0; |  | ||||||
| 
 |  | ||||||
|     /* Skip whitespace. */ |  | ||||||
|     while( EOF != (c=getc(f)) ) |  | ||||||
| 	if( c=='\n' ) return NULL; |  | ||||||
| 	else if( !isspace(c) ) |  | ||||||
| 	    {ungetc(c,f); break;} |  | ||||||
| 	 |  | ||||||
|     if( c == EOF ) return NULL; |  | ||||||
| 
 | 
 | ||||||
|     /* Allocate a buffer for the arg. */ |     /* Allocate a buffer for the arg. */ | ||||||
|     i = 0; |     i = 0; | ||||||
|     buflen=20; |     buflen=20; | ||||||
|     if( !(buf = Malloc(char, buflen)) ) { |     if( !(buf = Malloc(char, buflen)) ) return NULL; | ||||||
| 	*status_ptr = -1; |  | ||||||
| 	return NULL; |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
|     /* Read in the arg. */ |     /* Read in the arg. */ | ||||||
|     while( EOF != (c=getc(f)) && !isspace(c) ) { |     while(1) { | ||||||
|  | 	int c = getc(f); | ||||||
|  | 
 | ||||||
|  | 	if( c == EOF || c == ' ' ) break; | ||||||
|  | 	if( c == '\n' ) {ungetc(c, f); break;} | ||||||
|  | 
 | ||||||
| 	/* Do knock-down processing. */ | 	/* Do knock-down processing. */ | ||||||
| 	if( c == '\\' ) { | 	if( c == '\\' ) { | ||||||
| 	    int c1, c2, c3; | 	    int c1, c2, c3; | ||||||
| 	    if( EOF == (c1 = getc(f)) ) goto lose; | 	    switch (c1=getc(f)) { | ||||||
| 	    if( isodigit(c1) ) { | 	      case EOF: | ||||||
|  | 		goto lose; | ||||||
|  | 
 | ||||||
|  | 		/* \nnn octal escape. */ | ||||||
|  | 	      case '0':		case '1': | ||||||
|  | 	      case '2':		case '3': | ||||||
|  | 	      case '4':		case '5': | ||||||
|  | 	      case '6':		case '7': | ||||||
| 		if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose; | 		if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose; | ||||||
| 		if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose; | 		if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose; | ||||||
| 		c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0'); | 		c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0'); | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 		/* ANSI C escapes. */ | ||||||
|  | 	      case 'n':	c='\n'; break; | ||||||
|  | 	      case 'r':	c='\r'; break; | ||||||
|  | 	      case 't':	c='\t'; break; | ||||||
|  | 	      case 'b':	c='\b'; break; | ||||||
|  | 
 | ||||||
|  | 		/* Simple knock-down: \, space, tab, newline. */ | ||||||
|  | 	      case '\\':	case ' ': | ||||||
|  | 	      case '\t':	case '\n': | ||||||
|  | 		c=c1; break; | ||||||
|  | 
 | ||||||
|  | 		/* Nothing else allowed. */ | ||||||
|  | 	      default: goto lose; | ||||||
| 		} | 		} | ||||||
| 	    else if( c1 == 'n' ) c='\n'; |  | ||||||
| 	    else c=c1; |  | ||||||
| 	    } | 	    } | ||||||
| 
 | 
 | ||||||
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose) | 	/* No tab allowed. */ | ||||||
|  | 	else if( c == '\t' ) goto lose; | ||||||
|  | 
 | ||||||
|  | 	Maybe_Grow_Vec(buf, buflen, i, char, lose); | ||||||
| 	buf[i++] = c; | 	buf[i++] = c; | ||||||
| 	if( c == '\0' ) break; /* nul terminates args. */ |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|     if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */ |     /* Null terminate the arg. */ | ||||||
| 
 |     Maybe_Grow_Vec(buf, buflen, i, char, lose); | ||||||
|     /* Null terminate the arg if it hasn't been done already. */ |     buf[i++] = '\0'; | ||||||
|     if( c != '\0' ) { |  | ||||||
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose) |  | ||||||
| 	buf[i++] = '\0'; |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
|     /* Precisely re-size buf and return. */ |     /* Precisely re-size buf and return. */ | ||||||
|     if( tmp=Realloc(char,buf,i) ) return tmp; |     if( tmp=Realloc(char,buf,i) ) return tmp; | ||||||
| 
 | 
 | ||||||
|   lose: |   lose: | ||||||
|     Free(buf); |     Free(buf); | ||||||
|     *status_ptr = -1; |  | ||||||
|     return NULL; |     return NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -354,11 +371,8 @@ main(int argc, char **argv) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|   args_done: |   args_done: | ||||||
|     if( *argv ) fputs(*argv++, stdout); |     if( *argv ) printf("\"%s\"", *argv++); | ||||||
|     while( *argv ) { |     while( *argv ) printf(" \"%s\"", *argv++); | ||||||
| 	putchar(' '); |  | ||||||
| 	fputs(*argv++, stdout); |  | ||||||
| 	} |  | ||||||
|     if( !n_flag ) putchar('\n'); |     if( !n_flag ) putchar('\n'); | ||||||
|     } |     } | ||||||
| #endif /* 0 */ | #endif /* 0 */ | ||||||
|  |  | ||||||
|  | @ -5,32 +5,28 @@ | ||||||
| 
 | 
 | ||||||
| ;;; Syntax of the line 2 argument line: | ;;; Syntax of the line 2 argument line: | ||||||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||||||
| ;;; Arguments are white-space separated. The only special character is \, | ;;; - The only special chars are space, tab, newline, and \. | ||||||
| ;;; the knock-down character. \nnn, for three octal digits n, reads as the | ;;; - Every space char terminates an argument.  | ||||||
| ;;; char whose ASCII code is nnn. \n is newline. \ followed by anything else | ;;;   Multiple spaces therefore introduce empty-string arguments. | ||||||
| ;;; is just that character -- including \, space, tab, and newline. It is an | ;;; - A newline terminates the argument list, and will also terminate a | ||||||
| ;;; error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean | ;;;   non-empty argument (but a newline following a space does not introduce | ||||||
| ;;; "3Q" -- it's an error. A backslash-encoded char is always an argument | ;;;   a final "" argument; it only terminates the argument list). | ||||||
| ;;; constituent unless it is the nul char (\000). | ;;; - Tab is not allowed. | ||||||
| ;;;  | ;;;   This is to prevent you from being screwed by thinking you had several | ||||||
| ;;; The argument line is terminated by newline or end-of-file. | ;;;   spaces where you really had a tab, and vice-versa. | ||||||
| ;;;  | ;;; - The only other special character is \, the knock-down character.  | ||||||
| ;;; Nul bytes & empty strings -- completeness at all costs: | ;;;   \ escapes \, space, tab, and newline, turning off their special  | ||||||
| ;;; Not that it is very useful, but how does one get empty arguments ("") | ;;;   functions. The ANSI C escape sequences, such as \n and \t are  | ||||||
| ;;; with this syntax? Well, ASCII nuls are taken to terminate arguments | ;;;   supported; these also produce argument-constituents -- \n doesn't act  | ||||||
| ;;; -- this is a fairly deeply-embedded property of UNIX. Each nul | ;;;   like a terminating newline. \nnn for *exactly* three octal digits reads  | ||||||
| ;;; encountered on the argument line immediately terminates the current | ;;;   as the char whose ASCII code is nnn. It is an error if \ is followed by  | ||||||
| ;;; argument. So, three nuls surrounded by whitespace produces 3 empty | ;;;   just 1 or 2 octal digits: \3Q is an error. Octal-escapes are always  | ||||||
| ;;; arguments in series. This nul termination happens after \nnn processing, | ;;;   constituent chars. \ followed by other chars is not allowed (so we can | ||||||
| ;;; so you can use a line like | ;;;   extend the escape-code space later if we like). | ||||||
| ;;;     #!/bin/interpreter \ | ;;; | ||||||
| ;;;     foo \000bar \000\000baz\000 quux | ;;; You have to construct these line-2 arg lines carefully. For example, | ||||||
| ;;; to generate the arg list ("foo" "" "bar" "" "" "baz" "quux"). | ;;; beware of trailing spaces at the end of the line. They'll give you | ||||||
| ;;; The rule is: a run of whitespace terminates an argument, | ;;; extra trailing empty-string args. | ||||||
| ;;; but *each* individual nul terminates an argument. |  | ||||||
| ;;;  |  | ||||||
| ;;; \ followed by a nul is an error (it's not possible to knock-down nul |  | ||||||
| ;;; in UNIX). |  | ||||||
| 
 | 
 | ||||||
| ;;; (meta-arg-process-arglist args) | ;;; (meta-arg-process-arglist args) | ||||||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||||||
|  | @ -69,7 +65,6 @@ | ||||||
| 
 | 
 | ||||||
| (define (read-secondary-args port) | (define (read-secondary-args port) | ||||||
|   (let lp ((args '())) |   (let lp ((args '())) | ||||||
|     (skip-char-set char-set:meta-arg-separators port) |  | ||||||
|     (let ((c (peek-char port))) |     (let ((c (peek-char port))) | ||||||
|       (if (or (eof-object? c) (char=? c #\newline)) |       (if (or (eof-object? c) (char=? c #\newline)) | ||||||
| 	  (reverse args) | 	  (reverse args) | ||||||
|  | @ -81,32 +76,38 @@ | ||||||
| (define (read-secondary-arg port) | (define (read-secondary-arg port) | ||||||
|   (let lp ((chars '())) |   (let lp ((chars '())) | ||||||
|     (let ((c (peek-char port))) |     (let ((c (peek-char port))) | ||||||
|       (cond ((or (eof-object? c) |       (cond ((or (eof-object? c) (char=? c #\newline)) | ||||||
| 		 (char-set-contains? char-set:whitespace c)) |  | ||||||
| 	     (apply string (reverse chars)))		; Leave C in stream. |  | ||||||
| 
 |  | ||||||
| 	    ((char=? c ascii/nul) |  | ||||||
| 	     (read-char port)				; Consume C. |  | ||||||
| 	     (apply string (reverse chars))) | 	     (apply string (reverse chars))) | ||||||
| 
 | 
 | ||||||
| 	    ((char=? c #\\) | 	    ((char=? c #\space) | ||||||
| 	     (read-char port) | 	     (read-char port) | ||||||
| 	     (let ((c (read-backslash-sequence port))) | 	     (apply string (reverse chars))) | ||||||
| 	       (if (char=? c ascii/nul) | 
 | ||||||
| 		   (apply string (reverse chars)) | 	    ((char=? c tab) | ||||||
| 		   (lp (cons c chars))))) | 	     (error "Illegal tab character in meta-arg argument line.")) | ||||||
|  | 
 | ||||||
|  | 	    (else (lp (cons ((cond ((char=? c #\\) | ||||||
|  | 				    (read-char port) | ||||||
|  | 				    read-backslash-sequence) | ||||||
|  | 				   (else read-char)) | ||||||
|  | 			     port) | ||||||
|  | 			    chars))))))) | ||||||
| 
 | 
 | ||||||
| 	    (else (lp (cons (read-char port) chars))))))) |  | ||||||
| 
 | 
 | ||||||
| (define (read-backslash-sequence port) | (define (read-backslash-sequence port) | ||||||
|   (let ((c1 (read-char port)) |   (let ((c1 (read-char port)) | ||||||
| 	(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg")))) | 	(eof-lose (lambda () (error "Premature EOF within backslash-sequence in meta-arg argument line")))) | ||||||
|     (cond ((eof-object? c1) (eof-lose)) |     (cond ((eof-object? c1) (eof-lose)) | ||||||
| 
 | 
 | ||||||
|  | 	  ;; This would be better handled by a char-map abstraction. | ||||||
| 	  ((char=? c1 #\n) #\newline) | 	  ((char=? c1 #\n) #\newline) | ||||||
|  | 	  ((char=? c1 #\r) carriage-return) | ||||||
|  | 	  ((char=? c1 #\t) tab) | ||||||
|  | 	  ((char=? c1 #\b) backspace) | ||||||
|  | 	  ;; ...whatever. Look up complete table. | ||||||
| 
 | 
 | ||||||
| 	  ((char=? c1 ascii/nul) | 	  ;; \, space, tab, newline. | ||||||
| 	   (error "Cannot backslash nul byte in meta-arg")) | 	  ((char-set-contains? char-set:simple-knockdown c1) c1) | ||||||
| 
 | 
 | ||||||
| 	  ((char-set-contains? char-set:octal-digits c1) | 	  ((char-set-contains? char-set:octal-digits c1) | ||||||
| 	   (let ((c2 (read-char port))) | 	   (let ((c2 (read-char port))) | ||||||
|  | @ -117,13 +118,17 @@ | ||||||
| 				       (* 8 (+ (octet->int c2) | 				       (* 8 (+ (octet->int c2) | ||||||
| 					       (* 8 (octet->int c1))))))))))) | 					       (* 8 (octet->int c1))))))))))) | ||||||
| 		    | 		    | ||||||
| 
 | 	   | ||||||
| 	  (else c1)))) | 	  (else (error "Illegal \\ escape sequence in meta-arg argument line." | ||||||
|  | 		       c1))))) | ||||||
| 
 | 
 | ||||||
| (define (octet->int c) (- (char->ascii c) (char->ascii #\0))) | (define (octet->int c) (- (char->ascii c) (char->ascii #\0))) | ||||||
| 
 | 
 | ||||||
| (define ascii/nul (ascii->char 0)) |  | ||||||
| 
 |  | ||||||
| (define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)) | (define char-set:octal-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)) | ||||||
| 
 | 
 | ||||||
| (define char-set:meta-arg-separators (string->char-set " \t")) | (define char-set:simple-knockdown (string->char-set "\\ \n\t")) | ||||||
|  | 
 | ||||||
|  | ;;; Yechh. | ||||||
|  | (define tab (ascii->char 9)) | ||||||
|  | (define carriage-return (ascii->char 13)) | ||||||
|  | (define backspace (ascii->char 8)) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 shivers
						shivers