365 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			365 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
| /* Copyright (c) 1993 by Olin Shivers.
 | |
| **
 | |
| ** Please imagine a long, tedious, legalistic 5-page gnu-style copyright
 | |
| ** notice appearing here to the effect that you may use this code any
 | |
| ** way you like, as long as you don't charge money for it, remove this
 | |
| ** notice, or hold me liable for its results.
 | |
| */
 | |
| 
 | |
| /* If the above copyright notice is a problem for your app, send me mail. */
 | |
| 
 | |
| /* Using the #! interpreter hack in Unix for making scripts has a big
 | |
| ** problem: you only get 1 argument after the interpreter on the #! line. 
 | |
| ** This subroutine helps to fix that.
 | |
| **
 | |
| ** Below is a procedure that will arrange for a command-line switch of the 
 | |
| ** form \ <script> to stand for reading more args from line 2 of the file
 | |
| ** <script>. Replace the \ arg with these args. Now you can have Scheme,
 | |
| ** Postscript, Forth, Lisp, Smalltalk, tcl, etc. scripts that look like:
 | |
| **
 | |
| ** File foo:
 | |
| **         #!/usr/local/bin/scheme \
 | |
| **         -heap 4000000 -batch -script
 | |
| **         !#
 | |
| **         (define foo ...) ; Scheme code from here on.
 | |
| **         ...
 | |
| **
 | |
| ** With this program definition, executing
 | |
| **     foo arg1 arg2 arg3
 | |
| ** will turn into
 | |
| **     /usr/local/bin/scheme \ foo arg1 arg2 arg3
 | |
| ** which your Scheme interpreter main() (using this routine) will expand during
 | |
| ** argv processing into:
 | |
| **     /usr/local/bin/scheme -heap 4000000 -batch -script foo arg1 arg2 arg3
 | |
| ** That is, the argument processing in main() will *replace* the \ argument
 | |
| ** with the arguments read in from line 2 of foo. So we have dodged the
 | |
| ** only-one-argument-on-the-#!-line constraint.
 | |
| **
 | |
| ** The only other thing that needs to be done in this case is arrange for the
 | |
| ** interpreter to ignore these initial few non-Scheme lines. We can arrange
 | |
| ** for this in our Scheme example by defining a Scheme read macro #! that
 | |
| ** skips characters until newline, bang, splat (somewhat like the ; read macro
 | |
| ** skips characters until newline).
 | |
| **
 | |
| ** Using backslash as the meta-argument switch is handy for two reasons:
 | |
| ** - It is only one character. Since many Unix systems limit the #!
 | |
| **   line to 32 characters total, this is important.
 | |
| ** - It is a helpful visual pun -- implying a continuation line for the
 | |
| **   arguments.
 | |
| ** It is also very unlikely to be an already-used switch. However, -2
 | |
| ** is also a reasonable choice.
 | |
| **
 | |
| ** All you have to do to get this second-line meta-argument functionality is
 | |
| ** link this file in with your interpreter.  You can tweak this routine for
 | |
| ** various interpreters if you need to have it, for example, skip an initial
 | |
| ** comment character when it begins to scan the second line.
 | |
| **
 | |
| ** Arguments are parsed from the second line as follows:
 | |
| **   Arguments are white-space separated. The only special character is \,
 | |
| **   the knock-down character. \nnn, for three octal digits n, reads as the
 | |
| **   char whose ASCII code is nnn. \n is newline. \ followed by anything else
 | |
| **   is just that character -- including \, space, tab, and newline. It is an
 | |
| **   error if \ is followed by just 1 or 2 octal digits: \3Q doesn't mean
 | |
| **   "3Q" -- it's an error. 
 | |
| ** 
 | |
| **   The argument line is terminated by newline or end-of-file.
 | |
| **
 | |
| **   Nul bytes & empty strings -- completeness at all costs:
 | |
| **   Not that it is very useful, but how does one get empty arguments ("")
 | |
| **   with this syntax? Well, ASCII nuls are taken to terminate arguments
 | |
| **   -- this is a fairly deeply-embedded property of UNIX. Each nul
 | |
| **   encountered on the argument line immediately terminates the current
 | |
| **   argument. So, three nuls surrounded by whitespace produces 3 empty
 | |
| **   arguments in series. This nul termination happens after \nnn processing,
 | |
| **   so you can use a line like
 | |
| **       #!/bin/interpreter \
 | |
| **       foo \000bar \000\000baz\000 quux
 | |
| **   to generate the arg list ("foo" "" "bar" "" "" "baz" "quux").
 | |
| **   The rule is: a run of whitespace terminates an argument,
 | |
| **   but *each* individual nul terminates an argument.
 | |
| **
 | |
| **   \ followed by a nul is an error (it's not possible to knock-down nul
 | |
| **   in UNIX).
 | |
| **
 | |
| **
 | |
| ** Another way to get this sort of multiple-argument functionality, with
 | |
| ** the extra cost of starting up a shell, is to simply have the following
 | |
| ** trampoline at the beginning of your script:
 | |
| **     #!/bin/sh -
 | |
| **     exec /usr/local/bin/scheme -heap 4000000 -batch -script $0 $*
 | |
| **     !#
 | |
| ** (or use the indir program, same rough idea). This is less appropriate
 | |
| ** for interpreters intended to replace the shell.
 | |
| **
 | |
| ** Possible extensions:
 | |
| ** - I considered making the argument line syntax hairier -- adding ~user
 | |
| **   directory expansion and $(envvar) expansion. But I didn't do it.
 | |
| **
 | |
| ** - Not much error information. If something is wrong -- file can't
 | |
| **   be read, no second line, illegal syntax on second line, malloc
 | |
| **   loses -- you just get a NULL return value. You can examine errno
 | |
| **   if the problem is a Unix error (e.g., file error). But if the call
 | |
| **   fails for another reason (e.g., bad arg syntax on the second line),
 | |
| **   then errno won't help. This code could be modified to take an additional
 | |
| **   &error_code argument, and assign an integer into the var indicating
 | |
| **   just exactly what the problem was, if that's important to your 
 | |
| **   application. 
 | |
| **
 | |
| ** This code is fairly robust, careful code. ANSI standard C. No dependencies 
 | |
| ** on fixed-size buffers. It won't blow up if the inputs are pathological.
 | |
| ** It all type-checks. No core leaks. Feel free to customise it for the
 | |
| ** particular needs of a given interpreter; the core functionality is there.
 | |
| **
 | |
| ** See the end of this file for a sample program with an arg processing loop.
 | |
| ** Please send me bug reports, fixes, and improvements.
 | |
| **
 | |
| ** Some interpreters that might use this: tcl (wish, hope), perl, Smalltalk,
 | |
| ** little Schemes (scm, elk, s48, ...), big Schemes, Postscript, emacs, 
 | |
| ** Dylan, Lisp, Prolog.
 | |
| **     -Olin Shivers 2/93
 | |
| **     shivers@cs.cmu.edu
 | |
| **     shivers@csd.hku.hk
 | |
| */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h> /* malloc */
 | |
| #include <ctype.h>
 | |
| 
 | |
/* Thin typed wrappers around malloc/realloc/free.
**   Alloc(type)            - room for one object of `type`.
**   Malloc(type, n)        - room for n objects of `type`.
**   Realloc(type, ptr, n)  - resize ptr to hold n objects of `type`.
**   Free(p)                - release p.
** Every macro argument is parenthesised so that an arbitrary expression
** (a conditional, an assignment, pointer arithmetic) can be passed safely.
*/
#define Alloc(type)     ((type *) malloc(sizeof(type)))
#define Malloc(type,n)  ((type *) malloc(sizeof(type)*(n)))
#define Realloc(type,ptr,size) \
    ((type *) realloc((void *)(ptr), sizeof(type)*(size)))
#define Free(p)         (free((void *)(p)))

/* Is character c an octal digit?
** A plain range test: C guarantees the digits '0'..'9' are contiguous, and,
** unlike isdigit(), the comparison is well defined for every int value
** (including EOF and out-of-range escape results). Note c is evaluated twice.
*/
#define isodigit(c) ((c) >= '0' && (c) <= '7')
 | |
| 
 | |
/* Make sure the vector has room for element `index`, growing it if needed.
**
**   vec       - heap block previously obtained from malloc/realloc (or NULL).
**   lenptr    - in/out: current capacity of vec, counted in elements.
**   index     - position the caller is about to store into.
**   elt_size  - size of one element in bytes.
**
** If index already fits, vec is returned unchanged. Otherwise the capacity
** is doubled as many times as it takes to cover index, the block is
** realloc'd, and *lenptr is updated with the new capacity.
**
** Returns NULL on failure (bad arguments, size overflow, or realloc failure).
** In that case vec is still valid and *lenptr still describes it, so the
** caller can free or keep using the old vector.
*/

static void *maybe_grow_vec(void *vec, int *lenptr, int index, int elt_size)
{
    unsigned cap;
    void *grown;

    if( index < *lenptr ) return vec;   /* Still fits -- nothing to do. */
    if( index < 0 || elt_size <= 0 ) return NULL;

    /* Double until index fits. Start from 1 so an empty vector can grow.
    ** Unsigned arithmetic cannot wrap here: cap <= index <= INT_MAX each
    ** time we double.
    */
    cap = (*lenptr > 0) ? (unsigned)*lenptr : 1u;
    while( cap <= (unsigned)index ) cap *= 2;

    /* The new capacity must fit back in an int, and the byte count in a
    ** size_t.
    */
    if( cap > ((unsigned)-1)/2 ) return NULL;
    if( cap > ((size_t)-1)/(size_t)elt_size ) return NULL;

    grown = realloc(vec, (size_t)cap * (size_t)elt_size);
    if( grown ) *lenptr = (int)cap;     /* Only commit the length on success. */
    return grown;
    }
 | |
| 
 | |
/* Statement macro: make sure vec (an lvalue of type elt_t*) has room for
** element `index`, growing it through maybe_grow_vec(), which also updates
** size (an int lvalue holding the capacity). If maybe_grow_vec() returns
** NULL, vec is left unassigned and control jumps to the label `lose`.
**
** This expands to a complete { } block, so no semicolon after it.
** The vec parameter better not be mgv_tmp, which the expansion declares!
** Arguments are parenthesised so expressions can be passed safely.
*/
#define Maybe_Grow_Vec(vec, size, index, elt_t, lose) \
    {elt_t *mgv_tmp = (elt_t*)maybe_grow_vec((void*)(vec), &(size), \
                                             (index), sizeof(elt_t)); \
     if(mgv_tmp) (vec) = mgv_tmp; else goto lose;}
 | |
| 
 | |
| 
 | |
| /* process_meta_arg(av)
 | |
| ** -----------------------
 | |
| ** The main routine.
 | |
| **
 | |
| ** Expand a \ <fname> switch. Return NULL on error, otherwise a new arg
 | |
| ** vector composed of (1) the args scanned in from line 2 of fname, followed
 | |
| ** by (2) the arguments in av. The argument vector av starts with the
 | |
| ** argument following the \ switch, i.e., the <fname> argument.
 | |
| */
 | |
| 
 | |
| static char* read_arg(FILE*, int*);
 | |
| 
 | |
| char **process_meta_arg(char **av)
 | |
| {
 | |
|     char **argv, *arg, **ap;
 | |
|     FILE *script;
 | |
|     int error_code; /* So ugly. */
 | |
|     char *fname;
 | |
|     int av_len;
 | |
|     int argv_i=0, argv_len=100;
 | |
| 
 | |
|     if( !*av ) return NULL;
 | |
|     fname = *av;
 | |
|     script = fopen(fname, "r");
 | |
|     if( !script ) return NULL;
 | |
| 
 | |
|     /* Skip line 1. */
 | |
|     while( '\n' != getc(script) )
 | |
| 	if( feof(script) || ferror(script) ) goto lose3;
 | |
| 
 | |
|     argv = Malloc(char*, argv_len);
 | |
|     if( !argv ) goto lose3;
 | |
| 
 | |
|     while( (arg=read_arg(script, &error_code)) ) {
 | |
| 	Maybe_Grow_Vec(argv, argv_len, argv_i, char*, lose1)
 | |
| 	argv[argv_i++] = arg;
 | |
| 	}
 | |
| 
 | |
|     if( error_code ) goto lose2;
 | |
| 
 | |
|     for(av_len=0; av[av_len]; av_len++);	/* Compute length of av. */
 | |
| 
 | |
|     /* Precisely re-size argv. */
 | |
|     if( NULL == (ap=Realloc(char*, argv, argv_len + av_len + 1)) ) goto lose2;
 | |
|     argv = ap;
 | |
| 
 | |
|     while( argv[argv_i++] = *av++ );	/* Copy over av & null terminate. */
 | |
| 
 | |
|     fclose(script);
 | |
|     return argv;
 | |
| 
 | |
| 
 | |
|     /* Exception handlers: free storage and lose. */
 | |
|   lose1:
 | |
|     Free(arg);
 | |
|   lose2:
 | |
|     while( argv_i ) Free(argv[--argv_i]);
 | |
|     Free(argv);
 | |
|  lose3:
 | |
|     fclose(script);
 | |
|     return NULL;
 | |
|     }
 | |
| 
 | |
| static char *read_arg(FILE *f, int *status_ptr)
 | |
| {
 | |
|     char *buf, *tmp;
 | |
|     int buflen, i;
 | |
|     int c;
 | |
| 
 | |
|     *status_ptr = 0;
 | |
| 
 | |
|     /* Skip whitespace. */
 | |
|     while( EOF != (c=getc(f)) )
 | |
| 	if( c=='\n' ) return NULL;
 | |
| 	else if( !isspace(c) )
 | |
| 	    {ungetc(c,f); break;}
 | |
| 	
 | |
|     if( c == EOF ) return NULL;
 | |
| 
 | |
|     /* Allocate a buffer for the arg. */
 | |
|     i = 0;
 | |
|     buflen=20;
 | |
|     if( !(buf = Malloc(char, buflen)) ) {
 | |
| 	*status_ptr = -1;
 | |
| 	return NULL;
 | |
| 	}
 | |
| 
 | |
|     /* Read in the arg. */
 | |
|     while( EOF != (c=getc(f)) && !isspace(c) ) {
 | |
| 	/* Do knock-down processing. */
 | |
| 	if( c == '\\' ) {
 | |
| 	    int c1, c2, c3;
 | |
| 	    if( EOF == (c1 = getc(f)) ) goto lose;
 | |
| 	    if( isodigit(c1) ) {
 | |
| 		if( EOF == (c2=getc(f)) || !isodigit(c2) ) goto lose;
 | |
| 		if( EOF == (c3=getc(f)) || !isodigit(c3) ) goto lose;
 | |
| 		c = ((c1-'0')<<6) | ((c2-'0')<<3) | (c3-'0');
 | |
| 		}
 | |
| 	    else if( c1 == 'n' ) c='\n';
 | |
| 	    else c=c1;
 | |
| 	    }
 | |
| 
 | |
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose)
 | |
| 	buf[i++] = c;
 | |
| 	if( c == '\0' ) break; /* nul terminates args. */
 | |
| 	}
 | |
| 
 | |
|     if( isspace(c) ) ungetc(c,f); /* Must preserve newline for next call. */
 | |
| 
 | |
|     /* Null terminate the arg if it hasn't been done already. */
 | |
|     if( c != '\0' ) {
 | |
| 	Maybe_Grow_Vec(buf, buflen, i, char, lose)
 | |
| 	buf[i++] = '\0';
 | |
| 	}
 | |
| 
 | |
|     /* Precisely re-size buf and return. */
 | |
|     if( tmp=Realloc(char,buf,i) ) return tmp;
 | |
| 
 | |
|   lose:
 | |
|     Free(buf);
 | |
|     *status_ptr = -1;
 | |
|     return NULL;
 | |
|     }
 | |
| 
 | |
| 
 | |
| /*****************************************************************************/
 | |
| #if 0
 | |
| /*
 | |
| ** Debugging test stub and example argument scanner. 
 | |
| ** Like echo, but with \ <fname> expansion.
 | |
| **/
 | |
| 
 | |
/* Name used to prefix diagnostics; main() sets it from argv[0]. */
char *prog_name;

/* Write the command-line synopsis to stderr, then exit with status 1.
** Never returns.
*/
static void usage(void)
{
    fprintf(stderr, "Usage: %s [\\ <fname>] [-n] [--] arg1 ... argn\n", prog_name);
    exit(1);
    }
 | |
| 
 | |
| /* Expand away a leading meta-arg if there is one. Die informatively on error.
 | |
| ** I can't think of a reason why you might want to have recursive meta
 | |
| ** arguments, but we handle this case to be complete.
 | |
| */
 | |
| static char **maybe_expand_meta_arg(char **argv)
 | |
| {
 | |
|     if( *argv )
 | |
| 	while( strcmp(*argv, "\\") == 0 ) {
 | |
| 	    argv++;
 | |
| 	    if( !*argv ) {
 | |
| 		fprintf(stderr, "%s: \\ switch without following filename.\n",
 | |
| 			prog_name);
 | |
| 		usage();
 | |
| 		}
 | |
| 	    argv = process_meta_arg(argv);
 | |
| 	    if( !argv ) {
 | |
| 		fprintf(stderr, "%s: unable to expand \\ <filename> switch.\n",
 | |
| 			prog_name);
 | |
| 		usage();
 | |
| 		}
 | |
| 	    }
 | |
|     return argv;
 | |
|     }
 | |
| 
 | |
| main(int argc, char **argv)
 | |
| {
 | |
|     int n_flag=0;
 | |
| 
 | |
|     prog_name = *argv++;
 | |
| 
 | |
|     /* Handle an initial meta-arg expansion. */
 | |
|     argv = maybe_expand_meta_arg(argv);
 | |
| 
 | |
|     /* Process switches. */
 | |
|     for(;*argv;argv++) {
 | |
| 	/* Process arg. */
 | |
| 	if( argv[0][0] == '-' )
 | |
| 	    switch( argv[0][1] ) {
 | |
| 	      /* -n means no terminating newline. */
 | |
| 	      case 'n':
 | |
| 		n_flag++;
 | |
| 		break;
 | |
| 
 | |
| 	      /* -- terminates args, so you can echo \, -n, -- args. */
 | |
| 	      case '-':
 | |
| 		argv++;
 | |
| 		goto args_done;
 | |
| 		break;
 | |
| 
 | |
| 	      default:
 | |
| 		fprintf(stderr, "%s: unknown flag %s.\n", prog_name, *argv);
 | |
| 		usage();
 | |
| 	      }
 | |
| 	else goto args_done; /* Not a switch. We are done. */
 | |
| 	}
 | |
| 
 | |
|   args_done:
 | |
|     if( *argv ) fputs(*argv++, stdout);
 | |
|     while( *argv ) {
 | |
| 	putchar(' ');
 | |
| 	fputs(*argv++, stdout);
 | |
| 	}
 | |
|     if( !n_flag ) putchar('\n');
 | |
|     }
 | |
| #endif /* 0 */
 |