804 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
			
		
		
	
	
			804 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
| Date: Mon, 1 Jul 1996 23:22:47 GMT
 | |
| From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
 | |
| To: shivers@lcs.mit.edu, bdc@ai.mit.edu
 | |
| Subject: scsh patch for precompiled regexps..
 | |
| 
 | |
| I meant to send this out months ago but I was just too hosed with work.
 | |
| 
 | |
| Here's what I have right now:
 | |
| 
 | |
| There are three pieces here:
 | |
| 	diffs to the "core" scsh
 | |
| 	diffs to Henry Spencer's latest regexp library
 | |
| 	a copy of Henry Spencer's latest regexp library..
 | |
| 
 | |
| It appears to work (it passes the same regression tests as the C library).
 | |
| 
 | |
| Let me know if I didn't include something needed for this to work.
 | |
| 
 | |
| 				- Bill
 | |
| 
 | |
| diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
 | |
| *** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
 | |
| --- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
 | |
| ***************
 | |
| *** 34,49 ****
 | |
|   
 | |
|   ;;; Bogus stub definitions for low-level match routines:
 | |
|   
 | |
| ! (define regexp? string?)
 | |
| ! (define (make-regexp str) str)
 | |
|   
 | |
| ! (define (regexp-exec regexp str . maybe-start)
 | |
|     (let ((start (optional-arg maybe-start 0))
 | |
|   	(start-vec (make-vector 10))
 | |
|   	(end-vec (make-vector 10)))
 | |
| !     (and (%regexp-match regexp str start start-vec end-vec)
 | |
| ! 	 (make-regexp-match str start-vec end-vec))))
 | |
| ! 
 | |
|   
 | |
|   ;;; Convert a string into a regex pattern that matches that string exactly --
 | |
|   ;;; in other words, quote the special chars with backslashes.
 | |
| --- 34,53 ----
 | |
|   
 | |
|   ;;; Bogus stub definitions for low-level match routines:
 | |
|   
 | |
| ! (define-record iregexp
 | |
| !   string)
 | |
|   
 | |
| ! (define regexp? iregexp?)
 | |
| ! 
 | |
| ! (define (make-regexp str) 
 | |
| !   (make-iregexp (compile-regexp str)))
 | |
| ! 
 | |
| ! (define (regexp-exec r s . maybe-start)
 | |
|     (let ((start (optional-arg maybe-start 0))
 | |
|   	(start-vec (make-vector 10))
 | |
|   	(end-vec (make-vector 10)))
 | |
| !     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
 | |
| ! 	 (make-regexp-match s start-vec end-vec))))
 | |
|   
 | |
|   ;;; Convert a string into a regex pattern that matches that string exactly --
 | |
|   ;;; in other words, quote the special chars with backslashes.
 | |
| ***************
 | |
| *** 58,75 ****
 | |
|   		  (cons #\\ result)
 | |
|   		  result))))))
 | |
|   
 | |
| ! (define-foreign %regexp-match/errno (reg_match (string regexp)
 | |
| ! 					       (string s)
 | |
| ! 					       (integer start)
 | |
| ! 					       (vector-desc start-vec)
 | |
| ! 					       (vector-desc end-vec))
 | |
| !   static-string ; Error string or #f if all is ok.
 | |
| !   bool)		; match?
 | |
| ! 
 | |
| ! (define (%regexp-match regexp string start start-vec end-vec)
 | |
| !   (receive (err match?) (%regexp-match/errno regexp string start
 | |
| ! 					     start-vec end-vec)
 | |
| !     (if err (error err %regexp-match regexp string start) match?)))
 | |
|   
 | |
|   
 | |
|   ;;; I do this one in C, I'm not sure why:
 | |
| --- 62,79 ----
 | |
|   		  (cons #\\ result)
 | |
|   		  result))))))
 | |
|   
 | |
| ! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
 | |
| ! ;;;					       (string s)
 | |
| ! ;;;					       (integer start)
 | |
| ! ;;;					       (vector-desc start-vec)
 | |
| ! ;;;					       (vector-desc end-vec))
 | |
| ! ;;;  static-string ; Error string or #f if all is ok.
 | |
| ! ;;;  bool)		; match?
 | |
| ! 
 | |
| ! ;;;(define (%regexp-match regexp string start start-vec end-vec)
 | |
| ! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
 | |
| ! ;;;					     start-vec end-vec)
 | |
| ! ;;;    (if err (error err %regexp-match regexp string start) match?)))
 | |
|   
 | |
|   
 | |
|   ;;; I do this one in C, I'm not sure why:
 | |
| ***************
 | |
| *** 79,81 ****
 | |
| --- 83,166 ----
 | |
|     (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
 | |
|     static-string	; error message -- #f if no error.
 | |
|     integer)	; number of files that pass the filter.
 | |
| + 
 | |
| + ;;; precompiled regexps.
 | |
| + 
 | |
| + (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
 | |
| +   static-string
 | |
| +   integer)
 | |
| + 
 | |
| + (define-foreign %regexp-compile (reg_comp_comp (string regexp)
 | |
| + 					       (string-desc re-buf))
 | |
| +   static-string)
 | |
| + 
 | |
| + (define (%regexp-exec-1 r s start sv ev)
 | |
| +   (receive (err match?) (%regexp-exec r s start sv ev)
 | |
| + 	   (if err (error err s start)
 | |
| + 	       match?)))
 | |
| + 
 | |
| + (define-foreign %regexp-exec (reg_exec (string-desc regexp)
 | |
| + 				       (string s)
 | |
| + 				       (integer start)
 | |
| + 				       (vector-desc start-vec)
 | |
| + 				       (vector-desc end-vec))
 | |
| +   static-string
 | |
| +   bool)
 | |
| + 
 | |
| + 
 | |
| + (define (compile-regexp e)
 | |
| +   (receive (err len)
 | |
| + 	   (%regexp-compiled-length e)
 | |
| + 	   (if err (error err e)
 | |
| + 	       (let ((buf (make-string len)))
 | |
| + 		 (%regexp-compile e buf)
 | |
| + 		 buf))))
 | |
| + 
 | |
| + 
 | |
| + 
 | |
| + (define-foreign %regexp-subst (reg_subst (string-desc regexp)
 | |
| + 					 (string m)
 | |
| + 					 (string s)
 | |
| + 					 (integer start)
 | |
| + 					 (vector-desc start-vec)
 | |
| + 					 (vector-desc end-vec)
 | |
| + 					 (string-desc outbuf))
 | |
| +   static-string
 | |
| +   integer)
 | |
| + 
 | |
| + (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
 | |
| + 						 (string m)
 | |
| + 						 (string s)
 | |
| + 						 (integer start)
 | |
| + 						 (vector-desc start-vec)
 | |
| + 						 (vector-desc end-vec))
 | |
| +   static-string
 | |
| +   integer)
 | |
| + 
 | |
| + 
 | |
| + (define (regexp-subst re match replacement)
 | |
| +   (let ((cr (iregexp:string re))
 | |
| + 	(matchstr (regexp-match:string match))
 | |
| + 	(startvec (regexp-match:start match))
 | |
| + 	(endvec (regexp-match:end match)))
 | |
| +     (receive (err outlen)
 | |
| + 	     (%regexp-subst-len cr
 | |
| + 				matchstr
 | |
| + 				replacement
 | |
| + 				0
 | |
| + 				startvec
 | |
| + 				endvec)
 | |
| + 	     (if err (error err matchstr replacement)
 | |
| + 		 (let ((outbuf (make-string outlen)))
 | |
| + 		   (receive (err outlen)
 | |
| + 			    (%regexp-subst cr
 | |
| + 					   matchstr
 | |
| + 					   replacement
 | |
| + 					   0
 | |
| + 					   startvec
 | |
| + 					   endvec
 | |
| + 					   outbuf)
 | |
| + 			    (if err (error err matchstr replacement)
 | |
| + 				(substring outbuf 0 outlen))))))))
 | |
| + 
 | |
| + 		   
 | |
| \ No newline at end of file
 | |
| diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
 | |
| *** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
 | |
| --- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
 | |
| ***************
 | |
| *** 19,24 ****
 | |
| --- 19,150 ----
 | |
|   /* Stash error msg in global. */
 | |
|   void regerror(char *msg) {regexp_error = msg;}
 | |
|   
 | |
| + /*
 | |
| + ** Return NULL normally, error string on error.
 | |
| + ** Stash number of bytes needed for compiled regexp into `*len'
 | |
| + */
 | |
| + 
 | |
| + char *reg_comp_len(const char *re, int *len)
 | |
| + {
 | |
| +     int l;
 | |
| + 
 | |
| +     regexp_error = NULL;
 | |
| +     *len = regcomp_len(re); 
 | |
| +     return regexp_error;
 | |
| + }
 | |
| + 
 | |
| + /*
 | |
| + ** Return NULL normally, error string on error.
 | |
| + ** Compile regexp into string described by `cr'.
 | |
| + */
 | |
| + 
 | |
| + char *reg_comp_comp(const char *re, scheme_value cr) 
 | |
| + {
 | |
| +     int len = STRING_LENGTH(cr);
 | |
| +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | |
| + 
 | |
| +     regexp_error = NULL;
 | |
| +     r = regcomp_comp(re, r, len); 
 | |
| +     return regexp_error;
 | |
| + }
 | |
| + 
 | |
| + /* Return NULL normally, error string on error.
 | |
| + ** Stash match info in start_vec and end_vec.
 | |
| + ** Returns boolean match/no-match in hit.
 | |
| + */
 | |
| + 
 | |
| + char *reg_exec(scheme_value cr, const char *string, int start,
 | |
| + 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
 | |
| + {
 | |
| +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | |
| + 
 | |
| +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal start vector";
 | |
| + 	}
 | |
| +     
 | |
| +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal end vector";
 | |
| + 	}
 | |
| + 
 | |
| +     regexp_error = 0;
 | |
| +     *hit = 0;
 | |
| +     
 | |
| +     if( regexec(r, string+start) ) {
 | |
| + 	int i;
 | |
| + 	for(i=0; i<NSUBEXP; i++) {
 | |
| + 	    const char *s = r->startp[i];
 | |
| + 	    const char *e = r->endp[i];
 | |
| + 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
 | |
| + 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
 | |
| + 	    r->startp[i] = NULL;
 | |
| + 	    r->endp[i] = NULL;
 | |
| + 	    }
 | |
| + 	*hit = 1;
 | |
| + 	}
 | |
| +     return regexp_error;
 | |
| + }
 | |
| + 
 | |
| + char *reg_subst(scheme_value cr, const char *match,
 | |
| + 		const char *src, int start,
 | |
| + 		scheme_value start_vec, scheme_value end_vec,
 | |
| + 		scheme_value outbuf, int *len)
 | |
| + {
 | |
| +     int i;
 | |
| +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | |
| + 
 | |
| +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal start vector";
 | |
| + 	}
 | |
| +     
 | |
| +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal end vector";
 | |
| + 	}
 | |
| + 
 | |
| +     for (i=0; i<NSUBEXP; i++) 
 | |
| +     {
 | |
| + 	scheme_value se = VECTOR_REF(start_vec, i);
 | |
| + 	scheme_value ee = VECTOR_REF(end_vec, i);
 | |
| + 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 | |
| + 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 | |
| +     }
 | |
| +     
 | |
| +     regexp_error = NULL;
 | |
| +     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
 | |
| +     *len = strlen(&STRING_REF(outbuf, 0));
 | |
| +     return regexp_error;
 | |
| + }
 | |
| + 
 | |
| + char *reg_subst_len(scheme_value cr, const char *match,
 | |
| + 		    const char *src, int start,
 | |
| + 		    scheme_value start_vec, scheme_value end_vec,
 | |
| + 		    int *len)
 | |
| + {
 | |
| +     int i;
 | |
| +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | |
| + 
 | |
| +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal start vector";
 | |
| + 	}
 | |
| +     
 | |
| +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | |
| + 	return "Illegal end vector";
 | |
| + 	}
 | |
| + 
 | |
| +     for (i=0; i<NSUBEXP; i++) 
 | |
| +     {
 | |
| + 	scheme_value se = VECTOR_REF(start_vec, i);
 | |
| + 	scheme_value ee = VECTOR_REF(end_vec, i);
 | |
| + 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 | |
| + 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 | |
| +     }
 | |
| +     
 | |
| +     regexp_error = NULL;
 | |
| +     *len = regsublen (r, src);
 | |
| +     return regexp_error;
 | |
| + }
 | |
| + 
 | |
| + 
 | |
| + #if 0
 | |
|   /* Return NULL normally, error string on error.
 | |
|   ** Stash match info in start_vec and end_vec.
 | |
|   ** Returns boolean match/no-match in hit.
 | |
| ***************
 | |
| *** 56,61 ****
 | |
| --- 182,188 ----
 | |
|       Free(prog);
 | |
|       return regexp_error;
 | |
|       }
 | |
| + #endif
 | |
|   
 | |
|   
 | |
|   char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
 | |
| diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
 | |
| *** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
 | |
| --- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
 | |
| ***************
 | |
| *** 1,6 ****
 | |
| --- 1,21 ----
 | |
| + #if 0
 | |
|   char *reg_match(const char *re, const char *string, int start,
 | |
|   		scheme_value start_vec, scheme_value end_vec,
 | |
|   		int *hit);
 | |
| + #endif
 | |
|   
 | |
|   char *filter_stringvec(const char *re, char const **stringvec,
 | |
|   		       int *nummatch);
 | |
| + 
 | |
| + char *reg_comp_len(const char *re, int *len);
 | |
| + char *reg_comp_comp(const char *re, scheme_value cr);
 | |
| + 
 | |
| + char *reg_exec(scheme_value cr, const char *string, int start,
 | |
| + 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
 | |
| + 
 | |
| + char *reg_subst(scheme_value cr, const char *match,
 | |
| + 		const char *src, int start,
 | |
| + 		scheme_value start_vec, scheme_value end_vec,
 | |
| + 		scheme_value outbuf, int *len);
 | |
| + 
 | |
| + 
 | |
| 
 | |
| Only in scsh-0.4.2-regexp/scsh: re2.scm
 | |
| diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
 | |
| *** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
 | |
| --- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
 | |
| ***************
 | |
| *** 413,418 ****
 | |
| --- 413,419 ----
 | |
|   	  make-regexp
 | |
|   	  regexp?
 | |
|   	  regexp-exec
 | |
| + 	  regexp-subst
 | |
|   	  regexp-quote))
 | |
|   
 | |
|   
 | |
| 
 | |
| regexp library changes:
 | |
| 
 | |
| *** Makefile	1996/04/06 19:24:49	1.1
 | |
| --- Makefile	1996/04/06 20:46:26
 | |
| ***************
 | |
| *** 5,11 ****
 | |
|   # Things you might want to put in TEST:
 | |
|   # -DDEBUG		debugging hooks
 | |
|   # -I.			regexp.h from current directory, not /usr/include
 | |
| ! TEST=-I.
 | |
|   
 | |
|   # Things you might want to put in PROF:
 | |
|   # -pg			profiler
 | |
| --- 5,11 ----
 | |
|   # Things you might want to put in TEST:
 | |
|   # -DDEBUG		debugging hooks
 | |
|   # -I.			regexp.h from current directory, not /usr/include
 | |
| ! TEST=-I. -DDEBUG
 | |
|   
 | |
|   # Things you might want to put in PROF:
 | |
|   # -pg			profiler
 | |
| *** regexp.c	1996/04/06 19:24:49	1.1
 | |
| --- regexp.c	1996/04/06 22:34:55
 | |
| ***************
 | |
| *** 105,110 ****
 | |
| --- 105,111 ----
 | |
|    * Utility definitions.
 | |
|    */
 | |
|   #define	FAIL(m)		{ regerror(m); return(NULL); }
 | |
| + #define	FAILN(m)	{ regerror(m); return(-1); }
 | |
|   #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
 | |
|   #define	META		"^$.[()|?+*\\"
 | |
|   
 | |
| ***************
 | |
| *** 162,173 ****
 | |
|   const char *exp;
 | |
|   {
 | |
|   	register regexp *r;
 | |
| ! 	register char *scan;
 | |
|   	int flags;
 | |
|   	struct comp co;
 | |
|   
 | |
|   	if (exp == NULL)
 | |
| ! 		FAIL("NULL argument to regcomp");
 | |
|   
 | |
|   	/* First pass: determine size, legality. */
 | |
|   	co.regparse = (char *)exp;
 | |
| --- 163,193 ----
 | |
|   const char *exp;
 | |
|   {
 | |
|   	register regexp *r;
 | |
| ! 	size_t len;
 | |
| ! 
 | |
| ! 	len = regcomp_len(exp);
 | |
| ! 	if (len <= 0)
 | |
| ! 	        return NULL;
 | |
| ! 
 | |
| ! 	/* Allocate space. */
 | |
| ! 	r = (regexp *)malloc(len);
 | |
| ! 
 | |
| ! 	if (r == NULL)
 | |
| ! 		FAIL("out of space");
 | |
| ! 	return regcomp_comp(exp, r, len);
 | |
| ! }
 | |
| ! 
 | |
| ! 
 | |
| ! size_t
 | |
| ! regcomp_len(exp)
 | |
| ! const char *exp;
 | |
| ! {
 | |
|   	int flags;
 | |
| + 	register regexp *r;
 | |
|   	struct comp co;
 | |
|   
 | |
|   	if (exp == NULL)
 | |
| ! 		FAILN("NULL argument to regcomp");
 | |
|   
 | |
|   	/* First pass: determine size, legality. */
 | |
|   	co.regparse = (char *)exp;
 | |
| ***************
 | |
| *** 178,198 ****
 | |
|   	co.regcode = co.regdummy;
 | |
|   	regc(&co, MAGIC);
 | |
|   	if (reg(&co, 0, &flags) == NULL)
 | |
| ! 		return(NULL);
 | |
|   
 | |
|   	/* Small enough for pointer-storage convention? */
 | |
|   	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 | |
| ! 		FAIL("regexp too big");
 | |
|   
 | |
| ! 	/* Allocate space. */
 | |
| ! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
 | |
| ! 	if (r == NULL)
 | |
| ! 		FAIL("out of space");
 | |
|   
 | |
|   	/* Second pass: emit code. */
 | |
|   	co.regparse = (char *)exp;
 | |
|   	co.regnpar = 1;
 | |
|   	co.regcode = r->program;
 | |
|   	regc(&co, MAGIC);
 | |
|   	if (reg(&co, 0, &flags) == NULL)
 | |
|   		return(NULL);
 | |
| --- 198,228 ----
 | |
|   	co.regcode = co.regdummy;
 | |
|   	regc(&co, MAGIC);
 | |
|   	if (reg(&co, 0, &flags) == NULL)
 | |
| ! 		return -1;
 | |
|   
 | |
|   	/* Small enough for pointer-storage convention? */
 | |
|   	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 | |
| ! 		FAILN("regexp too big");
 | |
|   
 | |
| ! 	return (sizeof(regexp) + (size_t)co.regsize);
 | |
| ! }
 | |
| ! 
 | |
| ! 
 | |
| ! regexp *
 | |
| ! regcomp_comp(exp, r, len)
 | |
| ! const char *exp;
 | |
| ! register regexp *r;
 | |
| ! size_t len;
 | |
| ! {
 | |
| ! 	register char *scan;
 | |
| ! 	int flags;
 | |
| ! 	struct comp co;
 | |
|   
 | |
|   	/* Second pass: emit code. */
 | |
|   	co.regparse = (char *)exp;
 | |
|   	co.regnpar = 1;
 | |
|   	co.regcode = r->program;
 | |
| + 	co.regsize = len - sizeof(regexp);
 | |
|   	regc(&co, MAGIC);
 | |
|   	if (reg(&co, 0, &flags) == NULL)
 | |
|   		return(NULL);
 | |
| ***************
 | |
| *** 200,206 ****
 | |
|   	/* Dig out information for optimizations. */
 | |
|   	r->regstart = '\0';		/* Worst-case defaults. */
 | |
|   	r->reganch = 0;
 | |
| ! 	r->regmust = NULL;
 | |
|   	r->regmlen = 0;
 | |
|   	scan = r->program+1;		/* First BRANCH. */
 | |
|   	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 | |
| --- 230,236 ----
 | |
|   	/* Dig out information for optimizations. */
 | |
|   	r->regstart = '\0';		/* Worst-case defaults. */
 | |
|   	r->reganch = 0;
 | |
| ! 	r->regmust = 0;
 | |
|   	r->regmlen = 0;
 | |
|   	scan = r->program+1;		/* First BRANCH. */
 | |
|   	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 | |
| ***************
 | |
| *** 229,235 ****
 | |
|   					longest = OPERAND(scan);
 | |
|   					len = strlen(OPERAND(scan));
 | |
|   				}
 | |
| ! 			r->regmust = longest;
 | |
|   			r->regmlen = (int)len;
 | |
|   		}
 | |
|   	}
 | |
| --- 259,265 ----
 | |
|   					longest = OPERAND(scan);
 | |
|   					len = strlen(OPERAND(scan));
 | |
|   				}
 | |
| ! 			r->regmust = longest - r->program;
 | |
|   			r->regmlen = (int)len;
 | |
|   		}
 | |
|   	}
 | |
| ***************
 | |
| *** 648,655 ****
 | |
|   struct exec {
 | |
|   	char *reginput;		/* String-input pointer. */
 | |
|   	char *regbol;		/* Beginning of input, for ^ check. */
 | |
| ! 	char **regstartp;	/* Pointer to startp array. */
 | |
| ! 	char **regendp;		/* Ditto for endp. */
 | |
|   };
 | |
|   
 | |
|   /*
 | |
| --- 678,685 ----
 | |
|   struct exec {
 | |
|   	char *reginput;		/* String-input pointer. */
 | |
|   	char *regbol;		/* Beginning of input, for ^ check. */
 | |
| ! 	const char **regstartp;	/* Pointer to startp array. */
 | |
| ! 	const char **regendp;		/* Ditto for endp. */
 | |
|   };
 | |
|   
 | |
|   /*
 | |
| ***************
 | |
| *** 690,696 ****
 | |
|   	}
 | |
|   
 | |
|   	/* If there is a "must appear" string, look for it. */
 | |
| ! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
 | |
|   		return(0);
 | |
|   
 | |
|   	/* Mark beginning of line for ^ . */
 | |
| --- 720,727 ----
 | |
|   	}
 | |
|   
 | |
|   	/* If there is a "must appear" string, look for it. */
 | |
| ! 	if ((prog->regmlen > 0) &&
 | |
| ! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
 | |
|   		return(0);
 | |
|   
 | |
|   	/* Mark beginning of line for ^ . */
 | |
| ***************
 | |
| *** 729,736 ****
 | |
|   char *string;
 | |
|   {
 | |
|   	register int i;
 | |
| ! 	register char **stp;
 | |
| ! 	register char **enp;
 | |
|   
 | |
|   	ep->reginput = string;
 | |
|   
 | |
| --- 760,767 ----
 | |
|   char *string;
 | |
|   {
 | |
|   	register int i;
 | |
| ! 	register const char **stp;
 | |
| ! 	register const char **enp;
 | |
|   
 | |
|   	ep->reginput = string;
 | |
|   
 | |
| ***************
 | |
| *** 1004,1011 ****
 | |
|   		printf("start `%c' ", r->regstart);
 | |
|   	if (r->reganch)
 | |
|   		printf("anchored ");
 | |
| ! 	if (r->regmust != NULL)
 | |
| ! 		printf("must have \"%s\"", r->regmust);
 | |
|   	printf("\n");
 | |
|   }
 | |
|   
 | |
| --- 1035,1042 ----
 | |
|   		printf("start `%c' ", r->regstart);
 | |
|   	if (r->reganch)
 | |
|   		printf("anchored ");
 | |
| ! 	if (r->regmlen > 0)
 | |
| ! 		printf("must have \"%s\"", &r->program[r->regmust]);
 | |
|   	printf("\n");
 | |
|   }
 | |
|   
 | |
| *** regexp.h	1996/04/06 19:24:49	1.1
 | |
| --- regexp.h	1996/04/07 01:52:19
 | |
| ***************
 | |
| *** 6,16 ****
 | |
|    */
 | |
|   #define NSUBEXP  10
 | |
|   typedef struct regexp {
 | |
| ! 	char *startp[NSUBEXP];
 | |
| ! 	char *endp[NSUBEXP];
 | |
|   	char regstart;		/* Internal use only. */
 | |
|   	char reganch;		/* Internal use only. */
 | |
| ! 	char *regmust;		/* Internal use only. */
 | |
|   	int regmlen;		/* Internal use only. */
 | |
|   	char program[1];	/* Unwarranted chumminess with compiler. */
 | |
|   } regexp;
 | |
| --- 6,16 ----
 | |
|    */
 | |
|   #define NSUBEXP  10
 | |
|   typedef struct regexp {
 | |
| ! 	const char *startp[NSUBEXP];
 | |
| ! 	const char *endp[NSUBEXP];
 | |
|   	char regstart;		/* Internal use only. */
 | |
|   	char reganch;		/* Internal use only. */
 | |
| ! 	int regmust;		/* Internal use only. */
 | |
|   	int regmlen;		/* Internal use only. */
 | |
|   	char program[1];	/* Unwarranted chumminess with compiler. */
 | |
|   } regexp;
 | |
| ***************
 | |
| *** 18,21 ****
 | |
| --- 18,27 ----
 | |
|   extern regexp *regcomp(const char *re);
 | |
|   extern int regexec(regexp *rp, const char *s);
 | |
|   extern void regsub(const regexp *rp, const char *src, char *dst);
 | |
| + extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
 | |
| + extern size_t regsublen(const regexp *rp, const char *src);
 | |
| + 
 | |
|   extern void regerror(char *message);
 | |
| + extern size_t regcomp_len(const char *exp);
 | |
| + extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
 | |
| + 
 | |
| *** regsub.c	1996/04/06 19:24:49	1.1
 | |
| --- regsub.c	1996/04/07 02:10:29
 | |
| ***************
 | |
| *** 11,25 ****
 | |
|   /*
 | |
|    - regsub - perform substitutions after a regexp match
 | |
|    */
 | |
|   void
 | |
| ! regsub(rp, source, dest)
 | |
|   const regexp *rp;
 | |
|   const char *source;
 | |
|   char *dest;
 | |
|   {
 | |
|   	register regexp * const prog = (regexp *)rp;
 | |
| ! 	register char *src = (char *)source;
 | |
|   	register char *dst = dest;
 | |
|   	register char c;
 | |
|   	register int no;
 | |
|   	register size_t len;
 | |
| --- 11,42 ----
 | |
|   /*
 | |
|    - regsub - perform substitutions after a regexp match
 | |
|    */
 | |
| + 
 | |
| + void regsub(rp, source, dest)
 | |
| + const regexp *rp;
 | |
| + const char *source;
 | |
| + char *dest;
 | |
| + {
 | |
| +         regnsub(rp, source, dest, BUFSIZ);
 | |
| + }
 | |
| + 
 | |
| + 
 | |
| + 
 | |
| + /*
 | |
| +  - regnsub - perform bounds-checked substitutions after a regexp match
 | |
| +  */
 | |
|   void
 | |
| ! regnsub(rp, source, dest, destlen)
 | |
|   const regexp *rp;
 | |
|   const char *source;
 | |
|   char *dest;
 | |
| + size_t destlen;
 | |
|   {
 | |
|   	register regexp * const prog = (regexp *)rp;
 | |
| ! 	register const char *src = (char *)source;
 | |
|   	register char *dst = dest;
 | |
| + 	char *dstend = dest + destlen;
 | |
| + 	char *odst;
 | |
|   	register char c;
 | |
|   	register int no;
 | |
|   	register size_t len;
 | |
| ***************
 | |
| *** 45,55 ****
 | |
|   			if (c == '\\' && (*src == '\\' || *src == '&'))
 | |
|   				c = *src++;
 | |
|   			*dst++ = c;
 | |
|   		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 | |
| ! 					prog->endp[no] > prog->startp[no]) {
 | |
|   			len = prog->endp[no] - prog->startp[no];
 | |
| ! 			(void) strncpy(dst, prog->startp[no], len);
 | |
|   			dst += len;
 | |
|   			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 | |
|   				regerror("damaged match string");
 | |
|   				return;
 | |
| --- 62,83 ----
 | |
|   			if (c == '\\' && (*src == '\\' || *src == '&'))
 | |
|   				c = *src++;
 | |
|   			*dst++ = c;
 | |
| + 			if (dst >= dstend) 
 | |
| + 			{
 | |
| + 			    	regerror("output buffer too small");
 | |
| + 				return;
 | |
| + 			}
 | |
|   		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 | |
| ! 			   prog->endp[no] > prog->startp[no]) {
 | |
|   			len = prog->endp[no] - prog->startp[no];
 | |
| ! 			odst = dst;
 | |
|   			dst += len;
 | |
| + 			if (dst >= dstend) 
 | |
| + 			{
 | |
| + 			    	regerror("output buffer too small");
 | |
| + 				return;
 | |
| + 			}
 | |
| + 			(void) strncpy(odst, prog->startp[no], len);
 | |
|   			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 | |
|   				regerror("damaged match string");
 | |
|   				return;
 | |
| ***************
 | |
| *** 58,60 ****
 | |
| --- 86,131 ----
 | |
|   	}
 | |
|   	*dst++ = '\0';
 | |
|   }
 | |
| + 
 | |
| + size_t regsublen(rp, source)
 | |
| + const regexp *rp;
 | |
| + const char *source;
 | |
| + {
 | |
| +     register regexp * const prog = (regexp *)rp;
 | |
| +     register char *src = (char *)source;
 | |
| +     register char c;
 | |
| +     register int no;
 | |
| +     register int len = 0;
 | |
| + 	
 | |
| +     if (prog == NULL || source == NULL) {
 | |
| + 	regerror("NULL parameter to regsublen");
 | |
| + 	return -1;
 | |
| +     }
 | |
| +     
 | |
| +     if ((unsigned char)*(prog->program) != MAGIC) {
 | |
| + 	regerror("damaged regexp");
 | |
| + 	return -1;
 | |
| +     }
 | |
| +     while ((c = *src++) != '\0') {
 | |
| + 	if (c == '&')
 | |
| + 	    no = 0;
 | |
| + 	else if (c == '\\' && isdigit(*src))
 | |
| + 	    no = *src++ - '0';
 | |
| + 	else
 | |
| + 	    no = -1;
 | |
| + 	if (no < 0) {		/* Ordinary character. */
 | |
| + 	    if (c == '\\' && (*src == '\\' || *src == '&'))
 | |
| + 		src++;
 | |
| + 	    len++;
 | |
| + 	} else {
 | |
| + 	    const char *s = prog->startp[no];
 | |
| + 	    const char *e = prog->endp[no];
 | |
| + 	    if ((s != NULL) && (e != NULL) && (e > s)) {
 | |
| + 		len += e-s;
 | |
| + 	    }
 | |
| + 	}
 | |
| +     }
 | |
| +     return len+1;
 | |
| + }
 | |
| + 
 | |
| + 
 | |
| 
 | |
| Original regexp code from henry:
 | |
| [unpacked & deleted -Olin]
 |