804 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
			
		
		
	
	
			804 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
Date: Mon, 1 Jul 1996 23:22:47 GMT
 | 
						|
From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
 | 
						|
To: shivers@lcs.mit.edu, bdc@ai.mit.edu
 | 
						|
Subject: scsh patch for precompiled regexps..
 | 
						|
 | 
						|
I meant to send this out months ago but I was just too hosed with work.
 | 
						|
 | 
						|
Here's what I have right now:
 | 
						|
 | 
						|
There are three pieces here:
 | 
						|
	diffs to the "core" scsh
 | 
						|
	diffs to Henry Spencer's latest regexp library
 | 
						|
	a copy of Henry Spencer's latest regexp library
 | 
						|
 | 
						|
It appears to work (it passes the same regression tests as the C library).
 | 
						|
 | 
						|
Let me know if I left out anything needed for this to work.
 | 
						|
 | 
						|
				- Bill
 | 
						|
 | 
						|
diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
 | 
						|
*** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
 | 
						|
--- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
 | 
						|
***************
 | 
						|
*** 34,49 ****
 | 
						|
  
 | 
						|
  ;;; Bogus stub definitions for low-level match routines:
 | 
						|
  
 | 
						|
! (define regexp? string?)
 | 
						|
! (define (make-regexp str) str)
 | 
						|
  
 | 
						|
! (define (regexp-exec regexp str . maybe-start)
 | 
						|
    (let ((start (optional-arg maybe-start 0))
 | 
						|
  	(start-vec (make-vector 10))
 | 
						|
  	(end-vec (make-vector 10)))
 | 
						|
!     (and (%regexp-match regexp str start start-vec end-vec)
 | 
						|
! 	 (make-regexp-match str start-vec end-vec))))
 | 
						|
! 
 | 
						|
  
 | 
						|
  ;;; Convert a string into a regex pattern that matches that string exactly --
 | 
						|
  ;;; in other words, quote the special chars with backslashes.
 | 
						|
--- 34,53 ----
 | 
						|
  
 | 
						|
  ;;; Bogus stub definitions for low-level match routines:
 | 
						|
  
 | 
						|
! (define-record iregexp
 | 
						|
!   string)
 | 
						|
  
 | 
						|
! (define regexp? iregexp?)
 | 
						|
! 
 | 
						|
! (define (make-regexp str) 
 | 
						|
!   (make-iregexp (compile-regexp str)))
 | 
						|
! 
 | 
						|
! (define (regexp-exec r s . maybe-start)
 | 
						|
    (let ((start (optional-arg maybe-start 0))
 | 
						|
  	(start-vec (make-vector 10))
 | 
						|
  	(end-vec (make-vector 10)))
 | 
						|
!     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
 | 
						|
! 	 (make-regexp-match s start-vec end-vec))))
 | 
						|
  
 | 
						|
  ;;; Convert a string into a regex pattern that matches that string exactly --
 | 
						|
  ;;; in other words, quote the special chars with backslashes.
 | 
						|
***************
 | 
						|
*** 58,75 ****
 | 
						|
  		  (cons #\\ result)
 | 
						|
  		  result))))))
 | 
						|
  
 | 
						|
! (define-foreign %regexp-match/errno (reg_match (string regexp)
 | 
						|
! 					       (string s)
 | 
						|
! 					       (integer start)
 | 
						|
! 					       (vector-desc start-vec)
 | 
						|
! 					       (vector-desc end-vec))
 | 
						|
!   static-string ; Error string or #f if all is ok.
 | 
						|
!   bool)		; match?
 | 
						|
! 
 | 
						|
! (define (%regexp-match regexp string start start-vec end-vec)
 | 
						|
!   (receive (err match?) (%regexp-match/errno regexp string start
 | 
						|
! 					     start-vec end-vec)
 | 
						|
!     (if err (error err %regexp-match regexp string start) match?)))
 | 
						|
  
 | 
						|
  
 | 
						|
  ;;; I do this one in C, I'm not sure why:
 | 
						|
--- 62,79 ----
 | 
						|
  		  (cons #\\ result)
 | 
						|
  		  result))))))
 | 
						|
  
 | 
						|
! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
 | 
						|
! ;;;					       (string s)
 | 
						|
! ;;;					       (integer start)
 | 
						|
! ;;;					       (vector-desc start-vec)
 | 
						|
! ;;;					       (vector-desc end-vec))
 | 
						|
! ;;;  static-string ; Error string or #f if all is ok.
 | 
						|
! ;;;  bool)		; match?
 | 
						|
! 
 | 
						|
! ;;;(define (%regexp-match regexp string start start-vec end-vec)
 | 
						|
! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
 | 
						|
! ;;;					     start-vec end-vec)
 | 
						|
! ;;;    (if err (error err %regexp-match regexp string start) match?)))
 | 
						|
  
 | 
						|
  
 | 
						|
  ;;; I do this one in C, I'm not sure why:
 | 
						|
***************
 | 
						|
*** 79,81 ****
 | 
						|
--- 83,166 ----
 | 
						|
    (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
 | 
						|
    static-string	; error message -- #f if no error.
 | 
						|
    integer)	; number of files that pass the filter.
 | 
						|
+ 
 | 
						|
+ ;;; precompiled regexps.
 | 
						|
+ 
 | 
						|
+ (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
 | 
						|
+   static-string
 | 
						|
+   integer)
 | 
						|
+ 
 | 
						|
+ (define-foreign %regexp-compile (reg_comp_comp (string regexp)
 | 
						|
+ 					       (string-desc re-buf))
 | 
						|
+   static-string)
 | 
						|
+ 
 | 
						|
+ (define (%regexp-exec-1 r s start sv ev)
 | 
						|
+   (receive (err match?) (%regexp-exec r s start sv ev)
 | 
						|
+ 	   (if err (error err s start)
 | 
						|
+ 	       match?)))
 | 
						|
+ 
 | 
						|
+ (define-foreign %regexp-exec (reg_exec (string-desc regexp)
 | 
						|
+ 				       (string s)
 | 
						|
+ 				       (integer start)
 | 
						|
+ 				       (vector-desc start-vec)
 | 
						|
+ 				       (vector-desc end-vec))
 | 
						|
+   static-string
 | 
						|
+   bool)
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ (define (compile-regexp e)
 | 
						|
+   (receive (err len)
 | 
						|
+ 	   (%regexp-compiled-length e)
 | 
						|
+ 	   (if err (error err e)
 | 
						|
+ 	       (let ((buf (make-string len)))
 | 
						|
+ 		 (%regexp-compile e buf)
 | 
						|
+ 		 buf))))
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ (define-foreign %regexp-subst (reg_subst (string-desc regexp)
 | 
						|
+ 					 (string m)
 | 
						|
+ 					 (string s)
 | 
						|
+ 					 (integer start)
 | 
						|
+ 					 (vector-desc start-vec)
 | 
						|
+ 					 (vector-desc end-vec)
 | 
						|
+ 					 (string-desc outbuf))
 | 
						|
+   static-string
 | 
						|
+   integer)
 | 
						|
+ 
 | 
						|
+ (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
 | 
						|
+ 						 (string m)
 | 
						|
+ 						 (string s)
 | 
						|
+ 						 (integer start)
 | 
						|
+ 						 (vector-desc start-vec)
 | 
						|
+ 						 (vector-desc end-vec))
 | 
						|
+   static-string
 | 
						|
+   integer)
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ (define (regexp-subst re match replacement)
 | 
						|
+   (let ((cr (iregexp:string re))
 | 
						|
+ 	(matchstr (regexp-match:string match))
 | 
						|
+ 	(startvec (regexp-match:start match))
 | 
						|
+ 	(endvec (regexp-match:end match)))
 | 
						|
+     (receive (err outlen)
 | 
						|
+ 	     (%regexp-subst-len cr
 | 
						|
+ 				matchstr
 | 
						|
+ 				replacement
 | 
						|
+ 				0
 | 
						|
+ 				startvec
 | 
						|
+ 				endvec)
 | 
						|
+ 	     (if err (error err matchstr replacement)
 | 
						|
+ 		 (let ((outbuf (make-string outlen)))
 | 
						|
+ 		   (receive (err outlen)
 | 
						|
+ 			    (%regexp-subst cr
 | 
						|
+ 					   matchstr
 | 
						|
+ 					   replacement
 | 
						|
+ 					   0
 | 
						|
+ 					   startvec
 | 
						|
+ 					   endvec
 | 
						|
+ 					   outbuf)
 | 
						|
+ 			    (if err (error err matchstr replacement)
 | 
						|
+ 				(substring outbuf 0 outlen))))))))
 | 
						|
+ 
 | 
						|
+ 		   
 | 
						|
\ No newline at end of file
 | 
						|
diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
 | 
						|
*** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
 | 
						|
--- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
 | 
						|
***************
 | 
						|
*** 19,24 ****
 | 
						|
--- 19,150 ----
 | 
						|
  /* Stash error msg in global. */
 | 
						|
  void regerror(char *msg) {regexp_error = msg;}
 | 
						|
  
 | 
						|
+ /*
 | 
						|
+ ** Return NULL normally, error string on error.
 | 
						|
+ ** Stash number of bytes needed for compiled regexp into `*len'
 | 
						|
+ */
 | 
						|
+ 
 | 
						|
+ char *reg_comp_len(const char *re, int *len)
 | 
						|
+ {
 | 
						|
+     int l;
 | 
						|
+ 
 | 
						|
+     regexp_error = NULL;
 | 
						|
+     *len = regcomp_len(re); 
 | 
						|
+     return regexp_error;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ /*
 | 
						|
+ ** Return NULL normally, error string on error.
 | 
						|
+ ** Compile regexp into string described by `cr'.
 | 
						|
+ */
 | 
						|
+ 
 | 
						|
+ char *reg_comp_comp(const char *re, scheme_value cr) 
 | 
						|
+ {
 | 
						|
+     int len = STRING_LENGTH(cr);
 | 
						|
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | 
						|
+ 
 | 
						|
+     regexp_error = NULL;
 | 
						|
+     r = regcomp_comp(re, r, len); 
 | 
						|
+     return regexp_error;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ /* Return NULL normally, error string on error.
 | 
						|
+ ** Stash match info in start_vec and end_vec.
 | 
						|
+ ** Returns boolean match/no-match in hit.
 | 
						|
+ */
 | 
						|
+ 
 | 
						|
+ char *reg_exec(scheme_value cr, const char *string, int start,
 | 
						|
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
 | 
						|
+ {
 | 
						|
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | 
						|
+ 
 | 
						|
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal start vector";
 | 
						|
+ 	}
 | 
						|
+     
 | 
						|
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal end vector";
 | 
						|
+ 	}
 | 
						|
+ 
 | 
						|
+     regexp_error = 0;
 | 
						|
+     *hit = 0;
 | 
						|
+     
 | 
						|
+     if( regexec(r, string+start) ) {
 | 
						|
+ 	int i;
 | 
						|
+ 	for(i=0; i<NSUBEXP; i++) {
 | 
						|
+ 	    const char *s = r->startp[i];
 | 
						|
+ 	    const char *e = r->endp[i];
 | 
						|
+ 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
 | 
						|
+ 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
 | 
						|
+ 	    r->startp[i] = NULL;
 | 
						|
+ 	    r->endp[i] = NULL;
 | 
						|
+ 	    }
 | 
						|
+ 	*hit = 1;
 | 
						|
+ 	}
 | 
						|
+     return regexp_error;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ char *reg_subst(scheme_value cr, const char *match,
 | 
						|
+ 		const char *src, int start,
 | 
						|
+ 		scheme_value start_vec, scheme_value end_vec,
 | 
						|
+ 		scheme_value outbuf, int *len)
 | 
						|
+ {
 | 
						|
+     int i;
 | 
						|
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | 
						|
+ 
 | 
						|
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal start vector";
 | 
						|
+ 	}
 | 
						|
+     
 | 
						|
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal end vector";
 | 
						|
+ 	}
 | 
						|
+ 
 | 
						|
+     for (i=0; i<NSUBEXP; i++) 
 | 
						|
+     {
 | 
						|
+ 	scheme_value se = VECTOR_REF(start_vec, i);
 | 
						|
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
 | 
						|
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 | 
						|
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 | 
						|
+     }
 | 
						|
+     
 | 
						|
+     regexp_error = NULL;
 | 
						|
+     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
 | 
						|
+     *len = strlen(&STRING_REF(outbuf, 0));
 | 
						|
+     return regexp_error;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ char *reg_subst_len(scheme_value cr, const char *match,
 | 
						|
+ 		    const char *src, int start,
 | 
						|
+ 		    scheme_value start_vec, scheme_value end_vec,
 | 
						|
+ 		    int *len)
 | 
						|
+ {
 | 
						|
+     int i;
 | 
						|
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
 | 
						|
+ 
 | 
						|
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal start vector";
 | 
						|
+ 	}
 | 
						|
+     
 | 
						|
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 | 
						|
+ 	return "Illegal end vector";
 | 
						|
+ 	}
 | 
						|
+ 
 | 
						|
+     for (i=0; i<NSUBEXP; i++) 
 | 
						|
+     {
 | 
						|
+ 	scheme_value se = VECTOR_REF(start_vec, i);
 | 
						|
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
 | 
						|
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 | 
						|
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 | 
						|
+     }
 | 
						|
+     
 | 
						|
+     regexp_error = NULL;
 | 
						|
+     *len = regsublen (r, src);
 | 
						|
+     return regexp_error;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ #if 0
 | 
						|
  /* Return NULL normally, error string on error.
 | 
						|
  ** Stash match info in start_vec and end_vec.
 | 
						|
  ** Returns boolean match/no-match in hit.
 | 
						|
***************
 | 
						|
*** 56,61 ****
 | 
						|
--- 182,188 ----
 | 
						|
      Free(prog);
 | 
						|
      return regexp_error;
 | 
						|
      }
 | 
						|
+ #endif
 | 
						|
  
 | 
						|
  
 | 
						|
  char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
 | 
						|
diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
 | 
						|
*** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
 | 
						|
--- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
 | 
						|
***************
 | 
						|
*** 1,6 ****
 | 
						|
--- 1,21 ----
 | 
						|
+ #if 0
 | 
						|
  char *reg_match(const char *re, const char *string, int start,
 | 
						|
  		scheme_value start_vec, scheme_value end_vec,
 | 
						|
  		int *hit);
 | 
						|
+ #endif
 | 
						|
  
 | 
						|
  char *filter_stringvec(const char *re, char const **stringvec,
 | 
						|
  		       int *nummatch);
 | 
						|
+ 
 | 
						|
+ char *reg_comp_len(const char *re, int *len);
 | 
						|
+ char *reg_comp_comp(const char *re, scheme_value cr);
 | 
						|
+ 
 | 
						|
+ char *reg_exec(scheme_value cr, const char *string, int start,
 | 
						|
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
 | 
						|
+ 
 | 
						|
+ char *reg_subst(scheme_value cr, const char *match,
 | 
						|
+ 		const char *src, int start,
 | 
						|
+ 		scheme_value start_vec, scheme_value end_vec,
 | 
						|
+ 		scheme_value outbuf, int *len);
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
 | 
						|
Only in scsh-0.4.2-regexp/scsh: re2.scm
 | 
						|
diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
 | 
						|
*** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
 | 
						|
--- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
 | 
						|
***************
 | 
						|
*** 413,418 ****
 | 
						|
--- 413,419 ----
 | 
						|
  	  make-regexp
 | 
						|
  	  regexp?
 | 
						|
  	  regexp-exec
 | 
						|
+ 	  regexp-subst
 | 
						|
  	  regexp-quote))
 | 
						|
  
 | 
						|
  
 | 
						|
 | 
						|
regexp library changes:
 | 
						|
 | 
						|
*** Makefile	1996/04/06 19:24:49	1.1
 | 
						|
--- Makefile	1996/04/06 20:46:26
 | 
						|
***************
 | 
						|
*** 5,11 ****
 | 
						|
  # Things you might want to put in TEST:
 | 
						|
  # -DDEBUG		debugging hooks
 | 
						|
  # -I.			regexp.h from current directory, not /usr/include
 | 
						|
! TEST=-I.
 | 
						|
  
 | 
						|
  # Things you might want to put in PROF:
 | 
						|
  # -pg			profiler
 | 
						|
--- 5,11 ----
 | 
						|
  # Things you might want to put in TEST:
 | 
						|
  # -DDEBUG		debugging hooks
 | 
						|
  # -I.			regexp.h from current directory, not /usr/include
 | 
						|
! TEST=-I. -DDEBUG
 | 
						|
  
 | 
						|
  # Things you might want to put in PROF:
 | 
						|
  # -pg			profiler
 | 
						|
*** regexp.c	1996/04/06 19:24:49	1.1
 | 
						|
--- regexp.c	1996/04/06 22:34:55
 | 
						|
***************
 | 
						|
*** 105,110 ****
 | 
						|
--- 105,111 ----
 | 
						|
   * Utility definitions.
 | 
						|
   */
 | 
						|
  #define	FAIL(m)		{ regerror(m); return(NULL); }
 | 
						|
+ #define	FAILN(m)	{ regerror(m); return(-1); }
 | 
						|
  #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
 | 
						|
  #define	META		"^$.[()|?+*\\"
 | 
						|
  
 | 
						|
***************
 | 
						|
*** 162,173 ****
 | 
						|
  const char *exp;
 | 
						|
  {
 | 
						|
  	register regexp *r;
 | 
						|
! 	register char *scan;
 | 
						|
  	int flags;
 | 
						|
  	struct comp co;
 | 
						|
  
 | 
						|
  	if (exp == NULL)
 | 
						|
! 		FAIL("NULL argument to regcomp");
 | 
						|
  
 | 
						|
  	/* First pass: determine size, legality. */
 | 
						|
  	co.regparse = (char *)exp;
 | 
						|
--- 163,193 ----
 | 
						|
  const char *exp;
 | 
						|
  {
 | 
						|
  	register regexp *r;
 | 
						|
! 	size_t len;
 | 
						|
! 
 | 
						|
! 	len = regcomp_len(exp);
 | 
						|
! 	if (len <= 0)
 | 
						|
! 	        return NULL;
 | 
						|
! 
 | 
						|
! 	/* Allocate space. */
 | 
						|
! 	r = (regexp *)malloc(len);
 | 
						|
! 
 | 
						|
! 	if (r == NULL)
 | 
						|
! 		FAIL("out of space");
 | 
						|
! 	return regcomp_comp(exp, r, len);
 | 
						|
! }
 | 
						|
! 
 | 
						|
! 
 | 
						|
! size_t
 | 
						|
! regcomp_len(exp)
 | 
						|
! const char *exp;
 | 
						|
! {
 | 
						|
  	int flags;
 | 
						|
+ 	register regexp *r;
 | 
						|
  	struct comp co;
 | 
						|
  
 | 
						|
  	if (exp == NULL)
 | 
						|
! 		FAILN("NULL argument to regcomp");
 | 
						|
  
 | 
						|
  	/* First pass: determine size, legality. */
 | 
						|
  	co.regparse = (char *)exp;
 | 
						|
***************
 | 
						|
*** 178,198 ****
 | 
						|
  	co.regcode = co.regdummy;
 | 
						|
  	regc(&co, MAGIC);
 | 
						|
  	if (reg(&co, 0, &flags) == NULL)
 | 
						|
! 		return(NULL);
 | 
						|
  
 | 
						|
  	/* Small enough for pointer-storage convention? */
 | 
						|
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 | 
						|
! 		FAIL("regexp too big");
 | 
						|
  
 | 
						|
! 	/* Allocate space. */
 | 
						|
! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
 | 
						|
! 	if (r == NULL)
 | 
						|
! 		FAIL("out of space");
 | 
						|
  
 | 
						|
  	/* Second pass: emit code. */
 | 
						|
  	co.regparse = (char *)exp;
 | 
						|
  	co.regnpar = 1;
 | 
						|
  	co.regcode = r->program;
 | 
						|
  	regc(&co, MAGIC);
 | 
						|
  	if (reg(&co, 0, &flags) == NULL)
 | 
						|
  		return(NULL);
 | 
						|
--- 198,228 ----
 | 
						|
  	co.regcode = co.regdummy;
 | 
						|
  	regc(&co, MAGIC);
 | 
						|
  	if (reg(&co, 0, &flags) == NULL)
 | 
						|
! 		return -1;
 | 
						|
  
 | 
						|
  	/* Small enough for pointer-storage convention? */
 | 
						|
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 | 
						|
! 		FAILN("regexp too big");
 | 
						|
  
 | 
						|
! 	return (sizeof(regexp) + (size_t)co.regsize);
 | 
						|
! }
 | 
						|
! 
 | 
						|
! 
 | 
						|
! regexp *
 | 
						|
! regcomp_comp(exp, r, len)
 | 
						|
! const char *exp;
 | 
						|
! register regexp *r;
 | 
						|
! size_t len;
 | 
						|
! {
 | 
						|
! 	register char *scan;
 | 
						|
! 	int flags;
 | 
						|
! 	struct comp co;
 | 
						|
  
 | 
						|
  	/* Second pass: emit code. */
 | 
						|
  	co.regparse = (char *)exp;
 | 
						|
  	co.regnpar = 1;
 | 
						|
  	co.regcode = r->program;
 | 
						|
+ 	co.regsize = len - sizeof(regexp);
 | 
						|
  	regc(&co, MAGIC);
 | 
						|
  	if (reg(&co, 0, &flags) == NULL)
 | 
						|
  		return(NULL);
 | 
						|
***************
 | 
						|
*** 200,206 ****
 | 
						|
  	/* Dig out information for optimizations. */
 | 
						|
  	r->regstart = '\0';		/* Worst-case defaults. */
 | 
						|
  	r->reganch = 0;
 | 
						|
! 	r->regmust = NULL;
 | 
						|
  	r->regmlen = 0;
 | 
						|
  	scan = r->program+1;		/* First BRANCH. */
 | 
						|
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 | 
						|
--- 230,236 ----
 | 
						|
  	/* Dig out information for optimizations. */
 | 
						|
  	r->regstart = '\0';		/* Worst-case defaults. */
 | 
						|
  	r->reganch = 0;
 | 
						|
! 	r->regmust = 0;
 | 
						|
  	r->regmlen = 0;
 | 
						|
  	scan = r->program+1;		/* First BRANCH. */
 | 
						|
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 | 
						|
***************
 | 
						|
*** 229,235 ****
 | 
						|
  					longest = OPERAND(scan);
 | 
						|
  					len = strlen(OPERAND(scan));
 | 
						|
  				}
 | 
						|
! 			r->regmust = longest;
 | 
						|
  			r->regmlen = (int)len;
 | 
						|
  		}
 | 
						|
  	}
 | 
						|
--- 259,265 ----
 | 
						|
  					longest = OPERAND(scan);
 | 
						|
  					len = strlen(OPERAND(scan));
 | 
						|
  				}
 | 
						|
! 			r->regmust = longest - r->program;
 | 
						|
  			r->regmlen = (int)len;
 | 
						|
  		}
 | 
						|
  	}
 | 
						|
***************
 | 
						|
*** 648,655 ****
 | 
						|
  struct exec {
 | 
						|
  	char *reginput;		/* String-input pointer. */
 | 
						|
  	char *regbol;		/* Beginning of input, for ^ check. */
 | 
						|
! 	char **regstartp;	/* Pointer to startp array. */
 | 
						|
! 	char **regendp;		/* Ditto for endp. */
 | 
						|
  };
 | 
						|
  
 | 
						|
  /*
 | 
						|
--- 678,685 ----
 | 
						|
  struct exec {
 | 
						|
  	char *reginput;		/* String-input pointer. */
 | 
						|
  	char *regbol;		/* Beginning of input, for ^ check. */
 | 
						|
! 	const char **regstartp;	/* Pointer to startp array. */
 | 
						|
! 	const char **regendp;		/* Ditto for endp. */
 | 
						|
  };
 | 
						|
  
 | 
						|
  /*
 | 
						|
***************
 | 
						|
*** 690,696 ****
 | 
						|
  	}
 | 
						|
  
 | 
						|
  	/* If there is a "must appear" string, look for it. */
 | 
						|
! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
 | 
						|
  		return(0);
 | 
						|
  
 | 
						|
  	/* Mark beginning of line for ^ . */
 | 
						|
--- 720,727 ----
 | 
						|
  	}
 | 
						|
  
 | 
						|
  	/* If there is a "must appear" string, look for it. */
 | 
						|
! 	if ((prog->regmlen > 0) &&
 | 
						|
! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
 | 
						|
  		return(0);
 | 
						|
  
 | 
						|
  	/* Mark beginning of line for ^ . */
 | 
						|
***************
 | 
						|
*** 729,736 ****
 | 
						|
  char *string;
 | 
						|
  {
 | 
						|
  	register int i;
 | 
						|
! 	register char **stp;
 | 
						|
! 	register char **enp;
 | 
						|
  
 | 
						|
  	ep->reginput = string;
 | 
						|
  
 | 
						|
--- 760,767 ----
 | 
						|
  char *string;
 | 
						|
  {
 | 
						|
  	register int i;
 | 
						|
! 	register const char **stp;
 | 
						|
! 	register const char **enp;
 | 
						|
  
 | 
						|
  	ep->reginput = string;
 | 
						|
  
 | 
						|
***************
 | 
						|
*** 1004,1011 ****
 | 
						|
  		printf("start `%c' ", r->regstart);
 | 
						|
  	if (r->reganch)
 | 
						|
  		printf("anchored ");
 | 
						|
! 	if (r->regmust != NULL)
 | 
						|
! 		printf("must have \"%s\"", r->regmust);
 | 
						|
  	printf("\n");
 | 
						|
  }
 | 
						|
  
 | 
						|
--- 1035,1042 ----
 | 
						|
  		printf("start `%c' ", r->regstart);
 | 
						|
  	if (r->reganch)
 | 
						|
  		printf("anchored ");
 | 
						|
! 	if (r->regmlen > 0)
 | 
						|
! 		printf("must have \"%s\"", &r->program[r->regmust]);
 | 
						|
  	printf("\n");
 | 
						|
  }
 | 
						|
  
 | 
						|
*** regexp.h	1996/04/06 19:24:49	1.1
 | 
						|
--- regexp.h	1996/04/07 01:52:19
 | 
						|
***************
 | 
						|
*** 6,16 ****
 | 
						|
   */
 | 
						|
  #define NSUBEXP  10
 | 
						|
  typedef struct regexp {
 | 
						|
! 	char *startp[NSUBEXP];
 | 
						|
! 	char *endp[NSUBEXP];
 | 
						|
  	char regstart;		/* Internal use only. */
 | 
						|
  	char reganch;		/* Internal use only. */
 | 
						|
! 	char *regmust;		/* Internal use only. */
 | 
						|
  	int regmlen;		/* Internal use only. */
 | 
						|
  	char program[1];	/* Unwarranted chumminess with compiler. */
 | 
						|
  } regexp;
 | 
						|
--- 6,16 ----
 | 
						|
   */
 | 
						|
  #define NSUBEXP  10
 | 
						|
  typedef struct regexp {
 | 
						|
! 	const char *startp[NSUBEXP];
 | 
						|
! 	const char *endp[NSUBEXP];
 | 
						|
  	char regstart;		/* Internal use only. */
 | 
						|
  	char reganch;		/* Internal use only. */
 | 
						|
! 	int regmust;		/* Internal use only. */
 | 
						|
  	int regmlen;		/* Internal use only. */
 | 
						|
  	char program[1];	/* Unwarranted chumminess with compiler. */
 | 
						|
  } regexp;
 | 
						|
***************
 | 
						|
*** 18,21 ****
 | 
						|
--- 18,27 ----
 | 
						|
  extern regexp *regcomp(const char *re);
 | 
						|
  extern int regexec(regexp *rp, const char *s);
 | 
						|
  extern void regsub(const regexp *rp, const char *src, char *dst);
 | 
						|
+ extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
 | 
						|
+ extern size_t regsublen(const regexp *rp, const char *src);
 | 
						|
+ 
 | 
						|
  extern void regerror(char *message);
 | 
						|
+ extern size_t regcomp_len(const char *exp);
 | 
						|
+ extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
 | 
						|
+ 
 | 
						|
*** regsub.c	1996/04/06 19:24:49	1.1
 | 
						|
--- regsub.c	1996/04/07 02:10:29
 | 
						|
***************
 | 
						|
*** 11,25 ****
 | 
						|
  /*
 | 
						|
   - regsub - perform substitutions after a regexp match
 | 
						|
   */
 | 
						|
  void
 | 
						|
! regsub(rp, source, dest)
 | 
						|
  const regexp *rp;
 | 
						|
  const char *source;
 | 
						|
  char *dest;
 | 
						|
  {
 | 
						|
  	register regexp * const prog = (regexp *)rp;
 | 
						|
! 	register char *src = (char *)source;
 | 
						|
  	register char *dst = dest;
 | 
						|
  	register char c;
 | 
						|
  	register int no;
 | 
						|
  	register size_t len;
 | 
						|
--- 11,42 ----
 | 
						|
  /*
 | 
						|
   - regsub - perform substitutions after a regexp match
 | 
						|
   */
 | 
						|
+ 
 | 
						|
+ void regsub(rp, source, dest)
 | 
						|
+ const regexp *rp;
 | 
						|
+ const char *source;
 | 
						|
+ char *dest;
 | 
						|
+ {
 | 
						|
+         regnsub(rp, source, dest, BUFSIZ);
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
+ /*
 | 
						|
+  - regnsub - perform bounds-checked substitutions after a regexp match
 | 
						|
+  */
 | 
						|
  void
 | 
						|
! regnsub(rp, source, dest, destlen)
 | 
						|
  const regexp *rp;
 | 
						|
  const char *source;
 | 
						|
  char *dest;
 | 
						|
+ size_t destlen;
 | 
						|
  {
 | 
						|
  	register regexp * const prog = (regexp *)rp;
 | 
						|
! 	register const char *src = (char *)source;
 | 
						|
  	register char *dst = dest;
 | 
						|
+ 	char *dstend = dest + destlen;
 | 
						|
+ 	char *odst;
 | 
						|
  	register char c;
 | 
						|
  	register int no;
 | 
						|
  	register size_t len;
 | 
						|
***************
 | 
						|
*** 45,55 ****
 | 
						|
  			if (c == '\\' && (*src == '\\' || *src == '&'))
 | 
						|
  				c = *src++;
 | 
						|
  			*dst++ = c;
 | 
						|
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 | 
						|
! 					prog->endp[no] > prog->startp[no]) {
 | 
						|
  			len = prog->endp[no] - prog->startp[no];
 | 
						|
! 			(void) strncpy(dst, prog->startp[no], len);
 | 
						|
  			dst += len;
 | 
						|
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 | 
						|
  				regerror("damaged match string");
 | 
						|
  				return;
 | 
						|
--- 62,83 ----
 | 
						|
  			if (c == '\\' && (*src == '\\' || *src == '&'))
 | 
						|
  				c = *src++;
 | 
						|
  			*dst++ = c;
 | 
						|
+ 			if (dst >= dstend) 
 | 
						|
+ 			{
 | 
						|
+ 			    	regerror("output buffer too small");
 | 
						|
+ 				return;
 | 
						|
+ 			}
 | 
						|
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 | 
						|
! 			   prog->endp[no] > prog->startp[no]) {
 | 
						|
  			len = prog->endp[no] - prog->startp[no];
 | 
						|
! 			odst = dst;
 | 
						|
  			dst += len;
 | 
						|
+ 			if (dst >= dstend) 
 | 
						|
+ 			{
 | 
						|
+ 			    	regerror("output buffer too small");
 | 
						|
+ 				return;
 | 
						|
+ 			}
 | 
						|
+ 			(void) strncpy(odst, prog->startp[no], len);
 | 
						|
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 | 
						|
  				regerror("damaged match string");
 | 
						|
  				return;
 | 
						|
***************
 | 
						|
*** 58,60 ****
 | 
						|
--- 86,131 ----
 | 
						|
  	}
 | 
						|
  	*dst++ = '\0';
 | 
						|
  }
 | 
						|
+ 
 | 
						|
+ size_t regsublen(rp, source)
 | 
						|
+ const regexp *rp;
 | 
						|
+ const char *source;
 | 
						|
+ {
 | 
						|
+     register regexp * const prog = (regexp *)rp;
 | 
						|
+     register char *src = (char *)source;
 | 
						|
+     register char c;
 | 
						|
+     register int no;
 | 
						|
+     register int len = 0;
 | 
						|
+ 	
 | 
						|
+     if (prog == NULL || source == NULL) {
 | 
						|
+ 	regerror("NULL parameter to regsublen");
 | 
						|
+ 	return -1;
 | 
						|
+     }
 | 
						|
+     
 | 
						|
+     if ((unsigned char)*(prog->program) != MAGIC) {
 | 
						|
+ 	regerror("damaged regexp");
 | 
						|
+ 	return -1;
 | 
						|
+     }
 | 
						|
+     while ((c = *src++) != '\0') {
 | 
						|
+ 	if (c == '&')
 | 
						|
+ 	    no = 0;
 | 
						|
+ 	else if (c == '\\' && isdigit(*src))
 | 
						|
+ 	    no = *src++ - '0';
 | 
						|
+ 	else
 | 
						|
+ 	    no = -1;
 | 
						|
+ 	if (no < 0) {		/* Ordinary character. */
 | 
						|
+ 	    if (c == '\\' && (*src == '\\' || *src == '&'))
 | 
						|
+ 		src++;
 | 
						|
+ 	    len++;
 | 
						|
+ 	} else {
 | 
						|
+ 	    const char *s = prog->startp[no];
 | 
						|
+ 	    const char *e = prog->endp[no];
 | 
						|
+ 	    if ((s != NULL) && (e != NULL) && (e > s)) {
 | 
						|
+ 		len += e-s;
 | 
						|
+ 	    }
 | 
						|
+ 	}
 | 
						|
+     }
 | 
						|
+     return len+1;
 | 
						|
+ }
 | 
						|
+ 
 | 
						|
+ 
 | 
						|
 | 
						|
Original regexp code from Henry:
 | 
						|
[unpacked & deleted -Olin]
 |