Date: Mon, 1 Jul 1996 23:22:47 GMT
From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
To: shivers@lcs.mit.edu, bdc@ai.mit.edu
Subject: scsh patch for precompiled regexps..

I meant to send this out months ago but I was just too hosed with work.

Here's what I have right now:

There are three pieces here:
	diffs to the "core" scsh
	diffs to Henry Spencer's latest regexp library
	a copy of Henry Spencer's latest regexp library

It appears to work (it passes the same regression tests as the C library).

Let me know if I didn't include something needed for this to work.

				- Bill

diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
*** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
--- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
***************
*** 34,49 ****
  
  ;;; Bogus stub definitions for low-level match routines:
  
! (define regexp? string?)
! (define (make-regexp str) str)
  
! (define (regexp-exec regexp str . maybe-start)
    (let ((start (optional-arg maybe-start 0))
  	(start-vec (make-vector 10))
  	(end-vec (make-vector 10)))
!     (and (%regexp-match regexp str start start-vec end-vec)
! 	 (make-regexp-match str start-vec end-vec))))
! 
  
  ;;; Convert a string into a regex pattern that matches that string exactly --
  ;;; in other words, quote the special chars with backslashes.
--- 34,53 ----
  
  ;;; Bogus stub definitions for low-level match routines:
  
! (define-record iregexp
!   string)
  
! (define regexp? iregexp?)
! 
! (define (make-regexp str) 
!   (make-iregexp (compile-regexp str)))
! 
! (define (regexp-exec r s . maybe-start)
    (let ((start (optional-arg maybe-start 0))
  	(start-vec (make-vector 10))
  	(end-vec (make-vector 10)))
!     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
! 	 (make-regexp-match s start-vec end-vec))))
  
  ;;; Convert a string into a regex pattern that matches that string exactly --
  ;;; in other words, quote the special chars with backslashes.
***************
*** 58,75 ****
  		  (cons #\\ result)
  		  result))))))
  
! (define-foreign %regexp-match/errno (reg_match (string regexp)
! 					       (string s)
! 					       (integer start)
! 					       (vector-desc start-vec)
! 					       (vector-desc end-vec))
!   static-string ; Error string or #f if all is ok.
!   bool)		; match?
! 
! (define (%regexp-match regexp string start start-vec end-vec)
!   (receive (err match?) (%regexp-match/errno regexp string start
! 					     start-vec end-vec)
!     (if err (error err %regexp-match regexp string start) match?)))
  
  
  ;;; I do this one in C, I'm not sure why:
--- 62,79 ----
  		  (cons #\\ result)
  		  result))))))
  
! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
! ;;;					       (string s)
! ;;;					       (integer start)
! ;;;					       (vector-desc start-vec)
! ;;;					       (vector-desc end-vec))
! ;;;  static-string ; Error string or #f if all is ok.
! ;;;  bool)		; match?
! 
! ;;;(define (%regexp-match regexp string start start-vec end-vec)
! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
! ;;;					     start-vec end-vec)
! ;;;    (if err (error err %regexp-match regexp string start) match?)))
  
  
  ;;; I do this one in C, I'm not sure why:
***************
*** 79,81 ****
--- 83,166 ----
    (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
    static-string	; error message -- #f if no error.
    integer)	; number of files that pass the filter.
+ 
+ ;;; precompiled regexps.
+ 
+ (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
+   static-string
+   integer)
+ 
+ (define-foreign %regexp-compile (reg_comp_comp (string regexp)
+ 					       (string-desc re-buf))
+   static-string)
+ 
+ (define (%regexp-exec-1 r s start sv ev)
+   (receive (err match?) (%regexp-exec r s start sv ev)
+ 	   (if err (error err s start)
+ 	       match?)))
+ 
+ (define-foreign %regexp-exec (reg_exec (string-desc regexp)
+ 				       (string s)
+ 				       (integer start)
+ 				       (vector-desc start-vec)
+ 				       (vector-desc end-vec))
+   static-string
+   bool)
+ 
+ 
+ (define (compile-regexp e)
+   (receive (err len)
+ 	   (%regexp-compiled-length e)
+ 	   (if err (error err e)
+ 	       (let ((buf (make-string len)))
+ 		 (%regexp-compile e buf)
+ 		 buf))))
+ 
+ 
+ 
+ (define-foreign %regexp-subst (reg_subst (string-desc regexp)
+ 					 (string m)
+ 					 (string s)
+ 					 (integer start)
+ 					 (vector-desc start-vec)
+ 					 (vector-desc end-vec)
+ 					 (string-desc outbuf))
+   static-string
+   integer)
+ 
+ (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
+ 						 (string m)
+ 						 (string s)
+ 						 (integer start)
+ 						 (vector-desc start-vec)
+ 						 (vector-desc end-vec))
+   static-string
+   integer)
+ 
+ 
+ (define (regexp-subst re match replacement)
+   (let ((cr (iregexp:string re))
+ 	(matchstr (regexp-match:string match))
+ 	(startvec (regexp-match:start match))
+ 	(endvec (regexp-match:end match)))
+     (receive (err outlen)
+ 	     (%regexp-subst-len cr
+ 				matchstr
+ 				replacement
+ 				0
+ 				startvec
+ 				endvec)
+ 	     (if err (error err matchstr replacement)
+ 		 (let ((outbuf (make-string outlen)))
+ 		   (receive (err outlen)
+ 			    (%regexp-subst cr
+ 					   matchstr
+ 					   replacement
+ 					   0
+ 					   startvec
+ 					   endvec
+ 					   outbuf)
+ 			    (if err (error err matchstr replacement)
+ 				(substring outbuf 0 outlen))))))))
+ 
+ 		   
\ No newline at end of file
diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
*** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
--- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
***************
*** 19,24 ****
--- 19,150 ----
  /* Stash error msg in global. */
  void regerror(char *msg) {regexp_error = msg;}
  
+ /*
+ ** Return NULL normally, error string on error.
+ ** Stash number of bytes needed for compiled regexp into `*len'
+ */
+ 
+ char *reg_comp_len(const char *re, int *len)
+ {
+     int l;
+ 
+     regexp_error = NULL;
+     *len = regcomp_len(re); 
+     return regexp_error;
+ }
+ 
+ /*
+ ** Return NULL normally, error string on error.
+ ** Compile regexp into string described by `cr'.
+ */
+ 
+ char *reg_comp_comp(const char *re, scheme_value cr) 
+ {
+     int len = STRING_LENGTH(cr);
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     regexp_error = NULL;
+     r = regcomp_comp(re, r, len); 
+     return regexp_error;
+ }
+ 
+ /* Return NULL normally, error string on error.
+ ** Stash match info in start_vec and end_vec.
+ ** Returns boolean match/no-match in hit.
+ */
+ 
+ char *reg_exec(scheme_value cr, const char *string, int start,
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
+ {
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     regexp_error = 0;
+     *hit = 0;
+     
+     if( regexec(r, string+start) ) {
+ 	int i;
+ 	for(i=0; i<NSUBEXP; i++) {
+ 	    const char *s = r->startp[i];
+ 	    const char *e = r->endp[i];
+ 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
+ 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
+ 	    r->startp[i] = NULL;
+ 	    r->endp[i] = NULL;
+ 	    }
+ 	*hit = 1;
+ 	}
+     return regexp_error;
+ }
+ 
+ char *reg_subst(scheme_value cr, const char *match,
+ 		const char *src, int start,
+ 		scheme_value start_vec, scheme_value end_vec,
+ 		scheme_value outbuf, int *len)
+ {
+     int i;
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     for (i=0; i<NSUBEXP; i++) 
+     {
+ 	scheme_value se = VECTOR_REF(start_vec, i);
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
+     }
+     
+     regexp_error = NULL;
+     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
+     *len = strlen(&STRING_REF(outbuf, 0));
+     return regexp_error;
+ }
+ 
+ char *reg_subst_len(scheme_value cr, const char *match,
+ 		    const char *src, int start,
+ 		    scheme_value start_vec, scheme_value end_vec,
+ 		    int *len)
+ {
+     int i;
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     for (i=0; i<NSUBEXP; i++) 
+     {
+ 	scheme_value se = VECTOR_REF(start_vec, i);
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
+     }
+     
+     regexp_error = NULL;
+     *len = regsublen (r, src);
+     return regexp_error;
+ }
+ 
+ 
+ #if 0
  /* Return NULL normally, error string on error.
  ** Stash match info in start_vec and end_vec.
  ** Returns boolean match/no-match in hit.
***************
*** 56,61 ****
--- 182,188 ----
      Free(prog);
      return regexp_error;
      }
+ #endif
  
  
  char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
*** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
--- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
***************
*** 1,6 ****
--- 1,21 ----
+ #if 0
  char *reg_match(const char *re, const char *string, int start,
  		scheme_value start_vec, scheme_value end_vec,
  		int *hit);
+ #endif
  
  char *filter_stringvec(const char *re, char const **stringvec,
  		       int *nummatch);
+ 
+ char *reg_comp_len(const char *re, int *len);
+ char *reg_comp_comp(const char *re, scheme_value cr);
+ 
+ char *reg_exec(scheme_value cr, const char *string, int start,
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
+ 
+ char *reg_subst(scheme_value cr, const char *match,
+ 		const char *src, int start,
+ 		scheme_value start_vec, scheme_value end_vec,
+ 		scheme_value outbuf, int *len);
+ 
+ 

Only in scsh-0.4.2-regexp/scsh: re2.scm
diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
*** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
--- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
***************
*** 413,418 ****
--- 413,419 ----
  	  make-regexp
  	  regexp?
  	  regexp-exec
+ 	  regexp-subst
  	  regexp-quote))
  
  

Changes to Henry Spencer's regexp library:

*** Makefile	1996/04/06 19:24:49	1.1
--- Makefile	1996/04/06 20:46:26
***************
*** 5,11 ****
  # Things you might want to put in TEST:
  # -DDEBUG		debugging hooks
  # -I.			regexp.h from current directory, not /usr/include
! TEST=-I.
  
  # Things you might want to put in PROF:
  # -pg			profiler
--- 5,11 ----
  # Things you might want to put in TEST:
  # -DDEBUG		debugging hooks
  # -I.			regexp.h from current directory, not /usr/include
! TEST=-I. -DDEBUG
  
  # Things you might want to put in PROF:
  # -pg			profiler
*** regexp.c	1996/04/06 19:24:49	1.1
--- regexp.c	1996/04/06 22:34:55
***************
*** 105,110 ****
--- 105,111 ----
   * Utility definitions.
   */
  #define	FAIL(m)		{ regerror(m); return(NULL); }
+ #define	FAILN(m)	{ regerror(m); return(-1); }
  #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
  #define	META		"^$.[()|?+*\\"
  
***************
*** 162,173 ****
  const char *exp;
  {
  	register regexp *r;
! 	register char *scan;
  	int flags;
  	struct comp co;
  
  	if (exp == NULL)
! 		FAIL("NULL argument to regcomp");
  
  	/* First pass: determine size, legality. */
  	co.regparse = (char *)exp;
--- 163,193 ----
  const char *exp;
  {
  	register regexp *r;
! 	size_t len;
! 
! 	len = regcomp_len(exp);
! 	if (len <= 0)
! 	        return NULL;
! 
! 	/* Allocate space. */
! 	r = (regexp *)malloc(len);
! 
! 	if (r == NULL)
! 		FAIL("out of space");
! 	return regcomp_comp(exp, r, len);
! }
! 
! 
! size_t
! regcomp_len(exp)
! const char *exp;
! {
  	int flags;
+ 	register regexp *r;
  	struct comp co;
  
  	if (exp == NULL)
! 		FAILN("NULL argument to regcomp");
  
  	/* First pass: determine size, legality. */
  	co.regparse = (char *)exp;
***************
*** 178,198 ****
  	co.regcode = co.regdummy;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
! 		return(NULL);
  
  	/* Small enough for pointer-storage convention? */
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
! 		FAIL("regexp too big");
  
! 	/* Allocate space. */
! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
! 	if (r == NULL)
! 		FAIL("out of space");
  
  	/* Second pass: emit code. */
  	co.regparse = (char *)exp;
  	co.regnpar = 1;
  	co.regcode = r->program;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
  		return(NULL);
--- 198,228 ----
  	co.regcode = co.regdummy;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
! 		return -1;
  
  	/* Small enough for pointer-storage convention? */
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
! 		FAILN("regexp too big");
  
! 	return (sizeof(regexp) + (size_t)co.regsize);
! }
! 
! 
! regexp *
! regcomp_comp(exp, r, len)
! const char *exp;
! register regexp *r;
! size_t len;
! {
! 	register char *scan;
! 	int flags;
! 	struct comp co;
  
  	/* Second pass: emit code. */
  	co.regparse = (char *)exp;
  	co.regnpar = 1;
  	co.regcode = r->program;
+ 	co.regsize = len - sizeof(regexp);
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
  		return(NULL);
***************
*** 200,206 ****
  	/* Dig out information for optimizations. */
  	r->regstart = '\0';		/* Worst-case defaults. */
  	r->reganch = 0;
! 	r->regmust = NULL;
  	r->regmlen = 0;
  	scan = r->program+1;		/* First BRANCH. */
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
--- 230,236 ----
  	/* Dig out information for optimizations. */
  	r->regstart = '\0';		/* Worst-case defaults. */
  	r->reganch = 0;
! 	r->regmust = 0;
  	r->regmlen = 0;
  	scan = r->program+1;		/* First BRANCH. */
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
***************
*** 229,235 ****
  					longest = OPERAND(scan);
  					len = strlen(OPERAND(scan));
  				}
! 			r->regmust = longest;
  			r->regmlen = (int)len;
  		}
  	}
--- 259,265 ----
  					longest = OPERAND(scan);
  					len = strlen(OPERAND(scan));
  				}
! 			r->regmust = longest - r->program;
  			r->regmlen = (int)len;
  		}
  	}
***************
*** 648,655 ****
  struct exec {
  	char *reginput;		/* String-input pointer. */
  	char *regbol;		/* Beginning of input, for ^ check. */
! 	char **regstartp;	/* Pointer to startp array. */
! 	char **regendp;		/* Ditto for endp. */
  };
  
  /*
--- 678,685 ----
  struct exec {
  	char *reginput;		/* String-input pointer. */
  	char *regbol;		/* Beginning of input, for ^ check. */
! 	const char **regstartp;	/* Pointer to startp array. */
! 	const char **regendp;		/* Ditto for endp. */
  };
  
  /*
***************
*** 690,696 ****
  	}
  
  	/* If there is a "must appear" string, look for it. */
! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
  		return(0);
  
  	/* Mark beginning of line for ^ . */
--- 720,727 ----
  	}
  
  	/* If there is a "must appear" string, look for it. */
! 	if ((prog->regmlen > 0) &&
! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
  		return(0);
  
  	/* Mark beginning of line for ^ . */
***************
*** 729,736 ****
  char *string;
  {
  	register int i;
! 	register char **stp;
! 	register char **enp;
  
  	ep->reginput = string;
  
--- 760,767 ----
  char *string;
  {
  	register int i;
! 	register const char **stp;
! 	register const char **enp;
  
  	ep->reginput = string;
  
***************
*** 1004,1011 ****
  		printf("start `%c' ", r->regstart);
  	if (r->reganch)
  		printf("anchored ");
! 	if (r->regmust != NULL)
! 		printf("must have \"%s\"", r->regmust);
  	printf("\n");
  }
  
--- 1035,1042 ----
  		printf("start `%c' ", r->regstart);
  	if (r->reganch)
  		printf("anchored ");
! 	if (r->regmlen > 0)
! 		printf("must have \"%s\"", &r->program[r->regmust]);
  	printf("\n");
  }
  
*** regexp.h	1996/04/06 19:24:49	1.1
--- regexp.h	1996/04/07 01:52:19
***************
*** 6,16 ****
   */
  #define NSUBEXP  10
  typedef struct regexp {
! 	char *startp[NSUBEXP];
! 	char *endp[NSUBEXP];
  	char regstart;		/* Internal use only. */
  	char reganch;		/* Internal use only. */
! 	char *regmust;		/* Internal use only. */
  	int regmlen;		/* Internal use only. */
  	char program[1];	/* Unwarranted chumminess with compiler. */
  } regexp;
--- 6,16 ----
   */
  #define NSUBEXP  10
  typedef struct regexp {
! 	const char *startp[NSUBEXP];
! 	const char *endp[NSUBEXP];
  	char regstart;		/* Internal use only. */
  	char reganch;		/* Internal use only. */
! 	int regmust;		/* Internal use only. */
  	int regmlen;		/* Internal use only. */
  	char program[1];	/* Unwarranted chumminess with compiler. */
  } regexp;
***************
*** 18,21 ****
--- 18,27 ----
  extern regexp *regcomp(const char *re);
  extern int regexec(regexp *rp, const char *s);
  extern void regsub(const regexp *rp, const char *src, char *dst);
+ extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
+ extern size_t regsublen(const regexp *rp, const char *src);
+ 
  extern void regerror(char *message);
+ extern size_t regcomp_len(const char *exp);
+ extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
+ 
*** regsub.c	1996/04/06 19:24:49	1.1
--- regsub.c	1996/04/07 02:10:29
***************
*** 11,25 ****
  /*
   - regsub - perform substitutions after a regexp match
   */
  void
! regsub(rp, source, dest)
  const regexp *rp;
  const char *source;
  char *dest;
  {
  	register regexp * const prog = (regexp *)rp;
! 	register char *src = (char *)source;
  	register char *dst = dest;
  	register char c;
  	register int no;
  	register size_t len;
--- 11,42 ----
  /*
   - regsub - perform substitutions after a regexp match
   */
+ 
+ void regsub(rp, source, dest)
+ const regexp *rp;
+ const char *source;
+ char *dest;
+ {
+         regnsub(rp, source, dest, BUFSIZ);
+ }
+ 
+ 
+ 
+ /*
+  - regnsub - perform bounds-checked substitutions after a regexp match
+  */
  void
! regnsub(rp, source, dest, destlen)
  const regexp *rp;
  const char *source;
  char *dest;
+ size_t destlen;
  {
  	register regexp * const prog = (regexp *)rp;
! 	register const char *src = (char *)source;
  	register char *dst = dest;
+ 	char *dstend = dest + destlen;
+ 	char *odst;
  	register char c;
  	register int no;
  	register size_t len;
***************
*** 45,55 ****
  			if (c == '\\' && (*src == '\\' || *src == '&'))
  				c = *src++;
  			*dst++ = c;
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
! 					prog->endp[no] > prog->startp[no]) {
  			len = prog->endp[no] - prog->startp[no];
! 			(void) strncpy(dst, prog->startp[no], len);
  			dst += len;
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
  				regerror("damaged match string");
  				return;
--- 62,83 ----
  			if (c == '\\' && (*src == '\\' || *src == '&'))
  				c = *src++;
  			*dst++ = c;
+ 			if (dst >= dstend) 
+ 			{
+ 			    	regerror("output buffer too small");
+ 				return;
+ 			}
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
! 			   prog->endp[no] > prog->startp[no]) {
  			len = prog->endp[no] - prog->startp[no];
! 			odst = dst;
  			dst += len;
+ 			if (dst >= dstend) 
+ 			{
+ 			    	regerror("output buffer too small");
+ 				return;
+ 			}
+ 			(void) strncpy(odst, prog->startp[no], len);
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
  				regerror("damaged match string");
  				return;
***************
*** 58,60 ****
--- 86,131 ----
  	}
  	*dst++ = '\0';
  }
+ 
+ size_t regsublen(rp, source)
+ const regexp *rp;
+ const char *source;
+ {
+     register regexp * const prog = (regexp *)rp;
+     register char *src = (char *)source;
+     register char c;
+     register int no;
+     register int len = 0;
+ 	
+     if (prog == NULL || source == NULL) {
+ 	regerror("NULL parameter to regsublen");
+ 	return -1;
+     }
+     
+     if ((unsigned char)*(prog->program) != MAGIC) {
+ 	regerror("damaged regexp");
+ 	return -1;
+     }
+     while ((c = *src++) != '\0') {
+ 	if (c == '&')
+ 	    no = 0;
+ 	else if (c == '\\' && isdigit(*src))
+ 	    no = *src++ - '0';
+ 	else
+ 	    no = -1;
+ 	if (no < 0) {		/* Ordinary character. */
+ 	    if (c == '\\' && (*src == '\\' || *src == '&'))
+ 		src++;
+ 	    len++;
+ 	} else {
+ 	    const char *s = prog->startp[no];
+ 	    const char *e = prog->endp[no];
+ 	    if ((s != NULL) && (e != NULL) && (e > s)) {
+ 		len += e-s;
+ 	    }
+ 	}
+     }
+     return len+1;
+ }
+ 
+ 

Original regexp code from Henry:
[unpacked & deleted -Olin]