updated regexp

1996-09-24 01:29:51 +00:00 · 1996-09-24 01:29:51 +00:00 · a61145fa6a
parent 31e3c2e522
commit a61145fa6a
12 changed files with 2732 additions and 1483 deletions
--- a/scsh/regexp/COPYRIGHT
+++ b/scsh/regexp/COPYRIGHT
@ -0,0 +1,19 @@
+Copyright (c) 1986, 1993, 1995 by University of Toronto.
+Written by Henry Spencer.  Not derived from licensed software.
+
+Permission is granted to anyone to use this software for any
+purpose on any computer system, and to redistribute it in any way,
+subject to the following restrictions:
+
+1. The author is not responsible for the consequences of use of
+	this software, no matter how awful, even if they arise
+	from defects in it.
+
+2. The origin of this software must not be misrepresented, either
+	by explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not
+	be misrepresented (by explicit claim or omission) as being
+	the original software.
+
+4. This notice must not be removed or altered.
--- a/scsh/regexp/Makefile.in
+++ b/scsh/regexp/Makefile.in
@ -5,15 +5,9 @@ CFLAGS1 = @CFLAGS1@

 RANLIB  = @RANLIB@

-# Things you might want to put in ENV and LENV:
-# -Dvoid=int		compilers that don't do void
-# -DCHARBITS=0377	compilers that don't do unsigned char
-# -DSTATIC=extern	compilers that don't like "static foo();" as forward decl
-# -DSTRCSPN		library does not have strcspn()
-# -Dstrchr=index	library does not have strchr()
+# Things you might want to put in ENV:
 # -DERRAVAIL		have utzoo-compatible error() function and friends
-# ENV=-DSTRCSPN
-# LENV=-DSTRCSPN
+ENV=

 # Things you might want to put in TEST:
 # -DDEBUG		debugging hooks
@ -21,49 +15,25 @@ RANLIB  = @RANLIB@
 TEST=-I. -I$(srcdir)

 # Things you might want to put in PROF:
-# -Dstatic='/* */'	make everything global so profiler can see it.
-# -p			profiler
-PROF=
-
-INCDEST=/contrib/share/include
-LIBDEST=/contrib/system/lib
-MANDEST=/contrib/share/man/man3
-
-# CC = cc
-# CFLAGS1 = -O -Q
-
-LINTFLAGS=$(LENV) $(TEST) -ha
-# LDFLAGS=-i
+# -pg			profiler
+# PROF=

 CFLAGS=$(CFLAGS1) $(ENV) $(TEST) $(PROF)
+LDFLAGS=$(PROF)

-OBJ=regexp.o regsub.o
-LIBOBJ= $(OBJ) regerror.o
-LSRC=regexp.c regsub.c regerror.c
-DTR=README dMakefile regexp.3 regexp.h regexp.c regsub.c regerror.c \
-	regmagic.h try.c timer.c tests
-DEST = ..
+LIB=libregexp.a
+OBJ=regexp.o regsub.o regerror.o
+TMP=dtr.tmp

-# we don't use the library anymore -bri
-all: $(OBJ) # libregexp.a  try
+default:	r

-libregexp.a: $(LIBOBJ)
-	ar r libregexp.a $(LIBOBJ)
-	$(RANLIB) libregexp.a
-
-install:
-	install -c libregexp.a $(LIBDEST)/libregexp.a
-	$(RANLIB) $(LIBDEST)/libregexp.a
-	install -c regexp.h $(INCDEST)/regexp.h
-	install -c regexp.3 $(MANDEST)/regexp.3
-
-try:	try.o $(OBJ)
-	$(CC) $(LDFLAGS) try.o $(OBJ) -o try
+try:	try.o $(LIB)
+	$(CC) $(LDFLAGS) try.o $(LIB) -o try

 # Making timer will probably require putting stuff in $(PROF) and then
 # recompiling everything; the following is just the final stage.
-timer:	timer.o $(OBJ)
-	$(CC) $(LDFLAGS) $(PROF) timer.o $(OBJ) -o timer
+timer:	timer.o $(LIB)
+	$(CC) $(LDFLAGS) timer.o $(LIB) -o timer

 timer.o:	timer.c timer.t.h

@ -71,26 +41,78 @@ timer.t.h:	tests
 	sed 's/	/","/g;s/\\/&&/g;s/.*/{"&"},/' tests >timer.t.h

 # Regression test.
-r:	./try tests
-	@echo 'No news is good news...'
-	./try <tests
+r:	try tests
+	./try <tests		# no news is good news...

-lint:	timer.t.h
-	@echo 'Complaints about multiply-declared regerror() are legit.'
-	lint $(LINTFLAGS) $(LSRC) try.c
-	lint $(LINTFLAGS) $(LSRC) timer.c
+# Archive the objects into $(LIB) and index it.  The archive is named via
+# LIB in every command so that changing LIB cannot leave ranlib pointed
+# at a stale file name.
+$(LIB):	$(OBJ)
+	ar cr $(LIB) $(OBJ)
+	$(RANLIB) $(LIB)

 regexp.o:	regexp.c regexp.h regmagic.h
 regsub.o:	regsub.c regexp.h regmagic.h

 clean:
-	rm -f *.o *.out *~ *.a core mon.out timer.t.h dMakefile dtr try timer
+	rm -f *.o core mon.out gmon.out timer.t.h dtr copy try timer r.*
+	rm -f residue rs.* re.1 rm.h re.h ch.soe ch.ps j badcom fig[012]
+	rm -f ch.sml fig[12].ps $(LIB)
+	rm -rf $(TMP)

-dtr:	r makedtr $(DTR)
-	makedtr $(DTR) >dtr
+# the rest of this is unlikely to be of use to you

-dMakefile:	Makefile
-	sed '/^L*ENV=/s/ *-DERRAVAIL//' Makefile >dMakefile
+BITS = r.1 rs.1 re.1 rm.h re.h
+OPT=-p -ms

-mv:	$(OBJ) regerror.o
-	mv $(OBJ) regerror.o $(DEST)
+ch.soe:	ch $(BITS)
+	soelim ch >$@
+
+ch.sml:	ch $(BITS) smlize splitfigs
+	splitfigs ch | soelim | smlize >$@
+
+fig0 fig1 fig2:	ch splitfigs
+	splitfigs ch >/dev/null
+
+f:	fig0 fig1 fig2 figs
+	groff -Tps -s $(OPT) figs | lpr
+
+fig1.ps:	fig0 fig1
+	( cat fig0 ; echo ".LP" ; cat fig1 ) | groff -Tps $(OPT) >$@
+
+fig2.ps:	fig0 fig2
+	( cat fig0 ; echo ".LP" ; cat fig2 ) | groff -Tps $(OPT) >$@
+
+fp:	fig1.ps fig2.ps
+
+r.1:	regexp.c splitter
+	splitter regexp.c
+
+rs.1:	regsub.c splitter
+	splitter regsub.c
+
+re.1:	regerror.c splitter
+	splitter regerror.c
+
+rm.h:	regmagic.h splitter
+	splitter regmagic.h
+
+re.h:	regexp.h splitter
+	splitter regexp.h
+
+PLAIN=COPYRIGHT README Makefile regexp.3 try.c timer.c tests
+FIX=regexp.h regexp.c regsub.c regerror.c regmagic.h
+DTR=$(PLAIN) $(FIX)
+
+# Build the distribution archive from a scratch directory: the $(PLAIN)
+# files are copied as-is, the $(FIX) sources are passed through normalize.
+# Any normalize failure, or a failed cd, aborts the recipe instead of
+# silently packaging incomplete or wrong files.
+dtr:	r $(DTR)
+	rm -rf $(TMP)
+	mkdir $(TMP)
+	cp $(PLAIN) $(TMP)
+	for f in $(FIX) ; do normalize $$f >$(TMP)/$$f || exit 1 ; done
+	( cd $(TMP) && makedtr $(DTR) ) >$@
+	rm -rf $(TMP)
+
+ch.ps:	ch Makefile $(BITS)
+	groff -Tps $(OPT) ch >$@
+
+copy:	ch.soe ch.sml fp
+	makedtr REMARKS ch.sml fig*.ps ch.soe >$@
+
+go:	copy dtr
--- a/scsh/regexp/README
+++ b/scsh/regexp/README
@ -1,55 +1,37 @@
-This is a nearly-public-domain reimplementation of the V8 regexp(3) package.
+This is a revision of my well-known regular-expression package, regexp(3).
 It gives C programs the ability to use egrep-style regular expressions, and
 does it in a much cleaner fashion than the analogous routines in SysV.
+It is not, alas, fully POSIX.2-compliant; that is hard.  (I'm working on
+a full reimplementation that will do that.)

-	Copyright (c) 1986 by University of Toronto.
-	Written by Henry Spencer.  Not derived from licensed software.
+This version is the one which is examined and explained in one chapter of
+"Software Solutions in C" (Dale Schumacher, ed.; AP Professional 1994;
+ISBN 0-12-632360-7), plus a couple of insignificant updates, plus one
+significant bug fix (done 10 Nov 1995).

-	Permission is granted to anyone to use this software for any
-	purpose on any computer system, and to redistribute it freely,
-	subject to the following restrictions:
- 
-	1. The author is not responsible for the consequences of use of
-		this software, no matter how awful, even if they arise
-		from defects in it.
- 
-	2. The origin of this software must not be misrepresented, either
-		by explicit claim or by omission.
- 
-	3. Altered versions must be plainly marked as such, and must not
-		be misrepresented as being the original software.
- 
-Barring a couple of small items in the BUGS list, this implementation is
-believed 100% compatible with V8.  It should even be binary-compatible,
-sort of, since the only fields in a "struct regexp" that other people have
-any business touching are declared in exactly the same way at the same
-location in the struct (the beginning).
- 
-This implementation is *NOT* AT&T/Bell code, and is not derived from licensed
+Although this package was inspired by the Bell V8 regexp(3), this
+implementation is *NOT* AT&T/Bell code, and is not derived from licensed
 software.  Even though U of T is a V8 licensee.  This software is based on
 a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed
 here is a complete rewrite and hence is not covered by AT&T copyright).
-The software was nearly complete at the time of arrival of our V8 tape.
-I haven't even looked at V8 yet, although a friend elsewhere at U of T has
-been kind enough to run a few test programs using the V8 regexp(3) to resolve
-a few fine points.  I admit to some familiarity with regular-expression
-implementations of the past, but the only one that this code traces any
-ancestry to is the one published in Kernighan & Plauger (from which this
-one draws ideas but not code).
+I admit to some familiarity with regular-expression implementations of
+the past, but the only one that this code traces any ancestry to is the
+one published in Kernighan & Plauger's "Software Tools" (from which
+this one draws ideas but not code).

-Simplistically:  put this stuff into a source directory, copy regexp.h into
-/usr/include, inspect Makefile for compilation options that need changing
-to suit your local environment, and then do "make r".  This compiles the
-regexp(3) functions, compiles a test program, and runs a large set of
-regression tests.  If there are no complaints, then put regexp.o, regsub.o,
-and regerror.o into your C library, and regexp.3 into your manual-pages
-directory.
- 
-Note that if you don't put regexp.h into /usr/include *before* compiling,
-you'll have to add "-I." to CFLAGS before compiling.
+Simplistically:  put this stuff into a source directory, inspect Makefile
+for compilation options that need changing to suit your local environment,
+and then do "make".  This compiles the regexp(3) functions, builds a
+library containing them, compiles a test program, and runs a large set of
+regression tests.  If there are no complaints, then put regexp.h into
+/usr/include, add regexp.o, regsub.o, and regerror.o into your C library
+(or put libregexp.a into /usr/lib), and install regexp.3 (perhaps with slight
+modifications) in your manual-pages directory. 

 The files are:

+COPYRIGHT	copyright notice
+README		this text
 Makefile	instructions to make everything
 regexp.3	manual page
 regexp.h	header file, for /usr/include
@ -64,20 +46,11 @@ tests		test list for try and timer
 This implementation uses nondeterministic automata rather than the
 deterministic ones found in some other implementations, which makes it
 simpler, smaller, and faster at compiling regular expressions, but slower
-at executing them.  In theory, anyway.  This implementation does employ
-some special-case optimizations to make the simpler cases (which do make
-up the bulk of regular expressions actually used) run quickly.  In general,
-if you want blazing speed you're in the wrong place.  Replacing the insides
-of egrep with this stuff is probably a mistake; if you want your own egrep
-you're going to have to do a lot more work.  But if you want to use regular
-expressions a little bit in something else, you're in luck.  Note that many
-existing text editors use nondeterministic regular-expression implementations,
-so you're in good company.
+at executing them.  Many users have found the speed perfectly adequate,
+although replacing the insides of egrep with this code would be a mistake.

-This stuff should be pretty portable, given appropriate option settings.
-If your chars have less than 8 bits, you're going to have to change the
-internal representation of the automaton, although knowledge of the details
-of this is fairly localized.  There are no "reserved" char values except for
+This stuff should be pretty portable, given an ANSI C compiler and
+appropriate option settings.  There are no "reserved" char values except for
 NUL, and no special significance is attached to the top bit of chars.
 The string(3) functions are used a fair bit, on the grounds that they are
 probably faster than coding the operations in line.  Some attempts at code
--- a/scsh/regexp/patch-msg
+++ b/scsh/regexp/patch-msg
@ -0,0 +1,803 @@
+Date: Mon, 1 Jul 1996 23:22:47 GMT
+From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
+To: shivers@lcs.mit.edu, bdc@ai.mit.edu
+Subject: scsh patch for precompiled regexps..
+
+I meant to send this out months ago but I was just too hosed with work.
+
+Here's what I have right now:
+
+There are three pieces here:
+	diffs to the "core" scsh
+	diffs to Henry Spencer's latest regexp library
+	a copy of Henry Spencer's latest regexp library..
+
+It appears to work (it passes the same regression tests as the C library..).
+
+Let me know if I didn't include something needed for this to work..
+
+				- Bill
+
+diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
+*** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
+--- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
+***************
+*** 34,49 ****
+  
+  ;;; Bogus stub definitions for low-level match routines:
+  
+! (define regexp? string?)
+! (define (make-regexp str) str)
+  
+! (define (regexp-exec regexp str . maybe-start)
+    (let ((start (optional-arg maybe-start 0))
+  	(start-vec (make-vector 10))
+  	(end-vec (make-vector 10)))
+!     (and (%regexp-match regexp str start start-vec end-vec)
+! 	 (make-regexp-match str start-vec end-vec))))
+! 
+  
+  ;;; Convert a string into a regex pattern that matches that string exactly --
+  ;;; in other words, quote the special chars with backslashes.
+--- 34,53 ----
+  
+  ;;; Bogus stub definitions for low-level match routines:
+  
+! (define-record iregexp
+!   string)
+  
+! (define regexp? iregexp?)
+! 
+! (define (make-regexp str) 
+!   (make-iregexp (compile-regexp str)))
+! 
+! (define (regexp-exec r s . maybe-start)
+    (let ((start (optional-arg maybe-start 0))
+  	(start-vec (make-vector 10))
+  	(end-vec (make-vector 10)))
+!     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
+! 	 (make-regexp-match s start-vec end-vec))))
+  
+  ;;; Convert a string into a regex pattern that matches that string exactly --
+  ;;; in other words, quote the special chars with backslashes.
+***************
+*** 58,75 ****
+  		  (cons #\\ result)
+  		  result))))))
+  
+! (define-foreign %regexp-match/errno (reg_match (string regexp)
+! 					       (string s)
+! 					       (integer start)
+! 					       (vector-desc start-vec)
+! 					       (vector-desc end-vec))
+!   static-string ; Error string or #f if all is ok.
+!   bool)		; match?
+! 
+! (define (%regexp-match regexp string start start-vec end-vec)
+!   (receive (err match?) (%regexp-match/errno regexp string start
+! 					     start-vec end-vec)
+!     (if err (error err %regexp-match regexp string start) match?)))
+  
+  
+  ;;; I do this one in C, I'm not sure why:
+--- 62,79 ----
+  		  (cons #\\ result)
+  		  result))))))
+  
+! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
+! ;;;					       (string s)
+! ;;;					       (integer start)
+! ;;;					       (vector-desc start-vec)
+! ;;;					       (vector-desc end-vec))
+! ;;;  static-string ; Error string or #f if all is ok.
+! ;;;  bool)		; match?
+! 
+! ;;;(define (%regexp-match regexp string start start-vec end-vec)
+! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
+! ;;;					     start-vec end-vec)
+! ;;;    (if err (error err %regexp-match regexp string start) match?)))
+  
+  
+  ;;; I do this one in C, I'm not sure why:
+***************
+*** 79,81 ****
+--- 83,166 ----
+    (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
+    static-string	; error message -- #f if no error.
+    integer)	; number of files that pass the filter.
+ 
+ ;;; precompiled regexps.
+ 
+ (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
+   static-string
+   integer)
+ 
+ (define-foreign %regexp-compile (reg_comp_comp (string regexp)
+ 					       (string-desc re-buf))
+   static-string)
+ 
+ (define (%regexp-exec-1 r s start sv ev)
+   (receive (err match?) (%regexp-exec r s start sv ev)
+ 	   (if err (error err s start)
+ 	       match?)))
+ 
+ (define-foreign %regexp-exec (reg_exec (string-desc regexp)
+ 				       (string s)
+ 				       (integer start)
+ 				       (vector-desc start-vec)
+ 				       (vector-desc end-vec))
+   static-string
+   bool)
+ 
+ 
+ ;;; Compile the regexp string E into a freshly allocated string buffer.
+ ;;; The first C call reports how many bytes the compiled form needs, the
+ ;;; second fills the buffer.  An error string from either call is raised
+ ;;; as a Scheme error, so a half-filled buffer is never returned.
+ (define (compile-regexp e)
+   (receive (err len)
+ 	   (%regexp-compiled-length e)
+ 	   (if err (error err e)
+ 	       (let* ((buf (make-string len))
+ 		      (err (%regexp-compile e buf)))
+ 		 (if err (error err e) buf)))))
+ 
+ 
+ 
+ (define-foreign %regexp-subst (reg_subst (string-desc regexp)
+ 					 (string m)
+ 					 (string s)
+ 					 (integer start)
+ 					 (vector-desc start-vec)
+ 					 (vector-desc end-vec)
+ 					 (string-desc outbuf))
+   static-string
+   integer)
+ 
+ (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
+ 						 (string m)
+ 						 (string s)
+ 						 (integer start)
+ 						 (vector-desc start-vec)
+ 						 (vector-desc end-vec))
+   static-string
+   integer)
+ 
+ 
+ (define (regexp-subst re match replacement)
+   (let ((cr (iregexp:string re))
+ 	(matchstr (regexp-match:string match))
+ 	(startvec (regexp-match:start match))
+ 	(endvec (regexp-match:end match)))
+     (receive (err outlen)
+ 	     (%regexp-subst-len cr
+ 				matchstr
+ 				replacement
+ 				0
+ 				startvec
+ 				endvec)
+ 	     (if err (error err matchstr replacement)
+ 		 (let ((outbuf (make-string outlen)))
+ 		   (receive (err outlen)
+ 			    (%regexp-subst cr
+ 					   matchstr
+ 					   replacement
+ 					   0
+ 					   startvec
+ 					   endvec
+ 					   outbuf)
+ 			    (if err (error err matchstr replacement)
+ 				(substring outbuf 0 outlen))))))))
+ 
+ 		   
+\ No newline at end of file
+diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
+*** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
+--- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
+***************
+*** 19,24 ****
+--- 19,150 ----
+  /* Stash error msg in global. */
+  void regerror(char *msg) {regexp_error = msg;}
+  
+ /*
+ ** Return NULL normally, error string on error.
+ ** Stash number of bytes needed for compiled regexp into `*len'.
+ ** When an error string is returned, `*len' is not a usable size.
+ */
+ 
+ char *reg_comp_len(const char *re, int *len)
+ {
+     /* Clear the stashed message so a stale error is not reported. */
+     regexp_error = NULL;
+     *len = regcomp_len(re);
+     return regexp_error;
+ }
+ 
+ /*
+ ** Return NULL normally, error string on error.
+ ** Compile regexp into string described by `cr'.
+ */
+ 
+ char *reg_comp_comp(const char *re, scheme_value cr) 
+ {
+     int len = STRING_LENGTH(cr);
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     regexp_error = NULL;
+     r = regcomp_comp(re, r, len); 
+     return regexp_error;
+ }
+ 
+ /* Return NULL normally, error string on error.
+ ** Stash match info in start_vec and end_vec.
+ ** Returns boolean match/no-match in hit.
+ */
+ 
+ char *reg_exec(scheme_value cr, const char *string, int start,
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
+ {
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     regexp_error = 0;
+     *hit = 0;
+     
+     if( regexec(r, string+start) ) {
+ 	int i;
+ 	for(i=0; i<NSUBEXP; i++) {
+ 	    const char *s = r->startp[i];
+ 	    const char *e = r->endp[i];
+ 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
+ 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
+ 	    r->startp[i] = NULL;
+ 	    r->endp[i] = NULL;
+ 	    }
+ 	*hit = 1;
+ 	}
+     return regexp_error;
+ }
+ 
+ /*
+ ** Return NULL normally, error string on error.
+ ** Reload the match offsets held in start_vec/end_vec (relative to `match')
+ ** into the compiled regexp, expand `src' into `outbuf', and stash the
+ ** length of the expansion in `*len' (0 when an error is returned).
+ ** `start' is currently unused.
+ */
+ char *reg_subst(scheme_value cr, const char *match,
+ 		const char *src, int start,
+ 		scheme_value start_vec, scheme_value end_vec,
+ 		scheme_value outbuf, int *len)
+ {
+     int i;
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     /* Non-fixnum entries (e.g. #f) mean "this subexpression did not match". */
+     for (i=0; i<NSUBEXP; i++) 
+     {
+ 	scheme_value se = VECTOR_REF(start_vec, i);
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
+     }
+     
+     regexp_error = NULL;
+     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
+     /* regnsub returns without NUL-terminating outbuf when it reports an
+     ** error, so only measure the result after a clean run. */
+     *len = (regexp_error == NULL) ? strlen(&STRING_REF(outbuf, 0)) : 0;
+     return regexp_error;
+ }
+ 
+ char *reg_subst_len(scheme_value cr, const char *match,
+ 		    const char *src, int start,
+ 		    scheme_value start_vec, scheme_value end_vec,
+ 		    int *len)
+ {
+     int i;
+     regexp *r = (regexp *)&STRING_REF(cr, 0);
+ 
+     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
+ 	return "Illegal start vector";
+ 	}
+     
+     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
+ 	return "Illegal end vector";
+ 	}
+ 
+     for (i=0; i<NSUBEXP; i++) 
+     {
+ 	scheme_value se = VECTOR_REF(start_vec, i);
+ 	scheme_value ee = VECTOR_REF(end_vec, i);
+ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
+ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
+     }
+     
+     regexp_error = NULL;
+     *len = regsublen (r, src);
+     return regexp_error;
+ }
+ 
+ 
+ #if 0
+  /* Return NULL normally, error string on error.
+  ** Stash match info in start_vec and end_vec.
+  ** Returns boolean match/no-match in hit.
+***************
+*** 56,61 ****
+--- 182,188 ----
+      Free(prog);
+      return regexp_error;
+      }
+ #endif
+  
+  
+  char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
+diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
+*** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
+--- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
+***************
+*** 1,6 ****
+--- 1,21 ----
+ #if 0
+  char *reg_match(const char *re, const char *string, int start,
+  		scheme_value start_vec, scheme_value end_vec,
+  		int *hit);
+ #endif
+  
+  char *filter_stringvec(const char *re, char const **stringvec,
+  		       int *nummatch);
+ 
+ char *reg_comp_len(const char *re, int *len);
+ char *reg_comp_comp(const char *re, scheme_value cr);
+ 
+ char *reg_exec(scheme_value cr, const char *string, int start,
+ 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
+ 
+ /* Expand `src' into `outbuf' using the match offsets in the vectors. */
+ char *reg_subst(scheme_value cr, const char *match,
+ 		const char *src, int start,
+ 		scheme_value start_vec, scheme_value end_vec,
+ 		scheme_value outbuf, int *len);
+ 
+ /* Compute the buffer size reg_subst needs for the same arguments. */
+ char *reg_subst_len(scheme_value cr, const char *match,
+ 		    const char *src, int start,
+ 		    scheme_value start_vec, scheme_value end_vec,
+ 		    int *len);
+ 
+
+Only in scsh-0.4.2-regexp/scsh: re2.scm
+diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
+*** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
+--- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
+***************
+*** 413,418 ****
+--- 413,419 ----
+  	  make-regexp
+  	  regexp?
+  	  regexp-exec
+ 	  regexp-subst
+  	  regexp-quote))
+  
+  
+
+regexp library changes:
+
+*** Makefile	1996/04/06 19:24:49	1.1
+--- Makefile	1996/04/06 20:46:26
+***************
+*** 5,11 ****
+  # Things you might want to put in TEST:
+  # -DDEBUG		debugging hooks
+  # -I.			regexp.h from current directory, not /usr/include
+! TEST=-I.
+  
+  # Things you might want to put in PROF:
+  # -pg			profiler
+--- 5,11 ----
+  # Things you might want to put in TEST:
+  # -DDEBUG		debugging hooks
+  # -I.			regexp.h from current directory, not /usr/include
+! TEST=-I. -DDEBUG
+  
+  # Things you might want to put in PROF:
+  # -pg			profiler
+*** regexp.c	1996/04/06 19:24:49	1.1
+--- regexp.c	1996/04/06 22:34:55
+***************
+*** 105,110 ****
+--- 105,111 ----
+   * Utility definitions.
+   */
+  #define	FAIL(m)		{ regerror(m); return(NULL); }
+ #define	FAILN(m)	{ regerror(m); return(-1); }
+  #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
+  #define	META		"^$.[()|?+*\\"
+  
+***************
+*** 162,173 ****
+  const char *exp;
+  {
+  	register regexp *r;
+! 	register char *scan;
+  	int flags;
+  	struct comp co;
+  
+  	if (exp == NULL)
+! 		FAIL("NULL argument to regcomp");
+  
+  	/* First pass: determine size, legality. */
+  	co.regparse = (char *)exp;
+--- 163,193 ----
+  const char *exp;
+  {
+  	register regexp *r;
+! 	size_t len;
+! 
+! 	/*
+! 	 * regcomp_len signals failure with -1 converted to size_t, which
+! 	 * an unsigned "<= 0" test can never see.  Check the sentinel
+! 	 * explicitly so the error already reported is not overwritten by
+! 	 * a bogus "out of space" from the malloc below.
+! 	 */
+! 	len = regcomp_len(exp);
+! 	if (len == 0 || len == (size_t)-1)
+! 		return NULL;
+! 
+! 	/* Allocate space. */
+! 	r = (regexp *)malloc(len);
+! 
+! 	if (r == NULL)
+! 		FAIL("out of space");
+! 	return regcomp_comp(exp, r, len);
+! }
+! 
+! 
+! size_t
+! regcomp_len(exp)
+! const char *exp;
+! {
+  	int flags;
+ 	register regexp *r;
+  	struct comp co;
+  
+  	if (exp == NULL)
+! 		FAILN("NULL argument to regcomp");
+  
+  	/* First pass: determine size, legality. */
+  	co.regparse = (char *)exp;
+***************
+*** 178,198 ****
+  	co.regcode = co.regdummy;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+! 		return(NULL);
+  
+  	/* Small enough for pointer-storage convention? */
+  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
+! 		FAIL("regexp too big");
+  
+! 	/* Allocate space. */
+! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
+! 	if (r == NULL)
+! 		FAIL("out of space");
+  
+  	/* Second pass: emit code. */
+  	co.regparse = (char *)exp;
+  	co.regnpar = 1;
+  	co.regcode = r->program;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+  		return(NULL);
+--- 198,228 ----
+  	co.regcode = co.regdummy;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+! 		return -1;
+  
+  	/* Small enough for pointer-storage convention? */
+  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
+! 		FAILN("regexp too big");
+  
+! 	return (sizeof(regexp) + (size_t)co.regsize);
+! }
+! 
+! 
+! regexp *
+! regcomp_comp(exp, r, len)
+! const char *exp;
+! register regexp *r;
+! size_t len;
+! {
+! 	register char *scan;
+! 	int flags;
+! 	struct comp co;
+  
+  	/* Second pass: emit code. */
+  	co.regparse = (char *)exp;
+  	co.regnpar = 1;
+  	co.regcode = r->program;
+ 	co.regsize = len - sizeof(regexp);
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+  		return(NULL);
+***************
+*** 200,206 ****
+  	/* Dig out information for optimizations. */
+  	r->regstart = '\0';		/* Worst-case defaults. */
+  	r->reganch = 0;
+! 	r->regmust = NULL;
+  	r->regmlen = 0;
+  	scan = r->program+1;		/* First BRANCH. */
+  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
+--- 230,236 ----
+  	/* Dig out information for optimizations. */
+  	r->regstart = '\0';		/* Worst-case defaults. */
+  	r->reganch = 0;
+! 	r->regmust = 0;
+  	r->regmlen = 0;
+  	scan = r->program+1;		/* First BRANCH. */
+  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
+***************
+*** 229,235 ****
+  					longest = OPERAND(scan);
+  					len = strlen(OPERAND(scan));
+  				}
+! 			r->regmust = longest;
+  			r->regmlen = (int)len;
+  		}
+  	}
+--- 259,265 ----
+  					longest = OPERAND(scan);
+  					len = strlen(OPERAND(scan));
+  				}
+! 			r->regmust = longest - r->program;
+  			r->regmlen = (int)len;
+  		}
+  	}
+***************
+*** 648,655 ****
+  struct exec {
+  	char *reginput;		/* String-input pointer. */
+  	char *regbol;		/* Beginning of input, for ^ check. */
+! 	char **regstartp;	/* Pointer to startp array. */
+! 	char **regendp;		/* Ditto for endp. */
+  };
+  
+  /*
+--- 678,685 ----
+  struct exec {
+  	char *reginput;		/* String-input pointer. */
+  	char *regbol;		/* Beginning of input, for ^ check. */
+! 	const char **regstartp;	/* Pointer to startp array. */
+! 	const char **regendp;		/* Ditto for endp. */
+  };
+  
+  /*
+***************
+*** 690,696 ****
+  	}
+  
+  	/* If there is a "must appear" string, look for it. */
+! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
+  		return(0);
+  
+  	/* Mark beginning of line for ^ . */
+--- 720,727 ----
+  	}
+  
+  	/* If there is a "must appear" string, look for it. */
+! 	if ((prog->regmlen > 0) &&
+! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
+  		return(0);
+  
+  	/* Mark beginning of line for ^ . */
+***************
+*** 729,736 ****
+  char *string;
+  {
+  	register int i;
+! 	register char **stp;
+! 	register char **enp;
+  
+  	ep->reginput = string;
+  
+--- 760,767 ----
+  char *string;
+  {
+  	register int i;
+! 	register const char **stp;
+! 	register const char **enp;
+  
+  	ep->reginput = string;
+  
+***************
+*** 1004,1011 ****
+  		printf("start `%c' ", r->regstart);
+  	if (r->reganch)
+  		printf("anchored ");
+! 	if (r->regmust != NULL)
+! 		printf("must have \"%s\"", r->regmust);
+  	printf("\n");
+  }
+  
+--- 1035,1042 ----
+  		printf("start `%c' ", r->regstart);
+  	if (r->reganch)
+  		printf("anchored ");
+! 	if (r->regmlen > 0)
+! 		printf("must have \"%s\"", &r->program[r->regmust]);
+  	printf("\n");
+  }
+  
+*** regexp.h	1996/04/06 19:24:49	1.1
+--- regexp.h	1996/04/07 01:52:19
+***************
+*** 6,16 ****
+   */
+  #define NSUBEXP  10
+  typedef struct regexp {
+! 	char *startp[NSUBEXP];
+! 	char *endp[NSUBEXP];
+  	char regstart;		/* Internal use only. */
+  	char reganch;		/* Internal use only. */
+! 	char *regmust;		/* Internal use only. */
+  	int regmlen;		/* Internal use only. */
+  	char program[1];	/* Unwarranted chumminess with compiler. */
+  } regexp;
+--- 6,16 ----
+   */
+  #define NSUBEXP  10
+  typedef struct regexp {
+! 	const char *startp[NSUBEXP];
+! 	const char *endp[NSUBEXP];
+  	char regstart;		/* Internal use only. */
+  	char reganch;		/* Internal use only. */
+! 	int regmust;		/* Internal use only. */
+  	int regmlen;		/* Internal use only. */
+  	char program[1];	/* Unwarranted chumminess with compiler. */
+  } regexp;
+***************
+*** 18,21 ****
+--- 18,27 ----
+  extern regexp *regcomp(const char *re);
+  extern int regexec(regexp *rp, const char *s);
+  extern void regsub(const regexp *rp, const char *src, char *dst);
+ extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
+ extern size_t regsublen(const regexp *rp, const char *src);
+ 
+  extern void regerror(char *message);
+ extern size_t regcomp_len(const char *exp);
+ extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
+ 
+*** regsub.c	1996/04/06 19:24:49	1.1
+--- regsub.c	1996/04/07 02:10:29
+***************
+*** 11,25 ****
+  /*
+   - regsub - perform substitutions after a regexp match
+   */
+  void
+! regsub(rp, source, dest)
+  const regexp *rp;
+  const char *source;
+  char *dest;
+  {
+  	register regexp * const prog = (regexp *)rp;
+! 	register char *src = (char *)source;
+  	register char *dst = dest;
+  	register char c;
+  	register int no;
+  	register size_t len;
+--- 11,42 ----
+  /*
+   - regsub - perform substitutions after a regexp match
+   */
+ 
+ void regsub(rp, source, dest)
+ const regexp *rp;
+ const char *source;
+ char *dest;
+ {
+         regnsub(rp, source, dest, BUFSIZ);
+ }
+ 
+ 
+ 
+ /*
+  - regnsub - perform bounds-checked substitutions after a regexp match
+  */
+  void
+! regnsub(rp, source, dest, destlen)
+  const regexp *rp;
+  const char *source;
+  char *dest;
+ size_t destlen;
+  {
+  	register regexp * const prog = (regexp *)rp;
+! 	register const char *src = (char *)source;
+  	register char *dst = dest;
+ 	char *dstend = dest + destlen;
+ 	char *odst;
+  	register char c;
+  	register int no;
+  	register size_t len;
+***************
+*** 45,55 ****
+  			if (c == '\\' && (*src == '\\' || *src == '&'))
+  				c = *src++;
+  			*dst++ = c;
+  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+! 					prog->endp[no] > prog->startp[no]) {
+  			len = prog->endp[no] - prog->startp[no];
+! 			(void) strncpy(dst, prog->startp[no], len);
+  			dst += len;
+  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
+  				regerror("damaged match string");
+  				return;
+--- 62,83 ----
+  			if (c == '\\' && (*src == '\\' || *src == '&'))
+  				c = *src++;
+  			*dst++ = c;
+ 			if (dst >= dstend) 
+ 			{
+ 			    	regerror("output buffer too small");
+ 				return;
+ 			}
+  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+! 			   prog->endp[no] > prog->startp[no]) {
+  			len = prog->endp[no] - prog->startp[no];
+! 			odst = dst;
+  			dst += len;
+ 			if (dst >= dstend) 
+ 			{
+ 			    	regerror("output buffer too small");
+ 				return;
+ 			}
+ 			(void) strncpy(odst, prog->startp[no], len);
+  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
+  				regerror("damaged match string");
+  				return;
+***************
+*** 58,60 ****
+--- 86,131 ----
+  	}
+  	*dst++ = '\0';
+  }
+ 
+ /*
+  - regsublen - compute the buffer size a substitution will need
+  *
+  * Walks `source' the same way the substitution does and returns the
+  * number of bytes of output, including the terminating NUL.  On a NULL
+  * argument or a damaged regexp it calls regerror() and returns
+  * (size_t)-1.
+  */
+ size_t regsublen(rp, source)
+ const regexp *rp;
+ const char *source;
+ {
+     register regexp * const prog = (regexp *)rp;
+     register const char *src = source;
+     register char c;
+     register int no;
+     register size_t len = 0;
+ 	
+     if (prog == NULL || source == NULL) {
+ 	regerror("NULL parameter to regsublen");
+ 	return (size_t)-1;
+     }
+     
+     if ((unsigned char)*(prog->program) != MAGIC) {
+ 	regerror("damaged regexp");
+ 	return (size_t)-1;
+     }
+     while ((c = *src++) != '\0') {
+ 	if (c == '&')
+ 	    no = 0;
+ 	/* isdigit() is only defined for unsigned char values and EOF. */
+ 	else if (c == '\\' && isdigit((unsigned char)*src))
+ 	    no = *src++ - '0';
+ 	else
+ 	    no = -1;
+ 	if (no < 0) {		/* Ordinary character. */
+ 	    /* "\\\\" and "\\&" collapse to one output character. */
+ 	    if (c == '\\' && (*src == '\\' || *src == '&'))
+ 		src++;
+ 	    len++;
+ 	} else {
+ 	    const char *s = prog->startp[no];
+ 	    const char *e = prog->endp[no];
+ 	    /* Unset or empty subexpressions contribute nothing. */
+ 	    if ((s != NULL) && (e != NULL) && (e > s)) {
+ 		len += (size_t)(e - s);
+ 	    }
+ 	}
+     }
+     return len+1;		/* Room for the terminating NUL. */
+ }
+ 
+ 
+
+Original regexp code from henry:
+[unpacked & deleted -Olin]
--- a/scsh/regexp/regerror.c
+++ b/scsh/regexp/regerror.c
@ -1,4 +1,8 @@
+/*
+ * regerror
+ */
 #include <stdio.h>
+#include <stdlib.h>

 void
 regerror(s)
@ -7,8 +11,8 @@ regerror(s)
 #ifdef ERRAVAIL
 	error("regexp: %s", s);
 #else
-    fprintf(stderr, "regexp(3): %s", s);
-    exit(1);
+	fprintf(stderr, "regexp(3): %s\n", s);
+	exit(EXIT_FAILURE);
 #endif
 	/* NOTREACHED */
 }
--- a/scsh/regexp/regexp.3
+++ b/scsh/regexp/regexp.3
@ -0,0 +1,186 @@
+.TH REGEXP 3 "2 Sept 1995"
+.SH NAME
+regcomp, regexec, regsub, regerror \- regular expression handler
+.SH SYNOPSIS
+.ft B
+.nf
+#include <regexp.h>
+
+regexp *regcomp(exp)
+const char *exp;
+
+int regexec(prog, string)
+regexp *prog;
+const char *string;
+
+void regsub(prog, source, dest)
+const regexp *prog;
+const char *source;
+char *dest;
+
+void regerror(msg)
+char *msg;
+.SH DESCRIPTION
+These functions implement
+.IR egrep (1)-style
+regular expressions and supporting facilities.
+.PP
+.I Regcomp
+compiles a regular expression into a structure of type
+.IR regexp ,
+and returns a pointer to it.
+The space has been allocated using
+.IR malloc (3)
+and may be released by
+.IR free .
+.PP
+.I Regexec
+matches a NUL-terminated \fIstring\fR against the compiled regular expression
+in \fIprog\fR.
+It returns 1 for success and 0 for failure, and adjusts the contents of
+\fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.
+.PP
+The members of a
+.I regexp
+structure include at least the following (not necessarily in order):
+.PP
+.RS
+char *startp[NSUBEXP];
+.br
+char *endp[NSUBEXP];
+.RE
+.PP
+where
+.I NSUBEXP
+is defined (as 10) in the header file.
+Once a successful \fIregexec\fR has been done using the \fIregexp\fR,
+each \fIstartp\fR-\fIendp\fR pair describes one substring
+within the \fIstring\fR,
+with the \fIstartp\fR pointing to the first character of the substring and
+the \fIendp\fR pointing to the first character following the substring.
+The 0th substring is the substring of \fIstring\fR that matched the whole
+regular expression.
+The others are those substrings that matched parenthesized expressions
+within the regular expression, with parenthesized expressions numbered
+in left-to-right order of their opening parentheses.
+.PP
+.I Regsub
+copies \fIsource\fR to \fIdest\fR, making substitutions according to the
+most recent \fIregexec\fR performed using \fIprog\fR.
+Each instance of `&' in \fIsource\fR is replaced by the substring
+indicated by \fIstartp\fR[\fI0\fR] and
+\fIendp\fR[\fI0\fR].
+Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by
+the substring indicated by
+\fIstartp\fR[\fIn\fR] and
+\fIendp\fR[\fIn\fR].
+To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';
+to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with
+another `\e'.
+.PP
+.I Regerror
+is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,
+or \fIregsub\fR.
+The default \fIregerror\fR writes the string \fImsg\fR,
+with a suitable indicator of origin,
+on the standard
+error output
+and invokes \fIexit\fR(2).
+.I Regerror
+can be replaced by the user if other actions are desirable.
+.SH "REGULAR EXPRESSION SYNTAX"
+A regular expression is zero or more \fIbranches\fR, separated by `|'.
+It matches anything that matches one of the branches.
+.PP
+A branch is zero or more \fIpieces\fR, concatenated.
+It matches a match for the first, followed by a match for the second, etc.
+.PP
+A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.
+An atom followed by `*' matches a sequence of 0 or more matches of the atom.
+An atom followed by `+' matches a sequence of 1 or more matches of the atom.
+An atom followed by `?' matches a match of the atom, or the null string.
+.PP
+An atom is a regular expression in parentheses (matching a match for the
+regular expression), a \fIrange\fR (see below), `.'
+(matching any single character), `^' (matching the null string at the
+beginning of the input string), `$' (matching the null string at the
+end of the input string), a `\e' followed by a single character (matching
+that character), or a single character with no other significance
+(matching that character).
+.PP
+A \fIrange\fR is a sequence of characters enclosed in `[]'.
+It normally matches any single character from the sequence.
+If the sequence begins with `^',
+it matches any single character \fInot\fR from the rest of the sequence.
+If two characters in the sequence are separated by `\-', this is shorthand
+for the full list of ASCII characters between them
+(e.g. `[0-9]' matches any decimal digit).
+To include a literal `]' in the sequence, make it the first character
+(following a possible `^').
+To include a literal `\-', make it the first or last character.
+.SH AMBIGUITY
+If a regular expression could match two different parts of the input string,
+it will match the one which begins earliest.
+If both begin in the same place but match different lengths, or match
+the same length in different ways, life gets messier, as follows.
+.PP
+In general, the possibilities in a list of branches are considered in
+left-to-right order, the possibilities for `*', `+', and `?' are
+considered longest-first, nested constructs are considered from the
+outermost in, and concatenated constructs are considered leftmost-first.
+The match that will be chosen is the one that uses the earliest
+possibility in the first choice that has to be made.
+If there is more than one choice, the next will be made in the same manner
+(earliest possibility) subject to the decision on the first choice.
+And so forth.
+.PP
+For example, `(ab|a)b*c' could match `abc' in one of two ways.
+The first choice is between `ab' and `a'; since `ab' is earlier, and does
+lead to a successful overall match, it is chosen.
+Since the `b' is already spoken for,
+the `b*' must match its last possibility\(emthe empty string\(emsince
+it must respect the earlier choice.
+.PP
+In the particular case where the regular expression does not use `|'
+and does not apply `*', `+', or `?' to parenthesized subexpressions,
+the net effect is that the longest possible
+match will be chosen.
+So `ab*', presented with `xabbbby', will match `abbbb'.
+Note that if `ab*' is tried against `xabyabbbz', it
+will match `ab' just after `x', due to the begins-earliest rule.
+(In effect, the decision on where to start the match is the first choice
+to be made, hence subsequent choices must respect it even if this leads them
+to less-preferred alternatives.)
+.SH SEE ALSO
+egrep(1), expr(1)
+.SH DIAGNOSTICS
+\fIRegcomp\fR returns NULL for a failure
+(\fIregerror\fR permitting),
+where failures are syntax errors, exceeding implementation limits,
+or applying `+' or `*' to a possibly-null operand.
+.SH HISTORY
+This is a revised version.
+Both code and manual page were
+originally written by Henry Spencer at University of Toronto.
+They are intended to be compatible with the Bell V8 \fIregexp\fR(3),
+but are not derived from Bell code.
+.SH BUGS
+Empty branches and empty regular expressions are not portable
+to other, otherwise-similar, implementations.
+.PP
+The ban on
+applying `*' or `+' to a possibly-null operand is an artifact of the
+simplistic implementation.
+.PP
+The match-choice rules are complex.
+A simple ``longest match'' rule would be preferable,
+but is harder to implement.
+.PP
+Although there is a general similarity to POSIX.2 ``extended'' regular
+expressions, neither the regular-expression syntax nor the programming
+interface is an exact match.
+.PP
+Due to emphasis on
+compactness and simplicity,
+it's not strikingly fast.
+It does give some attention to handling simple cases quickly.
--- a/scsh/regexp/regexp.c
+++ b/scsh/regexp/regexp.c
--- a/scsh/regexp/regexp.h
+++ b/scsh/regexp/regexp.h
@ -6,16 +6,22 @@
 */
 #define NSUBEXP  10
 typedef struct regexp {
-	char *startp[NSUBEXP];
-	char *endp[NSUBEXP];
+	const char *startp[NSUBEXP];
+	const char *endp[NSUBEXP];
 	char regstart;		/* Internal use only. */
 	char reganch;		/* Internal use only. */
-	char *regmust;		/* Internal use only. */
+	int regmust;		/* Internal use only. */
 	int regmlen;		/* Internal use only. */
 	char program[1];	/* Unwarranted chumminess with compiler. */
 } regexp;

-extern regexp *regcomp();
-extern int regexec();
-extern void regsub();
-extern void regerror();
+extern regexp *regcomp(const char *re);
+extern int regexec(regexp *rp, const char *s);
+extern void regsub(const regexp *rp, const char *src, char *dst);
+extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
+extern size_t regsublen(const regexp *rp, const char *src);
+
+extern void regerror(char *message);
+extern size_t regcomp_len(const char *exp);
+extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
+
--- a/scsh/regexp/regsub.c
+++ b/scsh/regexp/regsub.c
@ -1,66 +1,59 @@
 /*
- * regsub @(#)regsub.c	1.3 of 2 April 86 
- *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer.  Not
- * derived from licensed software. 
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions: 
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it. 
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission. 
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. 
+ * regsub
 */
 #include <stdio.h>
-#ifdef AMIGA
-#include "regexp.h"
-#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
 #include <regexp.h>
-#endif
 #include "regmagic.h"

-#ifndef CHARBITS
-#define	UCHARAT(p)	((int)*(unsigned char *)(p))
-#else
-#define	UCHARAT(p)	((int)*(p)&CHARBITS)
-#endif
- 
 /*
- * - regsub - perform substitutions after a regexp match 
+ - regsub - perform substitutions after a regexp match
 */
-void
-regsub(prog, source, dest)
-    regexp         *prog;
-    char           *source;
+
+void regsub(rp, source, dest)
+const regexp *rp;
+const char *source;
 char *dest;
 {
-    register char  *src;
-    register char  *dst;
+        regnsub(rp, source, dest, BUFSIZ);
+}
+
+
+
+/*
+ - regnsub - perform bounds-checked substitutions after a regexp match
+ */
+void
+regnsub(rp, source, dest, destlen)
+const regexp *rp;
+const char *source;
+char *dest;
+size_t destlen;
+{
+	register regexp * const prog = (regexp *)rp;
+	register const char *src = (char *)source;
+	register char *dst = dest;
+	char *dstend = dest + destlen;
+	char *odst;
 	register char c;
 	register int no;
-    register int    len;
-    extern char    *strncpy();
+	register size_t len;

 	if (prog == NULL || source == NULL || dest == NULL) {
-	regerror("NULL parm to regsub");
+		regerror("NULL parameter to regsub");
 		return;
 	}
-    if (UCHARAT(prog->program) != MAGIC) {
-	regerror("damaged regexp fed to regsub");
+	if ((unsigned char)*(prog->program) != MAGIC) {
+		regerror("damaged regexp");
 		return;
 	}
-    src = source;
-    dst = dest;
+
 	while ((c = *src++) != '\0') {
 		if (c == '&')
 			no = 0;
-	else if (c == '\\' && '0' <= *src && *src <= '9')
+		else if (c == '\\' && isdigit(*src))
 			no = *src++ - '0';
 		else
 			no = -1;
@ -69,11 +62,23 @@ regsub(prog, source, dest)
 			if (c == '\\' && (*src == '\\' || *src == '&'))
 				c = *src++;
 			*dst++ = c;
-	} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
+			if (dst >= dstend) 
+			{
+			    	regerror("output buffer too small");
+				return;
+			}
+		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+			   prog->endp[no] > prog->startp[no]) {
 			len = prog->endp[no] - prog->startp[no];
-	    (void) strncpy(dst, prog->startp[no], len);
+			odst = dst;
 			dst += len;
-	    if (len != 0 && *(dst - 1) == '\0') {	/* strncpy hit NUL. */
+			if (dst >= dstend) 
+			{
+			    	regerror("output buffer too small");
+				return;
+			}
+			(void) strncpy(odst, prog->startp[no], len);
+			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 				regerror("damaged match string");
 				return;
 			}
@ -81,3 +86,46 @@ regsub(prog, source, dest)
 	}
 	*dst++ = '\0';
 }
+
+/*
+ - regsublen - size of the buffer a substitution would need
+ *
+ * Walks `source' using the same rules as the substitution routine in
+ * this file ('&' and '\digit' expand to a recorded submatch, '\\' and
+ * '\&' collapse to one literal character) but only counts characters
+ * instead of copying them.
+ *
+ * rp      compiled regexp whose startp/endp describe the latest match
+ * source  NUL-terminated substitution template
+ *
+ * Returns the number of bytes required, including the terminating NUL.
+ * If regerror() returns after a NULL argument or a damaged regexp is
+ * reported, the result is (size_t)-1.
+ */
+size_t regsublen(rp, source)
+const regexp *rp;
+const char *source;
+{
+    register const char *src = source;
+    register char c;
+    register int no;
+    register size_t len = 0;	/* size_t: matches the return type */
+
+    if (rp == NULL || source == NULL) {
+	regerror("NULL parameter to regsublen");
+	return (size_t)-1;
+    }
+
+    if ((unsigned char)*(rp->program) != MAGIC) {
+	regerror("damaged regexp");
+	return (size_t)-1;
+    }
+    while ((c = *src++) != '\0') {
+	if (c == '&')
+	    no = 0;
+	/* isdigit() is only defined for unsigned char values and EOF. */
+	else if (c == '\\' && isdigit((unsigned char)*src))
+	    no = *src++ - '0';
+	else
+	    no = -1;
+	if (no < 0) {		/* Ordinary character. */
+	    /* An escaped '\' or '&' consumes two input chars, emits one. */
+	    if (c == '\\' && (*src == '\\' || *src == '&'))
+		src++;
+	    len++;
+	} else {
+	    const char *s = rp->startp[no];
+	    const char *e = rp->endp[no];
+	    /* Unset or empty submatches contribute nothing. */
+	    if ((s != NULL) && (e != NULL) && (e > s)) {
+		len += (size_t)(e - s);
+	    }
+	}
+    }
+    return len + 1;		/* +1 for the terminating NUL */
+}
+
+
--- a/scsh/regexp/tests
+++ b/scsh/regexp/tests
@ -0,0 +1,127 @@
+abc	abc	y	&	abc
+abc	xbc	n	-	-
+abc	axc	n	-	-
+abc	abx	n	-	-
+abc	xabcy	y	&	abc
+abc	ababc	y	&	abc
+ab*c	abc	y	&	abc
+ab*bc	abc	y	&	abc
+ab*bc	abbc	y	&	abbc
+ab*bc	abbbbc	y	&	abbbbc
+ab+bc	abbc	y	&	abbc
+ab+bc	abc	n	-	-
+ab+bc	abq	n	-	-
+ab+bc	abbbbc	y	&	abbbbc
+ab?bc	abbc	y	&	abbc
+ab?bc	abc	y	&	abc
+ab?bc	abbbbc	n	-	-
+ab?c	abc	y	&	abc
+^abc$	abc	y	&	abc
+^abc$	abcc	n	-	-
+^abc	abcc	y	&	abc
+^abc$	aabc	n	-	-
+abc$	aabc	y	&	abc
+^	abc	y	&	
+$	abc	y	&	
+a.c	abc	y	&	abc
+a.c	axc	y	&	axc
+a.*c	axyzc	y	&	axyzc
+a.*c	axyzd	n	-	-
+a[bc]d	abc	n	-	-
+a[bc]d	abd	y	&	abd
+a[b-d]e	abd	n	-	-
+a[b-d]e	ace	y	&	ace
+a[b-d]	aac	y	&	ac
+a[-b]	a-	y	&	a-
+a[b-]	a-	y	&	a-
+[k]	ab	n	-	-
+a[b-a]	-	c	-	-
+a[]b	-	c	-	-
+a[	-	c	-	-
+a]	a]	y	&	a]
+a[]]b	a]b	y	&	a]b
+a[^bc]d	aed	y	&	aed
+a[^bc]d	abd	n	-	-
+a[^-b]c	adc	y	&	adc
+a[^-b]c	a-c	n	-	-
+a[^]b]c	a]c	n	-	-
+a[^]b]c	adc	y	&	adc
+ab|cd	abc	y	&	ab
+ab|cd	abcd	y	&	ab
+()ef	def	y	&-\1	ef-
+()*	-	c	-	-
+*a	-	c	-	-
+^*	-	c	-	-
+$*	-	c	-	-
+(*)b	-	c	-	-
+$b	b	n	-	-
+a\	-	c	-	-
+a\(b	a(b	y	&-\1	a(b-
+a\(*b	ab	y	&	ab
+a\(*b	a((b	y	&	a((b
+a\\b	a\b	y	&	a\b
+abc)	-	c	-	-
+(abc	-	c	-	-
+((a))	abc	y	&-\1-\2	a-a-a
+(a)b(c)	abc	y	&-\1-\2	abc-a-c
+a+b+c	aabbabc	y	&	abc
+a**	-	c	-	-
+a*?	-	c	-	-
+(a*)*	-	c	-	-
+(a*)+	-	c	-	-
+(a|)*	-	c	-	-
+(a*|b)*	-	c	-	-
+(a+|b)*	ab	y	&-\1	ab-b
+(a+|b)+	ab	y	&-\1	ab-b
+(a+|b)?	ab	y	&-\1	a-a
+[^ab]*	cde	y	&	cde
+(^)*	-	c	-	-
+(ab|)*	-	c	-	-
+)(	-	c	-	-
+	abc	y	&	
+abc		n	-	-
+a*		y	&	
+abcd	abcd	y	&-\&-\\&	abcd-&-\abcd
+a(bc)d	abcd	y	\1-\\1-\\\1	bc-\1-\bc
+([abc])*d	abbbcd	y	&-\1	abbbcd-c
+([abc])*bcd	abcd	y	&-\1	abcd-a
+a|b|c|d|e	e	y	&	e
+(a|b|c|d|e)f	ef	y	&-\1	ef-e
+((a*|b))*	-	c	-	-
+abcd*efg	abcdefg	y	&	abcdefg
+ab*	xabyabbbz	y	&	ab
+ab*	xayabbbz	y	&	a
+(ab|cd)e	abcde	y	&-\1	cde-cd
+[abhgefdc]ij	hij	y	&	hij
+^(ab|cd)e	abcde	n	x\1y	xy
+(abc|)ef	abcdef	y	&-\1	ef-
+(a|b)c*d	abcd	y	&-\1	bcd-b
+(ab|ab*)bc	abc	y	&-\1	abc-a
+a([bc]*)c*	abc	y	&-\1	abc-bc
+a([bc]*)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+a([bc]+)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+a([bc]*)(c+d)	abcd	y	&-\1-\2	abcd-b-cd
+a[bcd]*dcdcde	adcdcde	y	&	adcdcde
+a[bcd]+dcdcde	adcdcde	n	-	-
+(ab|a)b*c	abc	y	&-\1	abc-ab
+((a)(b)c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
+[ -~]*	abc	y	&	abc
+[ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[a-zA-Z_][a-zA-Z0-9_]*	alpha	y	&	alpha
+^a(bc+|b[eh])g|.h$	abh	y	&-\1	bh-
+(bc+d$|ef*g.|h?i(j|k))	effgz	y	&-\1-\2	effgz-effgz-
+(bc+d$|ef*g.|h?i(j|k))	ij	y	&-\1-\2	ij-ij-j
+(bc+d$|ef*g.|h?i(j|k))	effg	n	-	-
+(bc+d$|ef*g.|h?i(j|k))	bcdd	n	-	-
+(bc+d$|ef*g.|h?i(j|k))	reffgz	y	&-\1-\2	effgz-effgz-
+((((((((((a))))))))))	-	c	-	-
+(((((((((a)))))))))	a	y	&	a
+multiple words of text	uh-uh	n	-	-
+multiple words	multiple words, yeah	y	&	multiple words
+(.*)c(.*)	abcde	y	&-\1-\2	abcde-ab-de
+\((.*), (.*)\)	(a, b)	y	(\2, \1)	(b, a)
--- a/scsh/regexp/timer.c
+++ b/scsh/regexp/timer.c
@ -0,0 +1,164 @@
+/*
+ * Simple timing program for regcomp().
+ * Usage: timer ncomp nexec nsub
+ *	or
+ *	timer ncomp nexec nsub regexp string [ answer [ sub ] ]
+ *
+ * The second form is for timing repetitions of a single test case.
+ * The first form's test data is a compiled-in copy of the "tests" file.
+ * Ncomp, nexec, nsub are how many times to do each regcomp, regexec,
+ * and regsub.  The way to time an operation individually is to do something
+ * like "timer 1 50 1".
+ */
+#include <stdio.h>
+
+/*
+ * One test case, as consumed by try():
+ *	re	regular expression handed to regcomp()
+ *	str	string handed to regexec()
+ *	ans	expected result code; try() checks its first character
+ *		against 'c' (compile failure) and 'n' (match failure)
+ *	src	substitution source handed to regsub()
+ *	dst	expected regsub() output
+ * The table body is pulled in from timer.t.h; the final all-NULL entry
+ * is the sentinel that multiple() stops on.
+ */
+struct try {
+	char *re, *str, *ans, *src, *dst;
+} tests[] = {
+#include "timer.t.h"
+{ NULL, NULL, NULL, NULL, NULL }
+};
+
+#include <regexp.h>
+
+int errreport = 0;		/* Report errors via errseen? */
+char *errseen = NULL;		/* Error message. */
+
+char *progname;
+
+/*
+ * main - parse the repetition counts and run one case or the whole table
+ *
+ * With fewer than three count arguments every count defaults to 1.
+ * With at least a regexp and a string (argc > 5) a single case built
+ * from the command line is run; otherwise the compiled-in table is used.
+ */
+/* ARGSUSED */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	int ncomp, nexec, nsub;
+	struct try one;
+
+	if (argc < 4) {
+		ncomp = 1;
+		nexec = 1;
+		nsub = 1;
+	} else {
+		ncomp = atoi(argv[1]);
+		nexec = atoi(argv[2]);
+		nsub = atoi(argv[3]);
+	}
+
+	progname = argv[0];
+	if (argc > 5) {
+		one.re = argv[4];
+		one.str = argv[5];
+		/* Without an explicit answer, expect a successful match. */
+		if (argc > 6)
+			one.ans = argv[6];
+		else
+			one.ans = "y";
+		if (argc > 7) {
+			one.src = argv[7];
+			/*
+			 * NOTE(review): the expected output here is a fixed
+			 * placeholder, so try() will presumably report a
+			 * mismatch for most user-supplied substitutions.
+			 */
+			one.dst = "xxx";
+		} else {
+			one.src = "x";
+			one.dst = "x";
+		}
+		errreport = 1;
+		try(one, ncomp, nexec, nsub);
+	} else
+		multiple(ncomp, nexec, nsub);
+	exit(0);
+}
+
+/*
+ * regerror - replacement error hook for the regexp library
+ *
+ * While errreport is set the message is only remembered in errseen so
+ * the caller can inspect it; otherwise it is handed to error().
+ */
+void
+regerror(s)
+char *s;
+{
+	if (errreport)
+		errseen = s;
+	else
+		/* Pass the text as data, never as the printf format. */
+		error("%s", s);
+}
+
+#ifndef ERRAVAIL
+/*
+ * error - print "regexp: " followed by the formatted message and a
+ * newline on stderr, then exit with status 1.  s1 is a printf-style
+ * format taking the single string argument s2.
+ */
+error(s1, s2)
+char *s1;
+char *s2;
+{
+	fputs("regexp: ", stderr);
+	fprintf(stderr, s1, s2);
+	fputc('\n', stderr);
+	exit(1);
+}
+#endif
+
+int lineno = 0;
+
+/*
+ * multiple - run every entry of the compiled-in tests[] table
+ *
+ * Errors are captured in errseen (errreport is set) rather than being
+ * fatal, and lineno is advanced per entry so complain() can report
+ * which case failed.
+ */
+multiple(ncomp, nexec, nsub)
+int ncomp, nexec, nsub;
+{
+	register int i;
+
+	errreport = 1;
+	/* The table ends at the sentinel entry whose re is NULL. */
+	for (i = 0; tests[i].re != NULL; i++) {
+		lineno++;
+		try(tests[i], ncomp, nexec, nsub);
+	}
+}
+
+/*
+ * try - run one test case, repeating each phase for timing
+ *
+ * fields	the case: regexp, subject string, expected result code,
+ *		substitution source and expected substitution output
+ * ncomp	how many times to regcomp() the expression
+ * nexec	how many times to regexec() it against the string
+ * nsub		how many times to regsub() the result
+ *
+ * Any disagreement with the expected outcome is reported via complain().
+ */
+try(fields, ncomp, nexec, nsub)
+struct try fields;
+int ncomp, nexec, nsub;
+{
+	regexp *r;
+	char dbuf[BUFSIZ];
+	register int i;
+
+	errseen = NULL;
+	r = regcomp(fields.re);
+	if (r == NULL) {
+		/* A compile failure is only wrong if it was not expected. */
+		if (*fields.ans != 'c')
+			complain("regcomp failure in `%s'", fields.re);
+		return;
+	}
+	if (*fields.ans == 'c') {
+		complain("unexpected regcomp success in `%s'", fields.re);
+		free((char *)r);
+		return;
+	}
+	/* The first compile is already done; repeat ncomp-1 more times. */
+	for (i = ncomp-1; i > 0; i--) {
+		free((char *)r);
+		r = regcomp(fields.re);
+		if (r == NULL) {
+			/* Do not hand a failed recompile to regexec(). */
+			complain("regcomp failure in `%s'", fields.re);
+			return;
+		}
+	}
+	if (!regexec(r, fields.str)) {
+		if (*fields.ans != 'n')
+			/* Name the expression the `%s' in the message asks for. */
+			complain("regexec failure in `%s'", fields.re);
+		free((char *)r);
+		return;
+	}
+	if (*fields.ans == 'n') {
+		complain("unexpected regexec success", "");
+		free((char *)r);
+		return;
+	}
+	/* One regexec() has already run above. */
+	for (i = nexec-1; i > 0; i--)
+		(void) regexec(r, fields.str);
+	errseen = NULL;
+	for (i = nsub; i > 0; i--)
+		regsub(r, fields.src, dbuf);
+	if (errseen != NULL) {
+		complain("regsub complaint", "");
+		free((char *)r);
+		return;
+	}
+	/* dbuf is only filled in when regsub() ran at least once. */
+	if (nsub > 0 && strcmp(dbuf, fields.dst) != 0)
+		complain("regsub result `%s' wrong", dbuf);
+	free((char *)r);
+}
+
+/*
+ * complain - report a failed test case on stderr
+ *
+ * Output is "try: <lineno>: ", then s1 formatted with the string s2,
+ * then the pending regexp error message (or nothing) in parentheses.
+ */
+complain(s1, s2)
+char *s1;
+char *s2;
+{
+	char *detail;
+
+	if (errseen != NULL)
+		detail = errseen;
+	else
+		detail = "";
+
+	fprintf(stderr, "try: %d: ", lineno);
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, " (%s)\n", detail);
+}
--- a/scsh/regexp/try.c
+++ b/scsh/regexp/try.c
@ -1,32 +1,15 @@
 /*
 * Simple test program for regexp(3) stuff.  Knows about debugging hooks.
+ * Usage: try re [string [output [-]]]
+ * The re is compiled and dumped, regexeced against the string, the result
+ * is applied to output using regsub().  The - triggers a running narrative
+ * from regexec().  Dumping and narrative don't happen unless DEBUG.
 *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer.  Not
- * derived from licensed software. 
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions: 
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it. 
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission. 
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. 
- *
- * Usage: try re [string [output [-]]] The re is compiled and dumped, regexeced
- * against the string, the result is applied to output using regsub().  The -
- * triggers a running narrative from regexec().  Dumping and narrative don't
- * happen unless DEBUG. 
- *
- * If there are no arguments, stdin is assumed to be a stream of lines with five
- * fields:  a r.e., a string to match it against, a result code, a source
- * string for regsub, and the proper result.  Result codes are 'c' for
- * compile failure, 'y' for match success, 'n' for match failure. Field
- * separator is tab. 
+ * If there are no arguments, stdin is assumed to be a stream of lines with
+ * five fields:  a r.e., a string to match it against, a result code, a
+ * source string for regsub, and the proper result.  Result codes are 'c'
+ * for compile failure, 'y' for match success, 'n' for match failure.
+ * Field separator is tab.
 */
 #include <stdio.h>
 #include <regexp.h>
@ -62,6 +45,7 @@ main(argc, argv)
 		multiple();
 		exit(status);
 	}
+
 	r = regcomp(argv[1]);
 	if (r == NULL)
 		error("regcomp failure", "");
@ -204,7 +188,7 @@ try(fields)
 	}
 	if (!regexec(r, fields[1])) {
 		if (*fields[2] != 'n')
-	    complain("regexec failure in `%s'", "");
+			complain("regexec failure in `%s'", fields[0]);
 		free((char *)r);
 		return;
 	}