From a61145fa6ad4451a8c662aa5cc8bee32c364a219 Mon Sep 17 00:00:00 2001
From: bdc <bdc>
Date: Tue, 24 Sep 1996 01:29:51 +0000
Subject: [PATCH] updated regexp

---
 scsh/regexp/COPYRIGHT   |   19 +
 scsh/regexp/Makefile.in |  160 +--
 scsh/regexp/README      |   95 +-
 scsh/regexp/patch-msg   |  803 +++++++++++++++
 scsh/regexp/regerror.c  |   18 +-
 scsh/regexp/regexp.3    |  186 ++++
 scsh/regexp/regexp.c    | 2069 +++++++++++++++++++--------------------
 scsh/regexp/regexp.h    |   22 +-
 scsh/regexp/regsub.c    |  168 ++--
 scsh/regexp/tests       |  127 +++
 scsh/regexp/timer.c     |  164 ++++
 scsh/regexp/try.c       |  384 ++++----
 12 files changed, 2732 insertions(+), 1483 deletions(-)
 create mode 100644 scsh/regexp/COPYRIGHT
 create mode 100644 scsh/regexp/patch-msg
 create mode 100644 scsh/regexp/regexp.3
 create mode 100644 scsh/regexp/tests
 create mode 100644 scsh/regexp/timer.c
diff --git a/scsh/regexp/COPYRIGHT b/scsh/regexp/COPYRIGHT
new file mode 100644
index 0000000..36b9804
--- /dev/null
+++ b/scsh/regexp/COPYRIGHT
@@ -0,0 +1,19 @@
+Copyright (c) 1986, 1993, 1995 by University of Toronto.
+Written by Henry Spencer.  Not derived from licensed software.
+
+Permission is granted to anyone to use this software for any
+purpose on any computer system, and to redistribute it in any way,
+subject to the following restrictions:
+
+1. The author is not responsible for the consequences of use of
+	this software, no matter how awful, even if they arise
+	from defects in it.
+
+2. The origin of this software must not be misrepresented, either
+	by explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not
+	be misrepresented (by explicit claim or omission) as being
+	the original software.
+
+4. This notice must not be removed or altered.
diff --git a/scsh/regexp/Makefile.in b/scsh/regexp/Makefile.in
index c464295..6aed9ad 100644
--- a/scsh/regexp/Makefile.in
+++ b/scsh/regexp/Makefile.in
@@ -5,92 +5,114 @@ CFLAGS1 = @CFLAGS1@
 
 RANLIB  = @RANLIB@
 
-# Things you might want to put in ENV and LENV:
-# -Dvoid=int		compilers that don't do void
-# -DCHARBITS=0377	compilers that don't do unsigned char
-# -DSTATIC=extern	compilers that don't like "static foo();" as forward decl
-# -DSTRCSPN		library does not have strcspn()
-# -Dstrchr=index	library does not have strchr()
+# Things you might want to put in ENV:
 # -DERRAVAIL		have utzoo-compatible error() function and friends
-# ENV=-DSTRCSPN
-# LENV=-DSTRCSPN
- 
+ENV=
+
 # Things you might want to put in TEST:
 # -DDEBUG		debugging hooks
 # -I.			regexp.h from current directory, not /usr/include
 TEST=-I. -I$(srcdir)
- 
+
 # Things you might want to put in PROF:
-# -Dstatic='/* */'	make everything global so profiler can see it.
-# -p			profiler
-PROF=
+# -pg			profiler
+# PROF=
 
-INCDEST=/contrib/share/include
-LIBDEST=/contrib/system/lib
-MANDEST=/contrib/share/man/man3
+CFLAGS=$(CFLAGS1) $(ENV) $(TEST) $(PROF)
+LDFLAGS=$(PROF)
 
-# CC = cc
-# CFLAGS1 = -O -Q
+LIB=libregexp.a
+OBJ=regexp.o regsub.o regerror.o
+TMP=dtr.tmp
 
-LINTFLAGS=$(LENV) $(TEST) -ha
-# LDFLAGS=-i
+default:	r
 
-CFLAGS = $(CFLAGS1) $(ENV) $(TEST) $(PROF) 
+try:	try.o $(LIB)
+	$(CC) $(LDFLAGS) try.o $(LIB) -o try
 
-OBJ=regexp.o regsub.o
-LIBOBJ= $(OBJ) regerror.o
-LSRC=regexp.c regsub.c regerror.c
-DTR=README dMakefile regexp.3 regexp.h regexp.c regsub.c regerror.c \
-	regmagic.h try.c timer.c tests
-DEST = ..
-
-# we don't use the library anymore -bri
-all: $(OBJ) # libregexp.a  try
-
-libregexp.a: $(LIBOBJ)
-	ar r libregexp.a $(LIBOBJ)
-	$(RANLIB) libregexp.a
-
-install:
-	install -c libregexp.a $(LIBDEST)/libregexp.a
-	$(RANLIB) $(LIBDEST)/libregexp.a
-	install -c regexp.h $(INCDEST)/regexp.h
-	install -c regexp.3 $(MANDEST)/regexp.3
-
-try:	try.o $(OBJ)
-	$(CC) $(LDFLAGS) try.o $(OBJ) -o try
- 
 # Making timer will probably require putting stuff in $(PROF) and then
 # recompiling everything; the following is just the final stage.
-timer:	timer.o $(OBJ)
-	$(CC) $(LDFLAGS) $(PROF) timer.o $(OBJ) -o timer
- 
+timer:	timer.o $(LIB)
+	$(CC) $(LDFLAGS) timer.o $(LIB) -o timer
+
 timer.o:	timer.c timer.t.h
- 
+
 timer.t.h:	tests
 	sed 's/	/","/g;s/\\/&&/g;s/.*/{"&"},/' tests >timer.t.h
- 
+
 # Regression test.
-r:	./try tests
-	@echo 'No news is good news...'
-	./try <tests
- 
-lint:	timer.t.h
-	@echo 'Complaints about multiply-declared regerror() are legit.'
-	lint $(LINTFLAGS) $(LSRC) try.c
-	lint $(LINTFLAGS) $(LSRC) timer.c
- 
+r:	try tests
+	./try <tests		# no news is good news...
+
+$(LIB):	$(OBJ)
+	ar cr $(LIB) $(OBJ)
+	$(RANLIB) libregexp.a
+
 regexp.o:	regexp.c regexp.h regmagic.h
 regsub.o:	regsub.c regexp.h regmagic.h
- 
+
 clean:
-	rm -f *.o *.out *~ *.a core mon.out timer.t.h dMakefile dtr try timer
- 
-dtr:	r makedtr $(DTR)
-	makedtr $(DTR) >dtr
- 
-dMakefile:	Makefile
-	sed '/^L*ENV=/s/ *-DERRAVAIL//' Makefile >dMakefile
- 
-mv:	$(OBJ) regerror.o
-	mv $(OBJ) regerror.o $(DEST)
+	rm -f *.o core mon.out gmon.out timer.t.h dtr copy try timer r.*
+	rm -f residue rs.* re.1 rm.h re.h ch.soe ch.ps j badcom fig[012]
+	rm -f ch.sml fig[12].ps $(LIB)
+	rm -rf $(TMP)
+
+# the rest of this is unlikely to be of use to you
+
+BITS = r.1 rs.1 re.1 rm.h re.h
+OPT=-p -ms
+
+ch.soe:	ch $(BITS)
+	soelim ch >$@
+
+ch.sml:	ch $(BITS) smlize splitfigs
+	splitfigs ch | soelim | smlize >$@
+
+fig0 fig1 fig2:	ch splitfigs
+	splitfigs ch >/dev/null
+
+f:	fig0 fig1 fig2 figs
+	groff -Tps -s $(OPT) figs | lpr
+
+fig1.ps:	fig0 fig1
+	( cat fig0 ; echo ".LP" ; cat fig1 ) | groff -Tps $(OPT) >$@
+
+fig2.ps:	fig0 fig2
+	( cat fig0 ; echo ".LP" ; cat fig2 ) | groff -Tps $(OPT) >$@
+
+fp:	fig1.ps fig2.ps
+
+r.1:	regexp.c splitter
+	splitter regexp.c
+
+rs.1:	regsub.c splitter
+	splitter regsub.c
+
+re.1:	regerror.c splitter
+	splitter regerror.c
+
+rm.h:	regmagic.h splitter
+	splitter regmagic.h
+
+re.h:	regexp.h splitter
+	splitter regexp.h
+
+PLAIN=COPYRIGHT README Makefile regexp.3 try.c timer.c tests
+FIX=regexp.h regexp.c regsub.c regerror.c regmagic.h
+DTR=$(PLAIN) $(FIX)
+
+dtr:	r $(DTR)
+	rm -rf $(TMP)
+	mkdir $(TMP)
+	cp $(PLAIN) $(TMP)
+	for f in $(FIX) ; do normalize $$f >$(TMP)/$$f ; done
+	( cd $(TMP) ; makedtr $(DTR) ) >$@
+	rm -rf $(TMP)
+
+ch.ps:	ch Makefile $(BITS)
+	groff -Tps $(OPT) ch >$@
+
+copy:	ch.soe ch.sml fp
+	makedtr REMARKS ch.sml fig*.ps ch.soe >$@
+
+go:	copy dtr
diff --git a/scsh/regexp/README b/scsh/regexp/README
index a18c796..bcb9cf5 100644
--- a/scsh/regexp/README
+++ b/scsh/regexp/README
@@ -1,55 +1,37 @@
-This is a nearly-public-domain reimplementation of the V8 regexp(3) package.
+This is a revision of my well-known regular-expression package, regexp(3).
 It gives C programs the ability to use egrep-style regular expressions, and
 does it in a much cleaner fashion than the analogous routines in SysV.
- 
-	Copyright (c) 1986 by University of Toronto.
-	Written by Henry Spencer.  Not derived from licensed software.
- 
-	Permission is granted to anyone to use this software for any
-	purpose on any computer system, and to redistribute it freely,
-	subject to the following restrictions:
- 
-	1. The author is not responsible for the consequences of use of
-		this software, no matter how awful, even if they arise
-		from defects in it.
- 
-	2. The origin of this software must not be misrepresented, either
-		by explicit claim or by omission.
- 
-	3. Altered versions must be plainly marked as such, and must not
-		be misrepresented as being the original software.
- 
-Barring a couple of small items in the BUGS list, this implementation is
-believed 100% compatible with V8.  It should even be binary-compatible,
-sort of, since the only fields in a "struct regexp" that other people have
-any business touching are declared in exactly the same way at the same
-location in the struct (the beginning).
- 
-This implementation is *NOT* AT&T/Bell code, and is not derived from licensed
+It is not, alas, fully POSIX.2-compliant; that is hard.  (I'm working on
+a full reimplementation that will do that.)
+
+This version is the one which is examined and explained in one chapter of
+"Software Solutions in C" (Dale Schumacher, ed.; AP Professional 1994;
+ISBN 0-12-632360-7), plus a couple of insignificant updates, plus one
+significant bug fix (done 10 Nov 1995).
+
+Although this package was inspired by the Bell V8 regexp(3), this
+implementation is *NOT* AT&T/Bell code, and is not derived from licensed
 software.  Even though U of T is a V8 licensee.  This software is based on
 a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed
 here is a complete rewrite and hence is not covered by AT&T copyright).
-The software was nearly complete at the time of arrival of our V8 tape.
-I haven't even looked at V8 yet, although a friend elsewhere at U of T has
-been kind enough to run a few test programs using the V8 regexp(3) to resolve
-a few fine points.  I admit to some familiarity with regular-expression
-implementations of the past, but the only one that this code traces any
-ancestry to is the one published in Kernighan & Plauger (from which this
-one draws ideas but not code).
- 
-Simplistically:  put this stuff into a source directory, copy regexp.h into
-/usr/include, inspect Makefile for compilation options that need changing
-to suit your local environment, and then do "make r".  This compiles the
-regexp(3) functions, compiles a test program, and runs a large set of
-regression tests.  If there are no complaints, then put regexp.o, regsub.o,
-and regerror.o into your C library, and regexp.3 into your manual-pages
-directory.
- 
-Note that if you don't put regexp.h into /usr/include *before* compiling,
-you'll have to add "-I." to CFLAGS before compiling.
- 
+I admit to some familiarity with regular-expression implementations of
+the past, but the only one that this code traces any ancestry to is the
+one published in Kernighan & Plauger's "Software Tools" (from which
+this one draws ideas but not code).
+
+Simplistically:  put this stuff into a source directory, inspect Makefile
+for compilation options that need changing to suit your local environment,
+and then do "make".  This compiles the regexp(3) functions, builds a
+library containing them, compiles a test program, and runs a large set of
+regression tests.  If there are no complaints, then put regexp.h into
+/usr/include, add regexp.o, regsub.o, and regerror.o into your C library
+(or put libre.a into /usr/lib), and install regexp.3 (perhaps with slight
+modifications) in your manual-pages directory. 
+
 The files are:
- 
+
+COPYRIGHT	copyright notice
+README		this text
 Makefile	instructions to make everything
 regexp.3	manual page
 regexp.h	header file, for /usr/include
@@ -60,24 +42,15 @@ regmagic.h	internal header file
 try.c		source for test program
 timer.c		source for timing program
 tests		test list for try and timer
- 
+
 This implementation uses nondeterministic automata rather than the
 deterministic ones found in some other implementations, which makes it
 simpler, smaller, and faster at compiling regular expressions, but slower
-at executing them.  In theory, anyway.  This implementation does employ
-some special-case optimizations to make the simpler cases (which do make
-up the bulk of regular expressions actually used) run quickly.  In general,
-if you want blazing speed you're in the wrong place.  Replacing the insides
-of egrep with this stuff is probably a mistake; if you want your own egrep
-you're going to have to do a lot more work.  But if you want to use regular
-expressions a little bit in something else, you're in luck.  Note that many
-existing text editors use nondeterministic regular-expression implementations,
-so you're in good company.
- 
-This stuff should be pretty portable, given appropriate option settings.
-If your chars have less than 8 bits, you're going to have to change the
-internal representation of the automaton, although knowledge of the details
-of this is fairly localized.  There are no "reserved" char values except for
+at executing them.  Many users have found the speed perfectly adequate,
+although replacing the insides of egrep with this code would be a mistake.
+
+This stuff should be pretty portable, given an ANSI C compiler and
+appropriate option settings.  There are no "reserved" char values except for
 NUL, and no special significance is attached to the top bit of chars.
 The string(3) functions are used a fair bit, on the grounds that they are
 probably faster than coding the operations in line.  Some attempts at code
diff --git a/scsh/regexp/patch-msg b/scsh/regexp/patch-msg
new file mode 100644
index 0000000..36a7ff9
--- /dev/null
+++ b/scsh/regexp/patch-msg
@@ -0,0 +1,803 @@
+Date: Mon, 1 Jul 1996 23:22:47 GMT
+From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
+To: shivers@lcs.mit.edu, bdc@ai.mit.edu
+Subject: scsh patch for precompiled regexps..
+
+I meant to send this out months ago but I was just too hosed with work.
+
+Here's what I have right now:
+
+There are three pieces here:
+	diffs to the "core" scsh
+	diffs to Henry Spencer's latest regexp library
+	a copy of Henry Spencer's latest regexp library..
+
+It appears to work (it passes the same regression tests as the C library..).
+
+Let me know if I didn't include something needed for this to work..
+
+				- Bill
+
+diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
+*** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
+--- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
+***************
+*** 34,49 ****
+  
+  ;;; Bogus stub definitions for low-level match routines:
+  
+! (define regexp? string?)
+! (define (make-regexp str) str)
+  
+! (define (regexp-exec regexp str . maybe-start)
+    (let ((start (optional-arg maybe-start 0))
+  	(start-vec (make-vector 10))
+  	(end-vec (make-vector 10)))
+!     (and (%regexp-match regexp str start start-vec end-vec)
+! 	 (make-regexp-match str start-vec end-vec))))
+! 
+  
+  ;;; Convert a string into a regex pattern that matches that string exactly --
+  ;;; in other words, quote the special chars with backslashes.
+--- 34,53 ----
+  
+  ;;; Bogus stub definitions for low-level match routines:
+  
+! (define-record iregexp
+!   string)
+  
+! (define regexp? iregexp?)
+! 
+! (define (make-regexp str) 
+!   (make-iregexp (compile-regexp str)))
+! 
+! (define (regexp-exec r s . maybe-start)
+    (let ((start (optional-arg maybe-start 0))
+  	(start-vec (make-vector 10))
+  	(end-vec (make-vector 10)))
+!     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
+! 	 (make-regexp-match s start-vec end-vec))))
+  
+  ;;; Convert a string into a regex pattern that matches that string exactly --
+  ;;; in other words, quote the special chars with backslashes.
+***************
+*** 58,75 ****
+  		  (cons #\\ result)
+  		  result))))))
+  
+! (define-foreign %regexp-match/errno (reg_match (string regexp)
+! 					       (string s)
+! 					       (integer start)
+! 					       (vector-desc start-vec)
+! 					       (vector-desc end-vec))
+!   static-string ; Error string or #f if all is ok.
+!   bool)		; match?
+! 
+! (define (%regexp-match regexp string start start-vec end-vec)
+!   (receive (err match?) (%regexp-match/errno regexp string start
+! 					     start-vec end-vec)
+!     (if err (error err %regexp-match regexp string start) match?)))
+  
+  
+  ;;; I do this one in C, I'm not sure why:
+--- 62,79 ----
+  		  (cons #\\ result)
+  		  result))))))
+  
+! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
+! ;;;					       (string s)
+! ;;;					       (integer start)
+! ;;;					       (vector-desc start-vec)
+! ;;;					       (vector-desc end-vec))
+! ;;;  static-string ; Error string or #f if all is ok.
+! ;;;  bool)		; match?
+! 
+! ;;;(define (%regexp-match regexp string start start-vec end-vec)
+! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
+! ;;;					     start-vec end-vec)
+! ;;;    (if err (error err %regexp-match regexp string start) match?)))
+  
+  
+  ;;; I do this one in C, I'm not sure why:
+***************
+*** 79,81 ****
+--- 83,166 ----
+    (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
+    static-string	; error message -- #f if no error.
+    integer)	; number of files that pass the filter.
++ 
++ ;;; precompiled regexps.
++ 
++ (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
++   static-string
++   integer)
++ 
++ (define-foreign %regexp-compile (reg_comp_comp (string regexp)
++ 					       (string-desc re-buf))
++   static-string)
++ 
++ (define (%regexp-exec-1 r s start sv ev)
++   (receive (err match?) (%regexp-exec r s start sv ev)
++ 	   (if err (error err s start)
++ 	       match?)))
++ 
++ (define-foreign %regexp-exec (reg_exec (string-desc regexp)
++ 				       (string s)
++ 				       (integer start)
++ 				       (vector-desc start-vec)
++ 				       (vector-desc end-vec))
++   static-string
++   bool)
++ 
++ 
++ (define (compile-regexp e)
++   (receive (err len)
++ 	   (%regexp-compiled-length e)
++ 	   (if err (error err e)
++ 	       (let ((buf (make-string len)))
++ 		 (%regexp-compile e buf)
++ 		 buf))))
++ 
++ 
++ 
++ (define-foreign %regexp-subst (reg_subst (string-desc regexp)
++ 					 (string m)
++ 					 (string s)
++ 					 (integer start)
++ 					 (vector-desc start-vec)
++ 					 (vector-desc end-vec)
++ 					 (string-desc outbuf))
++   static-string
++   integer)
++ 
++ (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
++ 						 (string m)
++ 						 (string s)
++ 						 (integer start)
++ 						 (vector-desc start-vec)
++ 						 (vector-desc end-vec))
++   static-string
++   integer)
++ 
++ 
++ (define (regexp-subst re match replacement)
++   (let ((cr (iregexp:string re))
++ 	(matchstr (regexp-match:string match))
++ 	(startvec (regexp-match:start match))
++ 	(endvec (regexp-match:end match)))
++     (receive (err outlen)
++ 	     (%regexp-subst-len cr
++ 				matchstr
++ 				replacement
++ 				0
++ 				startvec
++ 				endvec)
++ 	     (if err (error err matchstr replacement)
++ 		 (let ((outbuf (make-string outlen)))
++ 		   (receive (err outlen)
++ 			    (%regexp-subst cr
++ 					   matchstr
++ 					   replacement
++ 					   0
++ 					   startvec
++ 					   endvec
++ 					   outbuf)
++ 			    (if err (error err matchstr replacement)
++ 				(substring outbuf 0 outlen))))))))
++ 
++ 		   
+\ No newline at end of file
+diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
+*** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
+--- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
+***************
+*** 19,24 ****
+--- 19,150 ----
+  /* Stash error msg in global. */
+  void regerror(char *msg) {regexp_error = msg;}
+  
++ /*
++ ** Return NULL normally, error string on error.
++ ** Stash number of bytes needed for compiled regexp into `*len'
++ */
++ 
++ char *reg_comp_len(const char *re, int *len)
++ {
++     int l;
++ 
++     regexp_error = NULL;
++     *len = regcomp_len(re); 
++     return regexp_error;
++ }
++ 
++ /*
++ ** Return NULL normally, error string on error.
++ ** Compile regexp into string described by `cr'.
++ */
++ 
++ char *reg_comp_comp(const char *re, scheme_value cr) 
++ {
++     int len = STRING_LENGTH(cr);
++     regexp *r = (regexp *)&STRING_REF(cr, 0);
++ 
++     regexp_error = NULL;
++     r = regcomp_comp(re, r, len); 
++     return regexp_error;
++ }
++ 
++ /* Return NULL normally, error string on error.
++ ** Stash match info in start_vec and end_vec.
++ ** Returns boolean match/no-match in hit.
++ */
++ 
++ char *reg_exec(scheme_value cr, const char *string, int start,
++ 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
++ {
++     regexp *r = (regexp *)&STRING_REF(cr, 0);
++ 
++     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
++ 	return "Illegal start vector";
++ 	}
++     
++     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
++ 	return "Illegal end vector";
++ 	}
++ 
++     regexp_error = 0;
++     *hit = 0;
++     
++     if( regexec(r, string+start) ) {
++ 	int i;
++ 	for(i=0; i<NSUBEXP; i++) {
++ 	    const char *s = r->startp[i];
++ 	    const char *e = r->endp[i];
++ 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
++ 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
++ 	    r->startp[i] = NULL;
++ 	    r->endp[i] = NULL;
++ 	    }
++ 	*hit = 1;
++ 	}
++     return regexp_error;
++ }
++ 
++ char *reg_subst(scheme_value cr, const char *match,
++ 		const char *src, int start,
++ 		scheme_value start_vec, scheme_value end_vec,
++ 		scheme_value outbuf, int *len)
++ {
++     int i;
++     regexp *r = (regexp *)&STRING_REF(cr, 0);
++ 
++     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
++ 	return "Illegal start vector";
++ 	}
++     
++     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
++ 	return "Illegal end vector";
++ 	}
++ 
++     for (i=0; i<NSUBEXP; i++) 
++     {
++ 	scheme_value se = VECTOR_REF(start_vec, i);
++ 	scheme_value ee = VECTOR_REF(end_vec, i);
++ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
++ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
++     }
++     
++     regexp_error = NULL;
++     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
++     *len = strlen(&STRING_REF(outbuf, 0));
++     return regexp_error;
++ }
++ 
++ char *reg_subst_len(scheme_value cr, const char *match,
++ 		    const char *src, int start,
++ 		    scheme_value start_vec, scheme_value end_vec,
++ 		    int *len)
++ {
++     int i;
++     regexp *r = (regexp *)&STRING_REF(cr, 0);
++ 
++     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
++ 	return "Illegal start vector";
++ 	}
++     
++     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
++ 	return "Illegal end vector";
++ 	}
++ 
++     for (i=0; i<NSUBEXP; i++) 
++     {
++ 	scheme_value se = VECTOR_REF(start_vec, i);
++ 	scheme_value ee = VECTOR_REF(end_vec, i);
++ 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
++ 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
++     }
++     
++     regexp_error = NULL;
++     *len = regsublen (r, src);
++     return regexp_error;
++ }
++ 
++ 
++ #if 0
+  /* Return NULL normally, error string on error.
+  ** Stash match info in start_vec and end_vec.
+  ** Returns boolean match/no-match in hit.
+***************
+*** 56,61 ****
+--- 182,188 ----
+      Free(prog);
+      return regexp_error;
+      }
++ #endif
+  
+  
+  char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
+diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
+*** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
+--- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
+***************
+*** 1,6 ****
+--- 1,21 ----
++ #if 0
+  char *reg_match(const char *re, const char *string, int start,
+  		scheme_value start_vec, scheme_value end_vec,
+  		int *hit);
++ #endif
+  
+  char *filter_stringvec(const char *re, char const **stringvec,
+  		       int *nummatch);
++ 
++ char *reg_comp_len(const char *re, int *len);
++ char *reg_comp_comp(const char *re, scheme_value cr);
++ 
++ char *reg_exec(scheme_value cr, const char *string, int start,
++ 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
++ 
++ char *reg_subst(scheme_value cr, const char *match,
++ 		const char *src, int start,
++ 		scheme_value start_vec, scheme_value end_vec,
++ 		scheme_value outbuf, int *len);
++ 
++ 
+
+Only in scsh-0.4.2-regexp/scsh: re2.scm
+diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
+*** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
+--- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
+***************
+*** 413,418 ****
+--- 413,419 ----
+  	  make-regexp
+  	  regexp?
+  	  regexp-exec
++ 	  regexp-subst
+  	  regexp-quote))
+  
+  
+
+regexp library changes:
+
+*** Makefile	1996/04/06 19:24:49	1.1
+--- Makefile	1996/04/06 20:46:26
+***************
+*** 5,11 ****
+  # Things you might want to put in TEST:
+  # -DDEBUG		debugging hooks
+  # -I.			regexp.h from current directory, not /usr/include
+! TEST=-I.
+  
+  # Things you might want to put in PROF:
+  # -pg			profiler
+--- 5,11 ----
+  # Things you might want to put in TEST:
+  # -DDEBUG		debugging hooks
+  # -I.			regexp.h from current directory, not /usr/include
+! TEST=-I. -DDEBUG
+  
+  # Things you might want to put in PROF:
+  # -pg			profiler
+*** regexp.c	1996/04/06 19:24:49	1.1
+--- regexp.c	1996/04/06 22:34:55
+***************
+*** 105,110 ****
+--- 105,111 ----
+   * Utility definitions.
+   */
+  #define	FAIL(m)		{ regerror(m); return(NULL); }
++ #define	FAILN(m)	{ regerror(m); return(-1); }
+  #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
+  #define	META		"^$.[()|?+*\\"
+  
+***************
+*** 162,173 ****
+  const char *exp;
+  {
+  	register regexp *r;
+! 	register char *scan;
+  	int flags;
+  	struct comp co;
+  
+  	if (exp == NULL)
+! 		FAIL("NULL argument to regcomp");
+  
+  	/* First pass: determine size, legality. */
+  	co.regparse = (char *)exp;
+--- 163,193 ----
+  const char *exp;
+  {
+  	register regexp *r;
+! 	size_t len;
+! 
+! 	len = regcomp_len(exp);
+! 	if (len <= 0)
+! 	        return NULL;
+! 
+! 	/* Allocate space. */
+! 	r = (regexp *)malloc(len);
+! 
+! 	if (r == NULL)
+! 		FAIL("out of space");
+! 	return regcomp_comp(exp, r, len);
+! }
+! 
+! 
+! size_t
+! regcomp_len(exp)
+! const char *exp;
+! {
+  	int flags;
++ 	register regexp *r;
+  	struct comp co;
+  
+  	if (exp == NULL)
+! 		FAILN("NULL argument to regcomp");
+  
+  	/* First pass: determine size, legality. */
+  	co.regparse = (char *)exp;
+***************
+*** 178,198 ****
+  	co.regcode = co.regdummy;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+! 		return(NULL);
+  
+  	/* Small enough for pointer-storage convention? */
+  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
+! 		FAIL("regexp too big");
+  
+! 	/* Allocate space. */
+! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
+! 	if (r == NULL)
+! 		FAIL("out of space");
+  
+  	/* Second pass: emit code. */
+  	co.regparse = (char *)exp;
+  	co.regnpar = 1;
+  	co.regcode = r->program;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+  		return(NULL);
+--- 198,228 ----
+  	co.regcode = co.regdummy;
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+! 		return -1;
+  
+  	/* Small enough for pointer-storage convention? */
+  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
+! 		FAILN("regexp too big");
+  
+! 	return (sizeof(regexp) + (size_t)co.regsize);
+! }
+! 
+! 
+! regexp *
+! regcomp_comp(exp, r, len)
+! const char *exp;
+! register regexp *r;
+! size_t len;
+! {
+! 	register char *scan;
+! 	int flags;
+! 	struct comp co;
+  
+  	/* Second pass: emit code. */
+  	co.regparse = (char *)exp;
+  	co.regnpar = 1;
+  	co.regcode = r->program;
++ 	co.regsize = len - sizeof(regexp);
+  	regc(&co, MAGIC);
+  	if (reg(&co, 0, &flags) == NULL)
+  		return(NULL);
+***************
+*** 200,206 ****
+  	/* Dig out information for optimizations. */
+  	r->regstart = '\0';		/* Worst-case defaults. */
+  	r->reganch = 0;
+! 	r->regmust = NULL;
+  	r->regmlen = 0;
+  	scan = r->program+1;		/* First BRANCH. */
+  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
+--- 230,236 ----
+  	/* Dig out information for optimizations. */
+  	r->regstart = '\0';		/* Worst-case defaults. */
+  	r->reganch = 0;
+! 	r->regmust = 0;
+  	r->regmlen = 0;
+  	scan = r->program+1;		/* First BRANCH. */
+  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
+***************
+*** 229,235 ****
+  					longest = OPERAND(scan);
+  					len = strlen(OPERAND(scan));
+  				}
+! 			r->regmust = longest;
+  			r->regmlen = (int)len;
+  		}
+  	}
+--- 259,265 ----
+  					longest = OPERAND(scan);
+  					len = strlen(OPERAND(scan));
+  				}
+! 			r->regmust = longest - r->program;
+  			r->regmlen = (int)len;
+  		}
+  	}
+***************
+*** 648,655 ****
+  struct exec {
+  	char *reginput;		/* String-input pointer. */
+  	char *regbol;		/* Beginning of input, for ^ check. */
+! 	char **regstartp;	/* Pointer to startp array. */
+! 	char **regendp;		/* Ditto for endp. */
+  };
+  
+  /*
+--- 678,685 ----
+  struct exec {
+  	char *reginput;		/* String-input pointer. */
+  	char *regbol;		/* Beginning of input, for ^ check. */
+! 	const char **regstartp;	/* Pointer to startp array. */
+! 	const char **regendp;		/* Ditto for endp. */
+  };
+  
+  /*
+***************
+*** 690,696 ****
+  	}
+  
+  	/* If there is a "must appear" string, look for it. */
+! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
+  		return(0);
+  
+  	/* Mark beginning of line for ^ . */
+--- 720,727 ----
+  	}
+  
+  	/* If there is a "must appear" string, look for it. */
+! 	if ((prog->regmlen > 0) &&
+! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
+  		return(0);
+  
+  	/* Mark beginning of line for ^ . */
+***************
+*** 729,736 ****
+  char *string;
+  {
+  	register int i;
+! 	register char **stp;
+! 	register char **enp;
+  
+  	ep->reginput = string;
+  
+--- 760,767 ----
+  char *string;
+  {
+  	register int i;
+! 	register const char **stp;
+! 	register const char **enp;
+  
+  	ep->reginput = string;
+  
+***************
+*** 1004,1011 ****
+  		printf("start `%c' ", r->regstart);
+  	if (r->reganch)
+  		printf("anchored ");
+! 	if (r->regmust != NULL)
+! 		printf("must have \"%s\"", r->regmust);
+  	printf("\n");
+  }
+  
+--- 1035,1042 ----
+  		printf("start `%c' ", r->regstart);
+  	if (r->reganch)
+  		printf("anchored ");
+! 	if (r->regmlen > 0)
+! 		printf("must have \"%s\"", &r->program[r->regmust]);
+  	printf("\n");
+  }
+  
+*** regexp.h	1996/04/06 19:24:49	1.1
+--- regexp.h	1996/04/07 01:52:19
+***************
+*** 6,16 ****
+   */
+  #define NSUBEXP  10
+  typedef struct regexp {
+! 	char *startp[NSUBEXP];
+! 	char *endp[NSUBEXP];
+  	char regstart;		/* Internal use only. */
+  	char reganch;		/* Internal use only. */
+! 	char *regmust;		/* Internal use only. */
+  	int regmlen;		/* Internal use only. */
+  	char program[1];	/* Unwarranted chumminess with compiler. */
+  } regexp;
+--- 6,16 ----
+   */
+  #define NSUBEXP  10
+  typedef struct regexp {
+! 	const char *startp[NSUBEXP];
+! 	const char *endp[NSUBEXP];
+  	char regstart;		/* Internal use only. */
+  	char reganch;		/* Internal use only. */
+! 	int regmust;		/* Internal use only. */
+  	int regmlen;		/* Internal use only. */
+  	char program[1];	/* Unwarranted chumminess with compiler. */
+  } regexp;
+***************
+*** 18,21 ****
+--- 18,27 ----
+  extern regexp *regcomp(const char *re);
+  extern int regexec(regexp *rp, const char *s);
+  extern void regsub(const regexp *rp, const char *src, char *dst);
++ extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
++ extern size_t regsublen(const regexp *rp, const char *src);
++ 
+  extern void regerror(char *message);
++ extern size_t regcomp_len(const char *exp);
++ extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
++ 
+*** regsub.c	1996/04/06 19:24:49	1.1
+--- regsub.c	1996/04/07 02:10:29
+***************
+*** 11,25 ****
+  /*
+   - regsub - perform substitutions after a regexp match
+   */
+  void
+! regsub(rp, source, dest)
+  const regexp *rp;
+  const char *source;
+  char *dest;
+  {
+  	register regexp * const prog = (regexp *)rp;
+! 	register char *src = (char *)source;
+  	register char *dst = dest;
+  	register char c;
+  	register int no;
+  	register size_t len;
+--- 11,42 ----
+  /*
+   - regsub - perform substitutions after a regexp match
+   */
++ 
++ void regsub(rp, source, dest)
++ const regexp *rp;
++ const char *source;
++ char *dest;
++ {
++         regnsub(rp, source, dest, BUFSIZ);
++ }
++ 
++ 
++ 
++ /*
++  - regnsub - perform bounds-checked substitutions after a regexp match
++  */
+  void
+! regnsub(rp, source, dest, destlen)
+  const regexp *rp;
+  const char *source;
+  char *dest;
++ size_t destlen;
+  {
+  	register regexp * const prog = (regexp *)rp;
+! 	register const char *src = (char *)source;
+  	register char *dst = dest;
++ 	char *dstend = dest + destlen;
++ 	char *odst;
+  	register char c;
+  	register int no;
+  	register size_t len;
+***************
+*** 45,55 ****
+  			if (c == '\\' && (*src == '\\' || *src == '&'))
+  				c = *src++;
+  			*dst++ = c;
+  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+! 					prog->endp[no] > prog->startp[no]) {
+  			len = prog->endp[no] - prog->startp[no];
+! 			(void) strncpy(dst, prog->startp[no], len);
+  			dst += len;
+  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
+  				regerror("damaged match string");
+  				return;
+--- 62,83 ----
+  			if (c == '\\' && (*src == '\\' || *src == '&'))
+  				c = *src++;
+  			*dst++ = c;
++ 			if (dst >= dstend) 
++ 			{
++ 			    	regerror("output buffer too small");
++ 				return;
++ 			}
+  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+! 			   prog->endp[no] > prog->startp[no]) {
+  			len = prog->endp[no] - prog->startp[no];
+! 			odst = dst;
+  			dst += len;
++ 			if (dst >= dstend) 
++ 			{
++ 			    	regerror("output buffer too small");
++ 				return;
++ 			}
++ 			(void) strncpy(odst, prog->startp[no], len);
+  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
+  				regerror("damaged match string");
+  				return;
+***************
+*** 58,60 ****
+--- 86,131 ----
+  	}
+  	*dst++ = '\0';
+  }
++ 
++ size_t regsublen(rp, source)
++ const regexp *rp;
++ const char *source;
++ {
++     register regexp * const prog = (regexp *)rp;
++     register char *src = (char *)source;
++     register char c;
++     register int no;
++     register int len = 0;
++ 	
++     if (prog == NULL || source == NULL) {
++ 	regerror("NULL parameter to regsublen");
++ 	return -1;
++     }
++     
++     if ((unsigned char)*(prog->program) != MAGIC) {
++ 	regerror("damaged regexp");
++ 	return -1;
++     }
++     while ((c = *src++) != '\0') {
++ 	if (c == '&')
++ 	    no = 0;
++ 	else if (c == '\\' && isdigit(*src))
++ 	    no = *src++ - '0';
++ 	else
++ 	    no = -1;
++ 	if (no < 0) {		/* Ordinary character. */
++ 	    if (c == '\\' && (*src == '\\' || *src == '&'))
++ 		src++;
++ 	    len++;
++ 	} else {
++ 	    const char *s = prog->startp[no];
++ 	    const char *e = prog->endp[no];
++ 	    if ((s != NULL) && (e != NULL) && (e > s)) {
++ 		len += e-s;
++ 	    }
++ 	}
++     }
++     return len+1;
++ }
++ 
++ 
+
+Original regexp code from henry:
+[unpacked & deleted -Olin]
diff --git a/scsh/regexp/regerror.c b/scsh/regexp/regerror.c
index 2d682c2..a280cee 100644
--- a/scsh/regexp/regerror.c
+++ b/scsh/regexp/regerror.c
@@ -1,14 +1,18 @@
+/*
+ * regerror
+ */
 #include <stdio.h>
- 
-void 
+#include <stdlib.h>
+
+void
 regerror(s)
-    char           *s;
+char *s;
 {
 #ifdef ERRAVAIL
-    error("regexp: %s", s);
+	error("regexp: %s", s);
 #else
-    fprintf(stderr, "regexp(3): %s", s);
-    exit(1);
+	fprintf(stderr, "regexp(3): %s\n", s);
+	exit(EXIT_FAILURE);
 #endif
-    /* NOTREACHED */
+	/* NOTREACHED */
 }
diff --git a/scsh/regexp/regexp.3 b/scsh/regexp/regexp.3
new file mode 100644
index 0000000..6d2555b
--- /dev/null
+++ b/scsh/regexp/regexp.3
@@ -0,0 +1,186 @@
+.TH REGEXP 3 "2 Sept 1995"
+.SH NAME
+regcomp, regexec, regsub, regerror \- regular expression handler
+.SH SYNOPSIS
+.ft B
+.nf
+#include <regexp.h>
+
+regexp *regcomp(exp)
+const char *exp;
+
+int regexec(prog, string)
+regexp *prog;
+const char *string;
+
+void regsub(prog, source, dest)
+const regexp *prog;
+const char *source;
+char *dest;
+
+void regerror(msg)
+char *msg;
+.SH DESCRIPTION
+These functions implement
+.IR egrep (1)-style
+regular expressions and supporting facilities.
+.PP
+.I Regcomp
+compiles a regular expression into a structure of type
+.IR regexp ,
+and returns a pointer to it.
+The space has been allocated using
+.IR malloc (3)
+and may be released by
+.IR free .
+.PP
+.I Regexec
+matches a NUL-terminated \fIstring\fR against the compiled regular expression
+in \fIprog\fR.
+It returns 1 for success and 0 for failure, and adjusts the contents of
+\fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.
+.PP
+The members of a
+.I regexp
+structure include at least the following (not necessarily in order):
+.PP
+.RS
+char *startp[NSUBEXP];
+.br
+char *endp[NSUBEXP];
+.RE
+.PP
+where
+.I NSUBEXP
+is defined (as 10) in the header file.
+Once a successful \fIregexec\fR has been done using the \fIregexp\fR,
+each \fIstartp\fR-\fIendp\fR pair describes one substring
+within the \fIstring\fR,
+with the \fIstartp\fR pointing to the first character of the substring and
+the \fIendp\fR pointing to the first character following the substring.
+The 0th substring is the substring of \fIstring\fR that matched the whole
+regular expression.
+The others are those substrings that matched parenthesized expressions
+within the regular expression, with parenthesized expressions numbered
+in left-to-right order of their opening parentheses.
+.PP
+.I Regsub
+copies \fIsource\fR to \fIdest\fR, making substitutions according to the
+most recent \fIregexec\fR performed using \fIprog\fR.
+Each instance of `&' in \fIsource\fR is replaced by the substring
+indicated by \fIstartp\fR[\fI0\fR] and
+\fIendp\fR[\fI0\fR].
+Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by
+the substring indicated by
+\fIstartp\fR[\fIn\fR] and
+\fIendp\fR[\fIn\fR].
+To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';
+to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with
+another `\e'.
+.PP
+.I Regerror
+is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,
+or \fIregsub\fR.
+The default \fIregerror\fR writes the string \fImsg\fR,
+with a suitable indicator of origin,
+on the standard
+error output
+and invokes \fIexit\fR(2).
+.I Regerror
+can be replaced by the user if other actions are desirable.
+.SH "REGULAR EXPRESSION SYNTAX"
+A regular expression is zero or more \fIbranches\fR, separated by `|'.
+It matches anything that matches one of the branches.
+.PP
+A branch is zero or more \fIpieces\fR, concatenated.
+It matches a match for the first, followed by a match for the second, etc.
+.PP
+A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.
+An atom followed by `*' matches a sequence of 0 or more matches of the atom.
+An atom followed by `+' matches a sequence of 1 or more matches of the atom.
+An atom followed by `?' matches a match of the atom, or the null string.
+.PP
+An atom is a regular expression in parentheses (matching a match for the
+regular expression), a \fIrange\fR (see below), `.'
+(matching any single character), `^' (matching the null string at the
+beginning of the input string), `$' (matching the null string at the
+end of the input string), a `\e' followed by a single character (matching
+that character), or a single character with no other significance
+(matching that character).
+.PP
+A \fIrange\fR is a sequence of characters enclosed in `[]'.
+It normally matches any single character from the sequence.
+If the sequence begins with `^',
+it matches any single character \fInot\fR from the rest of the sequence.
+If two characters in the sequence are separated by `\-', this is shorthand
+for the full list of ASCII characters between them
+(e.g. `[0-9]' matches any decimal digit).
+To include a literal `]' in the sequence, make it the first character
+(following a possible `^').
+To include a literal `\-', make it the first or last character.
+.SH AMBIGUITY
+If a regular expression could match two different parts of the input string,
+it will match the one which begins earliest.
+If both begin in the same place but match different lengths, or match
+the same length in different ways, life gets messier, as follows.
+.PP
+In general, the possibilities in a list of branches are considered in
+left-to-right order, the possibilities for `*', `+', and `?' are
+considered longest-first, nested constructs are considered from the
+outermost in, and concatenated constructs are considered leftmost-first.
+The match that will be chosen is the one that uses the earliest
+possibility in the first choice that has to be made.
+If there is more than one choice, the next will be made in the same manner
+(earliest possibility) subject to the decision on the first choice.
+And so forth.
+.PP
+For example, `(ab|a)b*c' could match `abc' in one of two ways.
+The first choice is between `ab' and `a'; since `ab' is earlier, and does
+lead to a successful overall match, it is chosen.
+Since the `b' is already spoken for,
+the `b*' must match its last possibility\(emthe empty string\(emsince
+it must respect the earlier choice.
+.PP
+In the particular case where the regular expression does not use `|'
+and does not apply `*', `+', or `?' to parenthesized subexpressions,
+the net effect is that the longest possible
+match will be chosen.
+So `ab*', presented with `xabbbby', will match `abbbb'.
+Note that if `ab*' is tried against `xabyabbbz', it
+will match `ab' just after `x', due to the begins-earliest rule.
+(In effect, the decision on where to start the match is the first choice
+to be made, hence subsequent choices must respect it even if this leads them
+to less-preferred alternatives.)
+.SH SEE ALSO
+egrep(1), expr(1)
+.SH DIAGNOSTICS
+\fIRegcomp\fR returns NULL for a failure
+(\fIregerror\fR permitting),
+where failures are syntax errors, exceeding implementation limits,
+or applying `+' or `*' to a possibly-null operand.
+.SH HISTORY
+This is a revised version.
+Both code and manual page were
+originally written by Henry Spencer at University of Toronto.
+They are intended to be compatible with the Bell V8 \fIregexp\fR(3),
+but are not derived from Bell code.
+.SH BUGS
+Empty branches and empty regular expressions are not portable
+to other, otherwise-similar, implementations.
+.PP
+The ban on
+applying `*' or `+' to a possibly-null operand is an artifact of the
+simplistic implementation.
+.PP
+The match-choice rules are complex.
+A simple ``longest match'' rule would be preferable,
+but is harder to implement.
+.PP
+Although there is a general similarity to POSIX.2 ``extended'' regular
+expressions, neither the regular-expression syntax nor the programming
+interface is an exact match.
+.PP
+Due to emphasis on
+compactness and simplicity,
+it's not strikingly fast.
+It does give some attention to handling simple cases quickly.
diff --git a/scsh/regexp/regexp.c b/scsh/regexp/regexp.c
index 5f7fa6d..65d7e88 100644
--- a/scsh/regexp/regexp.c
+++ b/scsh/regexp/regexp.c
@@ -1,45 +1,21 @@
 /*
- * regcomp and regexec -- regsub and regerror are elsewhere @(#)regexp.c 1.3
- * of 18 April 87 
- *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer.  Not
- * derived from licensed software. 
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions: 
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it. 
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission. 
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. 
- *
- * Beware that some of this code is subtly aware of the way operator precedence
- * is structured in regular expressions.  Serious changes in
- * regular-expression syntax might require a total rethink. 
+ * regcomp and regexec -- regsub and regerror are elsewhere
  */
 #include <stdio.h>
-#ifdef AMIGA
-#undef min
-#include "regexp.h"
-#else
+#include <stdlib.h>
+#include <string.h>
 #include <regexp.h>
-#endif
 #include "regmagic.h"
- 
+
 /*
  * The "internal use only" fields in regexp.h are present to pass info from
  * compile to execute that permits the execute phase to run lots faster on
- * simple cases.  They are: 
+ * simple cases.  They are:
  *
- * regstart	char that must begin a match; '\0' if none obvious reganch
- * is the match anchored (at beginning-of-line only)? regmust	string
- * (pointer into program) that match must include, or NULL regmlen
- * length of regmust string 
+ * regstart	char that must begin a match; '\0' if none obvious
+ * reganch	is the match anchored (at beginning-of-line only)?
+ * regmust	string (pointer into program) that match must include, or NULL
+ * regmlen	length of regmust string
  *
  * Regstart and reganch permit very fast decisions on suitable starting points
  * for a match, cutting down the work a lot.  Regmust permits fast rejection
@@ -48,1164 +24,1101 @@
  * potentially expensive (at present, the only such thing detected is * or +
  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  * supplied because the test in regexec() needs it and regcomp() is computing
- * it anyway. 
+ * it anyway.
  */
- 
+
 /*
- * Structure for regexp "program".  This is essentially a linear encoding of
- * a nondeterministic finite-state machine (aka syntax charts or "railroad
- * normal form" in parsing technology).  Each node is an opcode plus a "next"
- * pointer, possibly plus an operand.  "Next" pointers of all nodes except
- * BRANCH implement concatenation; a "next" pointer with a BRANCH on both
- * ends of it is connecting two alternatives.	(Here we have one of the
- * subtle syntax dependencies:	an individual BRANCH (as opposed to a
- * collection of them) is never concatenated with anything because of
- * operator precedence.)  The operand of some types of node is a literal
- * string; for others, it is a node leading into a sub-FSM.  In particular,
- * the operand of a BRANCH node is the first node of the branch. (NB this is
- * *not* a tree structure:  the tail of the branch connects to the thing
- * following the set of BRANCHes.)  The opcodes are: 
+ * Structure for regexp "program".  This is essentially a linear encoding
+ * of a nondeterministic finite-state machine (aka syntax charts or
+ * "railroad normal form" in parsing technology).  Each node is an opcode
+ * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
+ * all nodes except BRANCH implement concatenation; a "next" pointer with
+ * a BRANCH on both ends of it is connecting two alternatives.  (Here we
+ * have one of the subtle syntax dependencies:  an individual BRANCH (as
+ * opposed to a collection of them) is never concatenated with anything
+ * because of operator precedence.)  The operand of some types of node is
+ * a literal string; for others, it is a node leading into a sub-FSM.  In
+ * particular, the operand of a BRANCH node is the first node of the branch.
+ * (NB this is *not* a tree structure:  the tail of the branch connects
+ * to the thing following the set of BRANCHes.)  The opcodes are:
  */
- 
+
 /* definition	number	opnd?	meaning */
-#define END	0		/* no	End of program. */
-#define BOL	1		/* no	Match "" at beginning of line. */
-#define EOL	2		/* no	Match "" at end of line. */
-#define ANY	3		/* no	Match any one character. */
-#define ANYOF	4		/* str	Match any character in this string. */
-#define ANYBUT	5		/* str	Match any character not in this
-				 * string. */
-#define BRANCH	6		/* node Match this alternative, or the
-				 * next... */
-#define BACK	7		/* no	Match "", "next" ptr points backward. */
-#define EXACTLY 8		/* str	Match this string. */
-#define NOTHING 9		/* no	Match empty string. */
-#define STAR	10		/* node Match this (simple) thing 0 or more
-				 * times. */
-#define PLUS	11		/* node Match this (simple) thing 1 or more
-				 * times. */
-#define OPEN	20		/* no	Mark this point in input as start of
-				 * #n. */
-/* OPEN+1 is number 1, etc. */
-#define CLOSE	30		/* no	Analogous to OPEN. */
- 
+#define	END	0	/* no	End of program. */
+#define	BOL	1	/* no	Match beginning of line. */
+#define	EOL	2	/* no	Match end of line. */
+#define	ANY	3	/* no	Match any character. */
+#define	ANYOF	4	/* str	Match any of these. */
+#define	ANYBUT	5	/* str	Match any but one of these. */
+#define	BRANCH	6	/* node	Match this, or the next..\&. */
+#define	BACK	7	/* no	"next" ptr points backward. */
+#define	EXACTLY	8	/* str	Match this string. */
+#define	NOTHING	9	/* no	Match empty string. */
+#define	STAR	10	/* node	Match this 0 or more times. */
+#define	PLUS	11	/* node	Match this 1 or more times. */
+#define	OPEN	20	/* no	Sub-RE starts here. */
+			/*	OPEN+1 is number 1, etc. */
+#define	CLOSE	30	/* no	Analogous to OPEN. */
+
 /*
- * Opcode notes: 
+ * Opcode notes:
  *
- * BRANCH	The set of branches constituting a single choice are hooked together
- * with their "next" pointers, since precedence prevents anything being
- * concatenated to any individual branch.  The "next" pointer of the last
- * BRANCH in a choice points to the thing following the whole choice.  This
- * is also where the final "next" pointer of each individual branch points;
- * each branch starts with the operand node of a BRANCH node. 
+ * BRANCH	The set of branches constituting a single choice are hooked
+ *		together with their "next" pointers, since precedence prevents
+ *		anything being concatenated to any individual branch.  The
+ *		"next" pointer of the last BRANCH in a choice points to the
+ *		thing following the whole choice.  This is also where the
+ *		final "next" pointer of each individual branch points; each
+ *		branch starts with the operand node of a BRANCH node.
  *
- * BACK 	Normal "next" pointers all implicitly point forward; BACK exists to
- * make loop structures possible. 
+ * BACK		Normal "next" pointers all implicitly point forward; BACK
+ *		exists to make loop structures possible.
  *
  * STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
- * BRANCH structures using BACK.  Simple cases (one character per match) are
- * implemented with STAR and PLUS for speed and to minimize recursive
- * plunges. 
+ *		BRANCH structures using BACK.  Simple cases (one character
+ *		per match) are implemented with STAR and PLUS for speed
+ *		and to minimize recursive plunges.
  *
- * OPEN,CLOSE	...are numbered at compile time. 
+ * OPEN,CLOSE	...are numbered at compile time.
  */
- 
+
 /*
  * A node is one char of opcode followed by two chars of "next" pointer.
  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
- * value is a positive offset from the opcode of the node containing it. An
- * operand, if any, simply follows the node.  (Note that much of the code
- * generation knows about this implicit relationship.) 
+ * value is a positive offset from the opcode of the node containing it.
+ * An operand, if any, simply follows the node.  (Note that much of the
+ * code generation knows about this implicit relationship.)
  *
- * Using two bytes for the "next" pointer is vast overkill for most things, but
- * allows patterns to get big without disasters. 
+ * Using two bytes for the "next" pointer is vast overkill for most things,
+ * but allows patterns to get big without disasters.
  */
-#define OP(p)   (*(p))
-#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
-#define OPERAND(p)      ((p) + 3)
- 
+#define	OP(p)		(*(p))
+#define	NEXT(p)		(((*((p)+1)&0177)<<8) + (*((p)+2)&0377))
+#define	OPERAND(p)	((p) + 3)
+
 /*
- * See regmagic.h for one further detail of program structure. 
+ * See regmagic.h for one further detail of program structure.
  */
- 
- 
+
+
 /*
- * Utility definitions. 
+ * Utility definitions.
  */
-#ifndef CHARBITS
-#define UCHARAT(p)      ((int)*(unsigned char *)(p))
-#else
-#define UCHARAT(p)      ((int)*(p)&CHARBITS)
-#endif
- 
-#define FAIL(m) { regerror(m); return(NULL); }
-#define ISMULT(c)       ((c) == '*' || (c) == '+' || (c) == '?')
-#define META	"^$.[()|?+*\\"
- 
+#define	FAIL(m)		{ regerror(m); return(NULL); }
+#define	FAILN(m)	{ regerror(m); return(-1); }
+#define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
+#define	META		"^$.[()|?+*\\"
+
 /*
- * Flags to be passed up and down. 
+ * Flags to be passed up and down.
  */
-#define HASWIDTH	01	/* Known never to match null string. */
-#define SIMPLE		02	/* Simple enough to be STAR/PLUS operand. */
-#define SPSTART 	04	/* Starts with * or +. */
-#define WORST		0	/* Worst case. */
- 
+#define	HASWIDTH	01	/* Known never to match null string. */
+#define	SIMPLE		02	/* Simple enough to be STAR/PLUS operand. */
+#define	SPSTART		04	/* Starts with * or +. */
+#define	WORST		0	/* Worst case. */
+
 /*
- * Global work variables for regcomp(). 
+ * Work-variable struct for regcomp().
  */
-static char    *regparse;	/* Input-scan pointer. */
-static int      regnpar;	/* () count. */
-static char     regdummy;
-static char    *regcode;	/* Code-emit pointer; &regdummy = don't. */
-static long     regsize;	/* Code size. */
- 
+struct comp {
+	char *regparse;		/* Input-scan pointer. */
+	int regnpar;		/* () count. */
+	char *regcode;		/* Code-emit pointer; &regdummy = don't. */
+	char regdummy[3];	/* NOTHING, 0 next ptr */
+	long regsize;		/* Code size. */
+};
+#define	EMITTING(cp)	((cp)->regcode != (cp)->regdummy)
+
 /*
- * Forward declarations for regcomp()'s friends. 
+ * Forward declarations for regcomp()'s friends.
  */
-#ifndef STATIC
-#define STATIC	static
-#endif
-STATIC char    *reg();
-STATIC char    *regbranch();
-STATIC char    *regpiece();
-STATIC char    *regatom();
-STATIC char    *regnode();
-STATIC char    *regnext();
-STATIC void     regc();
-STATIC void     reginsert();
-STATIC void     regtail();
-STATIC void     regoptail();
-#ifdef STRCSPN
-int             strcspn();
-#endif
- 
+static char *reg(struct comp *cp, int paren, int *flagp);
+static char *regbranch(struct comp *cp, int *flagp);
+static char *regpiece(struct comp *cp, int *flagp);
+static char *regatom(struct comp *cp, int *flagp);
+static char *regnode(struct comp *cp, int op);
+static char *regnext(char *node);
+static void regc(struct comp *cp, int c);
+static void reginsert(struct comp *cp, int op, char *opnd);
+static void regtail(struct comp *cp, char *p, char *val);
+static void regoptail(struct comp *cp, char *p, char *val);
+
 /*
- * - regcomp - compile a regular expression into internal code 
+ - regcomp - compile a regular expression into internal code
  *
- * We can't allocate space until we know how big the compiled form will be, but
- * we can't compile it (and thus know how big it is) until we've got a place
- * to put the code.  So we cheat:  we compile it twice, once with code
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code.  So we cheat:  we compile it twice, once with code
  * generation turned off and size counting turned on, and once "for real".
  * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the code
- * and thus invalidate pointers into it.  (Note that it has to be in one
- * piece because free() must be able to free it all.) 
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it.  (Note that it has to be in
+ * one piece because free() must be able to free it all.)
  *
- * Beware that the optimization-preparation code in here knows about some of the
- * structure of the compiled regexp. 
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.
  */
-regexp         *
+regexp *
 regcomp(exp)
-    char           *exp;
+const char *exp;
 {
-    register regexp *r;
-    register char  *scan;
-    register char  *longest;
-    register int    len;
-    int             flags;
-    extern char    *malloc();
- 
-    if (exp == NULL)
-	FAIL("NULL argument");
- 
-    /* First pass: determine size, legality. */
-    regparse = exp;
-    regnpar = 1;
-    regsize = 0L;
-    regcode = &regdummy;
-    regc(MAGIC);
-    if (reg(0, &flags) == NULL)
-	return (NULL);
- 
-    /* Small enough for pointer-storage convention? */
-    if (regsize >= 32767L)	/* Probably could be 65535L. */
-	FAIL("regexp too big");
- 
-    /* Allocate space. */
-    r = (regexp *) malloc(sizeof(regexp) + (unsigned) regsize);
-    if (r == NULL)
-	FAIL("out of space");
- 
-    /* Second pass: emit code. */
-    regparse = exp;
-    regnpar = 1;
-    regcode = r->program;
-    regc(MAGIC);
-    if (reg(0, &flags) == NULL)
-	return (NULL);
- 
-    /* Dig out information for optimizations. */
-    r->regstart = '\0';		/* Worst-case defaults. */
-    r->reganch = 0;
-    r->regmust = NULL;
-    r->regmlen = 0;
-    scan = r->program + 1;	/* First BRANCH. */
-    if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
-	scan = OPERAND(scan);
- 
-	/* Starting-point info. */
-	if (OP(scan) == EXACTLY)
-	    r->regstart = *OPERAND(scan);
-	else if (OP(scan) == BOL)
-	    r->reganch++;
- 
-	/*
-	 * If there's something expensive in the r.e., find the longest
-	 * literal string that must appear and make it the regmust.  Resolve
-	 * ties in favor of later strings, since the regstart check works
-	 * with the beginning of the r.e. and avoiding duplication
-	 * strengthens checking.  Not a strong reason, but sufficient in the
-	 * absence of others. 
-	 */
-	if (flags & SPSTART) {
-	    longest = NULL;
-	    len = 0;
-	    for (; scan != NULL; scan = regnext(scan))
-		if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
-		    longest = OPERAND(scan);
-		    len = strlen(OPERAND(scan));
+	register regexp *r;
+	size_t len;
+
+	len = regcomp_len(exp);
+	if (len <= 0)
+	        return NULL;
+
+	/* Allocate space. */
+	r = (regexp *)malloc(len);
+
+	if (r == NULL)
+		FAIL("out of space");
+	return regcomp_comp(exp, r, len);
+}
+
+
+size_t
+regcomp_len(exp)
+const char *exp;
+{
+	int flags;
+	register regexp *r;
+	struct comp co;
+
+	if (exp == NULL)
+		FAILN("NULL argument to regcomp");
+
+	/* First pass: determine size, legality. */
+	co.regparse = (char *)exp;
+	co.regnpar = 1;
+	co.regsize = 0L;
+	co.regdummy[0] = NOTHING;
+	co.regdummy[1] = co.regdummy[2] = 0;
+	co.regcode = co.regdummy;
+	regc(&co, MAGIC);
+	if (reg(&co, 0, &flags) == NULL)
+		return -1;
+
+	/* Small enough for pointer-storage convention? */
+	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
+		FAILN("regexp too big");
+
+	return (sizeof(regexp) + (size_t)co.regsize);
+}
+
+
+regexp *
+regcomp_comp(exp, r, len)
+const char *exp;
+register regexp *r;
+size_t len;
+{
+	register char *scan;
+	int flags;
+	struct comp co;
+
+	/* Second pass: emit code. */
+	co.regparse = (char *)exp;
+	co.regnpar = 1;
+	co.regcode = r->program;
+	co.regsize = len - sizeof(regexp);
+	regc(&co, MAGIC);
+	if (reg(&co, 0, &flags) == NULL)
+		return(NULL);
+
+	/* Dig out information for optimizations. */
+	r->regstart = '\0';		/* Worst-case defaults. */
+	r->reganch = 0;
+	r->regmust = 0;
+	r->regmlen = 0;
+	scan = r->program+1;		/* First BRANCH. */
+	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
+		scan = OPERAND(scan);
+
+		/* Starting-point info. */
+		if (OP(scan) == EXACTLY)
+			r->regstart = *OPERAND(scan);
+		else if (OP(scan) == BOL)
+			r->reganch = 1;
+
+		/*
+		 * If there's something expensive in the r.e., find the
+		 * longest literal string that must appear and make it the
+		 * regmust.  Resolve ties in favor of later strings, since
+		 * the regstart check works with the beginning of the r.e.
+		 * and avoiding duplication strengthens checking.  Not a
+		 * strong reason, but sufficient in the absence of others.
+		 */
+		if (flags&SPSTART) {
+			register char *longest = NULL;
+			register size_t len = 0;
+
+			for (; scan != NULL; scan = regnext(scan))
+				if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
+					longest = OPERAND(scan);
+					len = strlen(OPERAND(scan));
+				}
+			r->regmust = longest - r->program;
+			r->regmlen = (int)len;
 		}
-	    r->regmust = longest;
-	    r->regmlen = len;
 	}
-    }
-    return (r);
+
+	return(r);
 }
- 
+
 /*
- * - reg - regular expression, i.e. main body or parenthesized thing 
+ - reg - regular expression, i.e. main body or parenthesized thing
  *
- * Caller must absorb opening parenthesis. 
+ * Caller must absorb opening parenthesis.
  *
- * Combining parenthesis handling with the base level of regular expression is a
- * trifle forced, but the need to tie the tails of the branches to what
- * follows makes it hard to avoid. 
+ * Combining parenthesis handling with the base level of regular expression
+ * is a trifle forced, but the need to tie the tails of the branches to what
+ * follows makes it hard to avoid.
  */
-static char    *
-reg(paren, flagp)
-    int             paren;	/* Parenthesized? */
-    int            *flagp;
+static char *
+reg(cp, paren, flagp)
+register struct comp *cp;
+int paren;			/* Parenthesized? */
+int *flagp;
 {
-    register char  *ret;
-    register char  *br;
-    register char  *ender;
-    register int    parno;
-    int             flags;
- 
-    *flagp = HASWIDTH;		/* Tentatively. */
- 
-    /* Make an OPEN node, if parenthesized. */
-    if (paren) {
-	if (regnpar >= NSUBEXP)
-	    FAIL("too many ()");
-	parno = regnpar;
-	regnpar++;
-	ret = regnode(OPEN + parno);
-    } else
-	ret = NULL;
- 
-    /* Pick up the branches, linking them together. */
-    br = regbranch(&flags);
-    if (br == NULL)
-	return (NULL);
-    if (ret != NULL)
-	regtail(ret, br);	/* OPEN -> first. */
-    else
-	ret = br;
-    if (!(flags & HASWIDTH))
-	*flagp &= ~HASWIDTH;
-    *flagp |= flags & SPSTART;
-    while (*regparse == '|') {
-	regparse++;
-	br = regbranch(&flags);
+	register char *ret;
+	register char *br;
+	register char *ender;
+	register int parno;
+	int flags;
+
+	*flagp = HASWIDTH;	/* Tentatively. */
+
+	if (paren) {
+		/* Make an OPEN node. */
+		if (cp->regnpar >= NSUBEXP)
+			FAIL("too many ()");
+		parno = cp->regnpar;
+		cp->regnpar++;
+		ret = regnode(cp, OPEN+parno);
+	}
+
+	/* Pick up the branches, linking them together. */
+	br = regbranch(cp, &flags);
 	if (br == NULL)
-	    return (NULL);
-	regtail(ret, br);	/* BRANCH -> BRANCH. */
-	if (!(flags & HASWIDTH))
-	    *flagp &= ~HASWIDTH;
-	*flagp |= flags & SPSTART;
-    }
- 
-    /* Make a closing node, and hook it on the end. */
-    ender = regnode((paren) ? CLOSE + parno : END);
-    regtail(ret, ender);
- 
-    /* Hook the tails of the branches to the closing node. */
-    for (br = ret; br != NULL; br = regnext(br))
-	regoptail(br, ender);
- 
-    /* Check for proper termination. */
-    if (paren && *regparse++ != ')') {
-	FAIL("unmatched ()");
-    } else if (!paren && *regparse != '\0') {
-	if (*regparse == ')') {
-	    FAIL("unmatched ()");
-	} else
-	    FAIL("junk on end");/* "Can't happen". */
-	/* NOTREACHED */
-    }
-    return (ret);
-}
- 
-/*
- * - regbranch - one alternative of an | operator 
- *
- * Implements the concatenation operator. 
- */
-static char    *
-regbranch(flagp)
-    int            *flagp;
-{
-    register char  *ret;
-    register char  *chain;
-    register char  *latest;
-    int             flags;
- 
-    *flagp = WORST;		/* Tentatively. */
- 
-    ret = regnode(BRANCH);
-    chain = NULL;
-    while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
-	latest = regpiece(&flags);
-	if (latest == NULL)
-	    return (NULL);
-	*flagp |= flags & HASWIDTH;
-	if (chain == NULL)	/* First piece. */
-	    *flagp |= flags & SPSTART;
+		return(NULL);
+	if (paren)
+		regtail(cp, ret, br);	/* OPEN -> first. */
 	else
-	    regtail(chain, latest);
-	chain = latest;
-    }
-    if (chain == NULL)		/* Loop ran zero times. */
-	(void) regnode(NOTHING);
- 
-    return (ret);
-}
- 
-/*
- * - regpiece - something followed by possible [*+?] 
- *
- * Note that the branching code sequences used for ? and the general cases of *
- * and + are somewhat optimized:  they use the same NOTHING node as both the
- * endmarker for their branch list and the body of the last branch. It might
- * seem that this node could be dispensed with entirely, but the endmarker
- * role is not redundant. 
- */
-static char    *
-regpiece(flagp)
-    int            *flagp;
-{
-    register char  *ret;
-    register char   op;
-    register char  *next;
-    int             flags;
- 
-    ret = regatom(&flags);
-    if (ret == NULL)
-	return (NULL);
- 
-    op = *regparse;
-    if (!ISMULT(op)) {
-	*flagp = flags;
-	return (ret);
-    }
-    if (!(flags & HASWIDTH) && op != '?')
-	FAIL("*+ operand could be empty");
-    *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);
- 
-    if (op == '*' && (flags & SIMPLE))
-	reginsert(STAR, ret);
-    else if (op == '*') {
-	/* Emit x* as (x&|), where & means "self". */
-	reginsert(BRANCH, ret);	/* Either x */
-	regoptail(ret, regnode(BACK));	/* and loop */
-	regoptail(ret, ret);	/* back */
-	regtail(ret, regnode(BRANCH));	/* or */
-	regtail(ret, regnode(NOTHING));	/* null. */
-    } else if (op == '+' && (flags & SIMPLE))
-	reginsert(PLUS, ret);
-    else if (op == '+') {
-	/* Emit x+ as x(&|), where & means "self". */
-	next = regnode(BRANCH);	/* Either */
-	regtail(ret, next);
-	regtail(regnode(BACK), ret);	/* loop back */
-	regtail(next, regnode(BRANCH));	/* or */
-	regtail(ret, regnode(NOTHING));	/* null. */
-    } else if (op == '?') {
-	/* Emit x? as (x|) */
-	reginsert(BRANCH, ret);	/* Either x */
-	regtail(ret, regnode(BRANCH));	/* or */
-	next = regnode(NOTHING);/* null. */
-	regtail(ret, next);
-	regoptail(ret, next);
-    }
-    regparse++;
-    if (ISMULT(*regparse))
-	FAIL("nested *?+");
- 
-    return (ret);
-}
- 
-/*
- * - regatom - the lowest level 
- *
- * Optimization:  gobbles an entire sequence of ordinary characters so that it
- * can turn them into a single node, which is smaller to store and faster to
- * run.  Backslashed characters are exceptions, each becoming a separate
- * node; the code is simpler that way and it's not worth fixing. 
- */
-static char    *
-regatom(flagp)
-    int            *flagp;
-{
-    register char  *ret;
-    int             flags;
- 
-    *flagp = WORST;		/* Tentatively. */
- 
-    switch (*regparse++) {
-    case '^':
-	ret = regnode(BOL);
-	break;
-    case '$':
-	ret = regnode(EOL);
-	break;
-    case '.':
-	ret = regnode(ANY);
-	*flagp |= HASWIDTH | SIMPLE;
-	break;
-    case '[':{
-	    register int    class;
-	    register int    classend;
- 
-	    if (*regparse == '^') {	/* Complement of range. */
-		ret = regnode(ANYBUT);
-		regparse++;
-	    } else
-		ret = regnode(ANYOF);
-	    if (*regparse == ']' || *regparse == '-')
-		regc(*regparse++);
-	    while (*regparse != '\0' && *regparse != ']') {
-		if (*regparse == '-') {
-		    regparse++;
-		    if (*regparse == ']' || *regparse == '\0')
-			regc('-');
-		    else {
-			class = UCHARAT(regparse - 2) + 1;
-			classend = UCHARAT(regparse);
-			if (class > classend + 1)
-			    FAIL("invalid [] range");
-			for (; class <= classend; class++)
-			    regc(class);
-			regparse++;
-		    }
+		ret = br;
+	*flagp &= ~(~flags&HASWIDTH);	/* Clear bit if bit 0. */
+	*flagp |= flags&SPSTART;
+	while (*cp->regparse == '|') {
+		cp->regparse++;
+		br = regbranch(cp, &flags);
+		if (br == NULL)
+			return(NULL);
+		regtail(cp, ret, br);	/* BRANCH -> BRANCH. */
+		*flagp &= ~(~flags&HASWIDTH);
+		*flagp |= flags&SPSTART;
+	}
+
+	/* Make a closing node, and hook it on the end. */
+	ender = regnode(cp, (paren) ? CLOSE+parno : END);
+	regtail(cp, ret, ender);
+
+	/* Hook the tails of the branches to the closing node. */
+	for (br = ret; br != NULL; br = regnext(br))
+		regoptail(cp, br, ender);
+
+	/* Check for proper termination. */
+	if (paren && *cp->regparse++ != ')') {
+		FAIL("unterminated ()");
+	} else if (!paren && *cp->regparse != '\0') {
+		if (*cp->regparse == ')') {
+			FAIL("unmatched ()");
 		} else
-		    regc(*regparse++);
-	    }
-	    regc('\0');
-	    if (*regparse != ']')
-		FAIL("unmatched []");
-	    regparse++;
-	    *flagp |= HASWIDTH | SIMPLE;
+			FAIL("internal error: junk on end");
+		/* NOTREACHED */
 	}
-	break;
-    case '(':
-	ret = reg(1, &flags);
-	if (ret == NULL)
-	    return (NULL);
-	*flagp |= flags & (HASWIDTH | SPSTART);
-	break;
-    case '\0':
-    case '|':
-    case ')':
-	FAIL("internal urp");	/* Supposed to be caught earlier. */
-	break;
-    case '?':
-    case '+':
-    case '*':
-	FAIL("?+* follows nothing");
-	break;
-    case '\\':
-	if (*regparse == '\0')
-	    FAIL("trailing \\");
-	ret = regnode(EXACTLY);
-	regc(*regparse++);
-	regc('\0');
-	*flagp |= HASWIDTH | SIMPLE;
-	break;
-    default:{
-	    register int    len;
-	    register char   ender;
- 
-	    regparse--;
-	    len = strcspn(regparse, META);
-	    if (len <= 0)
-		FAIL("internal disaster");
-	    ender = *(regparse + len);
-	    if (len > 1 && ISMULT(ender))
-		len--;		/* Back off clear of ?+* operand. */
-	    *flagp |= HASWIDTH;
-	    if (len == 1)
-		*flagp |= SIMPLE;
-	    ret = regnode(EXACTLY);
-	    while (len > 0) {
-		regc(*regparse++);
-		len--;
-	    }
-	    regc('\0');
-	}
-	break;
-    }
- 
-    return (ret);
+
+	return(ret);
 }
- 
+
 /*
- * - regnode - emit a node 
- */
-static char    *		/* Location. */
-regnode(op)
-    char            op;
-{
-    register char  *ret;
-    register char  *ptr;
- 
-    ret = regcode;
-    if (ret == &regdummy) {
-	regsize += 3;
-	return (ret);
-    }
-    ptr = ret;
-    *ptr++ = op;
-    *ptr++ = '\0';		/* Null "next" pointer. */
-    *ptr++ = '\0';
-    regcode = ptr;
- 
-    return (ret);
-}
- 
-/*
- * - regc - emit (if appropriate) a byte of code 
- */
-static void
-regc(b)
-    char            b;
-{
-    if (regcode != &regdummy)
-	*regcode++ = b;
-    else
-	regsize++;
-}
- 
-/*
- * - reginsert - insert an operator in front of already-emitted operand 
+ - regbranch - one alternative of an | operator
  *
- * Means relocating the operand. 
+ * Implements the concatenation operator.
+ */
+static char *
+regbranch(cp, flagp)
+register struct comp *cp;
+int *flagp;
+{
+	register char *ret;
+	register char *chain;
+	register char *latest;
+	int flags;
+	register int c;
+
+	*flagp = WORST;				/* Tentatively. */
+
+	ret = regnode(cp, BRANCH);
+	chain = NULL;
+	while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') {
+		latest = regpiece(cp, &flags);
+		if (latest == NULL)
+			return(NULL);
+		*flagp |= flags&HASWIDTH;
+		if (chain == NULL)		/* First piece. */
+			*flagp |= flags&SPSTART;
+		else
+			regtail(cp, chain, latest);
+		chain = latest;
+	}
+	if (chain == NULL)			/* Loop ran zero times. */
+		(void) regnode(cp, NOTHING);
+
+	return(ret);
+}
+
+/*
+ - regpiece - something followed by possible [*+?]
+ *
+ * Note that the branching code sequences used for ? and the general cases
+ * of * and + are somewhat optimized:  they use the same NOTHING node as
+ * both the endmarker for their branch list and the body of the last branch.
+ * It might seem that this node could be dispensed with entirely, but the
+ * endmarker role is not redundant.
+ */
+static char *
+regpiece(cp, flagp)
+register struct comp *cp;
+int *flagp;
+{
+	register char *ret;
+	register char op;
+	register char *next;
+	int flags;
+
+	ret = regatom(cp, &flags);
+	if (ret == NULL)
+		return(NULL);
+
+	op = *cp->regparse;
+	if (!ISREPN(op)) {
+		*flagp = flags;
+		return(ret);
+	}
+
+	if (!(flags&HASWIDTH) && op != '?')
+		FAIL("*+ operand could be empty");
+	switch (op) {
+	case '*':	*flagp = WORST|SPSTART;			break;
+	case '+':	*flagp = WORST|SPSTART|HASWIDTH;	break;
+	case '?':	*flagp = WORST;				break;
+	}
+
+	if (op == '*' && (flags&SIMPLE))
+		reginsert(cp, STAR, ret);
+	else if (op == '*') {
+		/* Emit x* as (x&|), where & means "self". */
+		reginsert(cp, BRANCH, ret);		/* Either x */
+		regoptail(cp, ret, regnode(cp, BACK));	/* and loop */
+		regoptail(cp, ret, ret);		/* back */
+		regtail(cp, ret, regnode(cp, BRANCH));	/* or */
+		regtail(cp, ret, regnode(cp, NOTHING));	/* null. */
+	} else if (op == '+' && (flags&SIMPLE))
+		reginsert(cp, PLUS, ret);
+	else if (op == '+') {
+		/* Emit x+ as x(&|), where & means "self". */
+		next = regnode(cp, BRANCH);		/* Either */
+		regtail(cp, ret, next);
+		regtail(cp, regnode(cp, BACK), ret);	/* loop back */
+		regtail(cp, next, regnode(cp, BRANCH));	/* or */
+		regtail(cp, ret, regnode(cp, NOTHING));	/* null. */
+	} else if (op == '?') {
+		/* Emit x? as (x|) */
+		reginsert(cp, BRANCH, ret);		/* Either x */
+		regtail(cp, ret, regnode(cp, BRANCH));	/* or */
+		next = regnode(cp, NOTHING);		/* null. */
+		regtail(cp, ret, next);
+		regoptail(cp, ret, next);
+	}
+	cp->regparse++;
+	if (ISREPN(*cp->regparse))
+		FAIL("nested *?+");
+
+	return(ret);
+}
+
+/*
+ - regatom - the lowest level
+ *
+ * Optimization:  gobbles an entire sequence of ordinary characters so that
+ * it can turn them into a single node, which is smaller to store and
+ * faster to run.  Backslashed characters are exceptions, each becoming a
+ * separate node; the code is simpler that way and it's not worth fixing.
+ */
+static char *
+regatom(cp, flagp)
+register struct comp *cp;
+int *flagp;
+{
+	register char *ret;
+	int flags;
+
+	*flagp = WORST;		/* Tentatively. */
+
+	switch (*cp->regparse++) {
+	case '^':
+		ret = regnode(cp, BOL);
+		break;
+	case '$':
+		ret = regnode(cp, EOL);
+		break;
+	case '.':
+		ret = regnode(cp, ANY);
+		*flagp |= HASWIDTH|SIMPLE;
+		break;
+	case '[': {
+		register int range;
+		register int rangeend;
+		register int c;
+
+		if (*cp->regparse == '^') {	/* Complement of range. */
+			ret = regnode(cp, ANYBUT);
+			cp->regparse++;
+		} else
+			ret = regnode(cp, ANYOF);
+		if ((c = *cp->regparse) == ']' || c == '-') {
+			regc(cp, c);
+			cp->regparse++;
+		}
+		while ((c = *cp->regparse++) != '\0' && c != ']') {
+			if (c != '-')
+				regc(cp, c);
+			else if ((c = *cp->regparse) == ']' || c == '\0')
+				regc(cp, '-');
+			else {
+				range = (unsigned char)*(cp->regparse-2);
+				rangeend = (unsigned char)c;
+				if (range > rangeend)
+					FAIL("invalid [] range");
+				for (range++; range <= rangeend; range++)
+					regc(cp, range);
+				cp->regparse++;
+			}
+		}
+		regc(cp, '\0');
+		if (c != ']')
+			FAIL("unmatched []");
+		*flagp |= HASWIDTH|SIMPLE;
+		break;
+		}
+	case '(':
+		ret = reg(cp, 1, &flags);
+		if (ret == NULL)
+			return(NULL);
+		*flagp |= flags&(HASWIDTH|SPSTART);
+		break;
+	case '\0':
+	case '|':
+	case ')':
+		/* supposed to be caught earlier */
+		FAIL("internal error: \\0|) unexpected");
+		break;
+	case '?':
+	case '+':
+	case '*':
+		FAIL("?+* follows nothing");
+		break;
+	case '\\':
+		if (*cp->regparse == '\0')
+			FAIL("trailing \\");
+		ret = regnode(cp, EXACTLY);
+		regc(cp, *cp->regparse++);
+		regc(cp, '\0');
+		*flagp |= HASWIDTH|SIMPLE;
+		break;
+	default: {
+		register size_t len;
+		register char ender;
+
+		cp->regparse--;
+		len = strcspn(cp->regparse, META);
+		if (len == 0)
+			FAIL("internal error: strcspn 0");
+		ender = *(cp->regparse+len);
+		if (len > 1 && ISREPN(ender))
+			len--;		/* Back off clear of ?+* operand. */
+		*flagp |= HASWIDTH;
+		if (len == 1)
+			*flagp |= SIMPLE;
+		ret = regnode(cp, EXACTLY);
+		for (; len > 0; len--)
+			regc(cp, *cp->regparse++);
+		regc(cp, '\0');
+		break;
+		}
+	}
+
+	return(ret);
+}
+
+/*
+ - regnode - emit a node
+ */
+static char *			/* Location. */
+regnode(cp, op)
+register struct comp *cp;
+char op;
+{
+	register char *const ret = cp->regcode;
+	register char *ptr;
+
+	if (!EMITTING(cp)) {
+		cp->regsize += 3;
+		return(ret);
+	}
+
+	ptr = ret;
+	*ptr++ = op;
+	*ptr++ = '\0';		/* Null next pointer. */
+	*ptr++ = '\0';
+	cp->regcode = ptr;
+
+	return(ret);
+}
+
+/*
+ - regc - emit (if appropriate) a byte of code
  */
 static void
-reginsert(op, opnd)
-    char            op;
-    char           *opnd;
+regc(cp, b)
+register struct comp *cp;
+char b;
 {
-    register char  *src;
-    register char  *dst;
-    register char  *place;
- 
-    if (regcode == &regdummy) {
-	regsize += 3;
-	return;
-    }
-    src = regcode;
-    regcode += 3;
-    dst = regcode;
-    while (src > opnd)
-	*--dst = *--src;
- 
-    place = opnd;		/* Op node, where operand used to be. */
-    *place++ = op;
-    *place++ = '\0';
-    *place++ = '\0';
+	if (EMITTING(cp))
+		*cp->regcode++ = b;
+	else
+		cp->regsize++;
 }
- 
+
 /*
- * - regtail - set the next-pointer at the end of a node chain 
+ - reginsert - insert an operator in front of already-emitted operand
+ *
+ * Means relocating the operand.
  */
 static void
-regtail(p, val)
-    char           *p;
-    char           *val;
+reginsert(cp, op, opnd)
+register struct comp *cp;
+char op;
+char *opnd;
 {
-    register char  *scan;
-    register char  *temp;
-    register int    offset;
- 
-    if (p == &regdummy)
-	return;
- 
-    /* Find last node. */
-    scan = p;
-    for (;;) {
-	temp = regnext(scan);
-	if (temp == NULL)
-	    break;
-	scan = temp;
-    }
- 
-    if (OP(scan) == BACK)
-	offset = scan - val;
-    else
-	offset = val - scan;
-    *(scan + 1) = (offset >> 8) & 0377;
-    *(scan + 2) = offset & 0377;
+	register char *place;
+
+	if (!EMITTING(cp)) {
+		cp->regsize += 3;
+		return;
+	}
+
+	(void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd));
+	cp->regcode += 3;
+
+	place = opnd;		/* Op node, where operand used to be. */
+	*place++ = op;
+	*place++ = '\0';
+	*place++ = '\0';
 }
- 
+
 /*
- * - regoptail - regtail on operand of first argument; nop if operandless 
+ - regtail - set the next-pointer at the end of a node chain
  */
 static void
-regoptail(p, val)
-    char           *p;
-    char           *val;
+regtail(cp, p, val)
+register struct comp *cp;
+char *p;
+char *val;
 {
-    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
-    if (p == NULL || p == &regdummy || OP(p) != BRANCH)
-	return;
-    regtail(OPERAND(p), val);
+	register char *scan;
+	register char *temp;
+	register int offset;
+
+	if (!EMITTING(cp))
+		return;
+
+	/* Find last node. */
+	for (scan = p; (temp = regnext(scan)) != NULL; scan = temp)
+		continue;
+
+	offset = (OP(scan) == BACK) ? scan - val : val - scan;
+	*(scan+1) = (offset>>8)&0177;
+	*(scan+2) = offset&0377;
 }
- 
+
 /*
- * regexec and friends 
+ - regoptail - regtail on operand of first argument; nop if operandless
  */
- 
+static void
+regoptail(cp, p, val)
+register struct comp *cp;
+char *p;
+char *val;
+{
+	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
+	if (!EMITTING(cp) || OP(p) != BRANCH)
+		return;
+	regtail(cp, OPERAND(p), val);
+}
+
 /*
- * Global work variables for regexec(). 
+ * regexec and friends
  */
-static char    *reginput;	/* String-input pointer. */
-static char    *regbol;		/* Beginning of input, for ^ check. */
-static char   **regstartp;	/* Pointer to startp array. */
-static char   **regendp;	/* Ditto for endp. */
- 
+
 /*
- * Forwards. 
+ * Work-variable struct for regexec().
  */
-STATIC int      regtry();
-STATIC int      regmatch();
-STATIC int      regrepeat();
- 
+struct exec {
+	char *reginput;		/* String-input pointer. */
+	char *regbol;		/* Beginning of input, for ^ check. */
+	const char **regstartp;	/* Pointer to startp array. */
+	const char **regendp;		/* Ditto for endp. */
+};
+
+/*
+ * Forwards.
+ */
+static int regtry(struct exec *ep, regexp *rp, char *string);
+static int regmatch(struct exec *ep, char *prog);
+static size_t regrepeat(struct exec *ep, char *node);
+
 #ifdef DEBUG
-int             regnarrate = 0;
-void            regdump();
-STATIC char    *regprop();
+int regnarrate = 0;
+void regdump();
+static char *regprop();
 #endif
- 
+
 /*
- * - regexec - match a regexp against a string 
+ - regexec - match a regexp against a string
  */
 int
-regexec(prog, string)
-    register regexp *prog;
-    register char  *string;
+regexec(prog, str)
+register regexp *prog;
+const char *str;
 {
-    register char  *s;
-    extern char    *strchr();
- 
-    /* Be paranoid... */
-    if (prog == NULL || string == NULL) {
-	regerror("NULL parameter");
-	return (0);
-    }
-    /* Check validity of program. */
-    if (UCHARAT(prog->program) != MAGIC) {
-	regerror("corrupted program");
-	return (0);
-    }
-    /* If there is a "must appear" string, look for it. */
-    if (prog->regmust != NULL) {
-	s = string;
-	while ((s = strchr(s, prog->regmust[0])) != NULL) {
-	    if (strncmp(s, prog->regmust, prog->regmlen) == 0)
-		break;		/* Found it. */
-	    s++;
+	register char *string = (char *)str;	/* avert const poisoning */
+	register char *s;
+	struct exec ex;
+
+	/* Be paranoid. */
+	if (prog == NULL || string == NULL) {
+		regerror("NULL argument to regexec");
+		return(0);
 	}
-	if (s == NULL)		/* Not present. */
-	    return (0);
-    }
-    /* Mark beginning of line for ^ . */
-    regbol = string;
- 
-    /* Simplest case:  anchored match need be tried only once. */
-    if (prog->reganch)
-	return (regtry(prog, string));
- 
-    /* Messy cases:  unanchored match. */
-    s = string;
-    if (prog->regstart != '\0')
-	/* We know what char it must start with. */
-	while ((s = strchr(s, prog->regstart)) != NULL) {
-	    if (regtry(prog, s))
-		return (1);
-	    s++;
+
+	/* Check validity of program. */
+	if ((unsigned char)*prog->program != MAGIC) {
+		regerror("corrupted regexp");
+		return(0);
 	}
-    else
-	/* We don't -- general case. */
-	do {
-	    if (regtry(prog, s))
-		return (1);
-	} while (*s++ != '\0');
- 
-    /* Failure. */
-    return (0);
+
+	/* If there is a "must appear" string, look for it. */
+	if ((prog->regmlen > 0) &&
+	    strstr(string, &prog->program[prog->regmust]) == NULL)
+		return(0);
+
+	/* Mark beginning of line for ^ . */
+	ex.regbol = string;
+	ex.regstartp = prog->startp;
+	ex.regendp = prog->endp;
+
+	/* Simplest case:  anchored match need be tried only once. */
+	if (prog->reganch)
+		return(regtry(&ex, prog, string));
+
+	/* Messy cases:  unanchored match. */
+	if (prog->regstart != '\0') {
+		/* We know what char it must start with. */
+		for (s = string; s != NULL; s = strchr(s+1, prog->regstart))
+			if (regtry(&ex, prog, s))
+				return(1);
+		return(0);
+	} else {
+		/* We don't -- general case. */
+		for (s = string; !regtry(&ex, prog, s); s++)
+			if (*s == '\0')
+				return(0);
+		return(1);
+	}
+	/* NOTREACHED */
 }
- 
+
 /*
- * - regtry - try match at specific point 
+ - regtry - try match at specific point
  */
 static int			/* 0 failure, 1 success */
-regtry(prog, string)
-    regexp         *prog;
-    char           *string;
+regtry(ep, prog, string)
+register struct exec *ep;
+regexp *prog;
+char *string;
 {
-    register int    i;
-    register char **sp;
-    register char **ep;
- 
-    reginput = string;
-    regstartp = prog->startp;
-    regendp = prog->endp;
- 
-    sp = prog->startp;
-    ep = prog->endp;
-    for (i = NSUBEXP; i > 0; i--) {
-	*sp++ = NULL;
-	*ep++ = NULL;
-    }
-    if (regmatch(prog->program + 1)) {
-	prog->startp[0] = string;
-	prog->endp[0] = reginput;
-	return (1);
-    } else
-	return (0);
+	register int i;
+	register const char **stp;
+	register const char **enp;
+
+	ep->reginput = string;
+
+	stp = prog->startp;
+	enp = prog->endp;
+	for (i = NSUBEXP; i > 0; i--) {
+		*stp++ = NULL;
+		*enp++ = NULL;
+	}
+	if (regmatch(ep, prog->program + 1)) {
+		prog->startp[0] = string;
+		prog->endp[0] = ep->reginput;
+		return(1);
+	} else
+		return(0);
 }
- 
+
 /*
- * - regmatch - main matching routine 
+ - regmatch - main matching routine
  *
- * Conceptually the strategy is simple:  check to see whether the current node
- * matches, call self recursively to see whether the rest matches, and then
- * act accordingly.  In practice we make some effort to avoid recursion, in
- * particular by going through "ordinary" nodes (that don't need to know
- * whether the rest of the match failed) by a loop instead of by recursion. 
+ * Conceptually the strategy is simple:  check to see whether the current
+ * node matches, call self recursively to see whether the rest matches,
+ * and then act accordingly.  In practice we make some effort to avoid
+ * recursion, in particular by going through "ordinary" nodes (that don't
+ * need to know whether the rest of the match failed) by a loop instead of
+ * by recursion.
  */
 static int			/* 0 failure, 1 success */
-regmatch(prog)
-    char           *prog;
+regmatch(ep, prog)
+register struct exec *ep;
+char *prog;
 {
-    register char  *scan;	/* Current node. */
-    char           *next;	/* Next node. */
-    extern char    *strchr();
- 
-    scan = prog;
+	register char *scan;	/* Current node. */
+	char *next;		/* Next node. */
+
 #ifdef DEBUG
-    if (scan != NULL && regnarrate)
-	fprintf(stderr, "%s(\n", regprop(scan));
+	if (prog != NULL && regnarrate)
+		fprintf(stderr, "%s(\n", regprop(prog));
 #endif
-    while (scan != NULL) {
+	for (scan = prog; scan != NULL; scan = next) {
 #ifdef DEBUG
-	if (regnarrate)
-	    fprintf(stderr, "%s...\n", regprop(scan));
+		if (regnarrate)
+			fprintf(stderr, "%s...\n", regprop(scan));
 #endif
-	next = regnext(scan);
- 
-	switch (OP(scan)) {
-	case BOL:
-	    if (reginput != regbol)
-		return (0);
-	    break;
-	case EOL:
-	    if (*reginput != '\0')
-		return (0);
-	    break;
+		next = regnext(scan);
+
+		switch (OP(scan)) {
+		case BOL:
+			if (ep->reginput != ep->regbol)
+				return(0);
+			break;
+		case EOL:
+			if (*ep->reginput != '\0')
+				return(0);
+			break;
+		case ANY:
+			if (*ep->reginput == '\0')
+				return(0);
+			ep->reginput++;
+			break;
+		case EXACTLY: {
+			register size_t len;
+			register char *const opnd = OPERAND(scan);
+
+			/* Inline the first character, for speed. */
+			if (*opnd != *ep->reginput)
+				return(0);
+			len = strlen(opnd);
+			if (len > 1 && strncmp(opnd, ep->reginput, len) != 0)
+				return(0);
+			ep->reginput += len;
+			break;
+			}
+		case ANYOF:
+			if (*ep->reginput == '\0' ||
+					strchr(OPERAND(scan), *ep->reginput) == NULL)
+				return(0);
+			ep->reginput++;
+			break;
+		case ANYBUT:
+			if (*ep->reginput == '\0' ||
+					strchr(OPERAND(scan), *ep->reginput) != NULL)
+				return(0);
+			ep->reginput++;
+			break;
+		case NOTHING:
+			break;
+		case BACK:
+			break;
+		case OPEN+1: case OPEN+2: case OPEN+3:
+		case OPEN+4: case OPEN+5: case OPEN+6:
+		case OPEN+7: case OPEN+8: case OPEN+9: {
+			register const int no = OP(scan) - OPEN;
+			register char *const input = ep->reginput;
+
+			if (regmatch(ep, next)) {
+				/*
+				 * Don't set startp if some later
+				 * invocation of the same parentheses
+				 * already has.
+				 */
+				if (ep->regstartp[no] == NULL)
+					ep->regstartp[no] = input;
+				return(1);
+			} else
+				return(0);
+			break;
+			}
+		case CLOSE+1: case CLOSE+2: case CLOSE+3:
+		case CLOSE+4: case CLOSE+5: case CLOSE+6:
+		case CLOSE+7: case CLOSE+8: case CLOSE+9: {
+			register const int no = OP(scan) - CLOSE;
+			register char *const input = ep->reginput;
+
+			if (regmatch(ep, next)) {
+				/*
+				 * Don't set endp if some later
+				 * invocation of the same parentheses
+				 * already has.
+				 */
+				if (ep->regendp[no] == NULL)
+					ep->regendp[no] = input;
+				return(1);
+			} else
+				return(0);
+			break;
+			}
+		case BRANCH: {
+			register char *const save = ep->reginput;
+
+			if (OP(next) != BRANCH)		/* No choice. */
+				next = OPERAND(scan);	/* Avoid recursion. */
+			else {
+				while (OP(scan) == BRANCH) {
+					if (regmatch(ep, OPERAND(scan)))
+						return(1);
+					ep->reginput = save;
+					scan = regnext(scan);
+				}
+				return(0);
+				/* NOTREACHED */
+			}
+			break;
+			}
+		case STAR: case PLUS: {
+			register const char nextch =
+				(OP(next) == EXACTLY) ? *OPERAND(next) : '\0';
+			register size_t no;
+			register char *const save = ep->reginput;
+			register const size_t min = (OP(scan) == STAR) ? 0 : 1;
+
+			for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) {
+				ep->reginput = save + no - 1;
+				/* If it could work, try it. */
+				if (nextch == '\0' || *ep->reginput == nextch)
+					if (regmatch(ep, next))
+						return(1);
+			}
+			return(0);
+			break;
+			}
+		case END:
+			return(1);	/* Success! */
+			break;
+		default:
+			regerror("regexp corruption");
+			return(0);
+			break;
+		}
+	}
+
+	/*
+	 * We get here only if there's trouble -- normally "case END" is
+	 * the terminating point.
+	 */
+	regerror("corrupted pointers");
+	return(0);
+}
+
+/*
+ - regrepeat - report how many times something simple would match
+ */
+static size_t
+regrepeat(ep, node)
+register struct exec *ep;
+char *node;
+{
+	register size_t count;
+	register char *scan;
+	register char ch;
+
+	switch (OP(node)) {
 	case ANY:
-	    if (*reginput == '\0')
-		return (0);
-	    reginput++;
-	    break;
-	case EXACTLY:{
-		register int    len;
-		register char  *opnd;
- 
-		opnd = OPERAND(scan);
-		/* Inline the first character, for speed. */
-		if (*opnd != *reginput)
-		    return (0);
-		len = strlen(opnd);
-		if (len > 1 && strncmp(opnd, reginput, len) != 0)
-		    return (0);
-		reginput += len;
-	    }
-	    break;
+		return(strlen(ep->reginput));
+		break;
+	case EXACTLY:
+		ch = *OPERAND(node);
+		count = 0;
+		for (scan = ep->reginput; *scan == ch; scan++)
+			count++;
+		return(count);
+		break;
 	case ANYOF:
-	    if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
-		return (0);
-	    reginput++;
-	    break;
+		return(strspn(ep->reginput, OPERAND(node)));
+		break;
 	case ANYBUT:
-	    if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
-		return (0);
-	    reginput++;
-	    break;
-	case NOTHING:
-	    break;
-	case BACK:
-	    break;
-	case OPEN + 1:
-	case OPEN + 2:
-	case OPEN + 3:
-	case OPEN + 4:
-	case OPEN + 5:
-	case OPEN + 6:
-	case OPEN + 7:
-	case OPEN + 8:
-	case OPEN + 9:{
-		register int    no;
-		register char  *save;
- 
-		no = OP(scan) - OPEN;
-		save = reginput;
- 
-		if (regmatch(next)) {
-		    /*
-		     * Don't set startp if some later invocation of the same
-		     * parentheses already has. 
-		     */
-		    if (regstartp[no] == NULL)
-			regstartp[no] = save;
-		    return (1);
-		} else
-		    return (0);
-	    }
-	    break;
-	case CLOSE + 1:
-	case CLOSE + 2:
-	case CLOSE + 3:
-	case CLOSE + 4:
-	case CLOSE + 5:
-	case CLOSE + 6:
-	case CLOSE + 7:
-	case CLOSE + 8:
-	case CLOSE + 9:{
-		register int    no;
-		register char  *save;
- 
-		no = OP(scan) - CLOSE;
-		save = reginput;
- 
-		if (regmatch(next)) {
-		    /*
-		     * Don't set endp if some later invocation of the same
-		     * parentheses already has. 
-		     */
-		    if (regendp[no] == NULL)
-			regendp[no] = save;
-		    return (1);
-		} else
-		    return (0);
-	    }
-	    break;
-	case BRANCH:{
-		register char  *save;
- 
-		if (OP(next) != BRANCH)	/* No choice. */
-		    next = OPERAND(scan);	/* Avoid recursion. */
-		else {
-		    do {
-			save = reginput;
-			if (regmatch(OPERAND(scan)))
-			    return (1);
-			reginput = save;
-			scan = regnext(scan);
-		    } while (scan != NULL && OP(scan) == BRANCH);
-		    return (0);
-		    /* NOTREACHED */
-		}
-	    }
-	    break;
-	case STAR:
-	case PLUS:{
-		register char   nextch;
-		register int    no;
-		register char  *save;
-		register int    min;
- 
-		/*
-		 * Lookahead to avoid useless match attempts when we know
-		 * what character comes next. 
-		 */
-		nextch = '\0';
-		if (OP(next) == EXACTLY)
-		    nextch = *OPERAND(next);
-		min = (OP(scan) == STAR) ? 0 : 1;
-		save = reginput;
-		no = regrepeat(OPERAND(scan));
-		while (no >= min) {
-		    /* If it could work, try it. */
-		    if (nextch == '\0' || *reginput == nextch)
-			if (regmatch(next))
-			    return (1);
-		    /* Couldn't or didn't -- back up. */
-		    no--;
-		    reginput = save + no;
-		}
-		return (0);
-	    }
-	    break;
-	case END:
-	    return (1);		/* Success! */
-	    break;
-	default:
-	    regerror("memory corruption");
-	    return (0);
-	    break;
+		return(strcspn(ep->reginput, OPERAND(node)));
+		break;
+	default:		/* Oh dear.  Called inappropriately. */
+		regerror("internal error: bad call of regrepeat");
+		return(0);	/* Best compromise. */
+		break;
 	}
- 
-	scan = next;
-    }
- 
-    /*
-     * We get here only if there's trouble -- normally "case END" is the
-     * terminating point. 
-     */
-    regerror("corrupted pointers");
-    return (0);
+	/* NOTREACHED */
 }
- 
+
 /*
- * - regrepeat - repeatedly match something simple, report how many 
+ - regnext - dig the "next" pointer out of a node
  */
-static int
-regrepeat(p)
-    char           *p;
-{
-    register int    count = 0;
-    register char  *scan;
-    register char  *opnd;
- 
-    scan = reginput;
-    opnd = OPERAND(p);
-    switch (OP(p)) {
-    case ANY:
-	count = strlen(scan);
-	scan += count;
-	break;
-    case EXACTLY:
-	while (*opnd == *scan) {
-	    count++;
-	    scan++;
-	}
-	break;
-    case ANYOF:
-	while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
-	    count++;
-	    scan++;
-	}
-	break;
-    case ANYBUT:
-	while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
-	    count++;
-	    scan++;
-	}
-	break;
-    default:			/* Oh dear.  Called inappropriately. */
-	regerror("internal foulup");
-	count = 0;		/* Best compromise. */
-	break;
-    }
-    reginput = scan;
- 
-    return (count);
-}
- 
-/*
- * - regnext - dig the "next" pointer out of a node 
- */
-static char    *
+static char *
 regnext(p)
-    register char  *p;
+register char *p;
 {
-    register int    offset;
- 
-    if (p == &regdummy)
-	return (NULL);
- 
-    offset = NEXT(p);
-    if (offset == 0)
-	return (NULL);
- 
-    if (OP(p) == BACK)
-	return (p - offset);
-    else
-	return (p + offset);
+	register const int offset = NEXT(p);
+
+	if (offset == 0)
+		return(NULL);
+
+	return((OP(p) == BACK) ? p-offset : p+offset);
 }
- 
+
 #ifdef DEBUG
- 
-STATIC char    *regprop();
- 
+
+static char *regprop();
+
 /*
- * - regdump - dump a regexp onto stdout in vaguely comprehensible form 
+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
  */
 void
 regdump(r)
-    regexp         *r;
+regexp *r;
 {
-    register char  *s;
-    register char   op = EXACTLY;	/* Arbitrary non-END op. */
-    register char  *next;
-    extern char    *strchr();
- 
- 
-    s = r->program + 1;
-    while (op != END) {		/* While that wasn't END last time... */
-	op = OP(s);
-	printf("%2d%s", s - r->program, regprop(s));	/* Where, what. */
-	next = regnext(s);
-	if (next == NULL)	/* Next ptr. */
-	    printf("(0)");
-	else
-	    printf("(%d)", (s - r->program) + (next - s));
-	s += 3;
-	if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
-	    /* Literal string, where present. */
-	    while (*s != '\0') {
-		putchar(*s);
-		s++;
-	    }
-	    s++;
+	register char *s;
+	register char op = EXACTLY;	/* Arbitrary non-END op. */
+	register char *next;
+
+
+	s = r->program + 1;
+	while (op != END) {	/* While that wasn't END last time... */
+		op = OP(s);
+		printf("%2d%s", s-r->program, regprop(s));	/* Where, what. */
+		next = regnext(s);
+		if (next == NULL)		/* Next ptr. */
+			printf("(0)");
+		else 
+			printf("(%d)", (s-r->program)+(next-s));
+		s += 3;
+		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
+			/* Literal string, where present. */
+			while (*s != '\0') {
+				putchar(*s);
+				s++;
+			}
+			s++;
+		}
+		putchar('\n');
 	}
-	putchar('\n');
-    }
- 
-    /* Header fields of interest. */
-    if (r->regstart != '\0')
-	printf("start `%c' ", r->regstart);
-    if (r->reganch)
-	printf("anchored ");
-    if (r->regmust != NULL)
-	printf("must have \"%s\"", r->regmust);
-    printf("\n");
+
+	/* Header fields of interest. */
+	if (r->regstart != '\0')
+		printf("start `%c' ", r->regstart);
+	if (r->reganch)
+		printf("anchored ");
+	if (r->regmlen > 0)
+		printf("must have \"%s\"", &r->program[r->regmust]);
+	printf("\n");
 }
- 
+
 /*
- * - regprop - printable representation of opcode 
+ - regprop - printable representation of opcode
  */
-static char    *
+static char *
 regprop(op)
-    char           *op;
+char *op;
 {
-    register char  *p;
-    static char     buf[50];
- 
-    (void) strcpy(buf, ":");
- 
-    switch (OP(op)) {
-    case BOL:
-	p = "BOL";
-	break;
-    case EOL:
-	p = "EOL";
-	break;
-    case ANY:
-	p = "ANY";
-	break;
-    case ANYOF:
-	p = "ANYOF";
-	break;
-    case ANYBUT:
-	p = "ANYBUT";
-	break;
-    case BRANCH:
-	p = "BRANCH";
-	break;
-    case EXACTLY:
-	p = "EXACTLY";
-	break;
-    case NOTHING:
-	p = "NOTHING";
-	break;
-    case BACK:
-	p = "BACK";
-	break;
-    case END:
-	p = "END";
-	break;
-    case OPEN + 1:
-    case OPEN + 2:
-    case OPEN + 3:
-    case OPEN + 4:
-    case OPEN + 5:
-    case OPEN + 6:
-    case OPEN + 7:
-    case OPEN + 8:
-    case OPEN + 9:
-	sprintf(buf + strlen(buf), "OPEN%d", OP(op) - OPEN);
-	p = NULL;
-	break;
-    case CLOSE + 1:
-    case CLOSE + 2:
-    case CLOSE + 3:
-    case CLOSE + 4:
-    case CLOSE + 5:
-    case CLOSE + 6:
-    case CLOSE + 7:
-    case CLOSE + 8:
-    case CLOSE + 9:
-	sprintf(buf + strlen(buf), "CLOSE%d", OP(op) - CLOSE);
-	p = NULL;
-	break;
-    case STAR:
-	p = "STAR";
-	break;
-    case PLUS:
-	p = "PLUS";
-	break;
-    default:
-	regerror("corrupted opcode");
-	break;
-    }
-    if (p != NULL)
-	(void) strcat(buf, p);
-    return (buf);
-}
-#endif
- 
-/*
- * The following is provided for those people who do not have strcspn() in
- * their C libraries.  They should get off their butts and do something about
- * it; at least one public-domain implementation of those (highly useful)
- * string routines has been published on Usenet. 
- */
-#ifdef STRCSPN
-/*
- * strcspn - find length of initial segment of s1 consisting entirely of
- * characters not from s2 
- */
- 
-static int
-strcspn(s1, s2)
-    char           *s1;
-    char           *s2;
-{
-    register char  *scan1;
-    register char  *scan2;
-    register int    count;
- 
-    count = 0;
-    for (scan1 = s1; *scan1 != '\0'; scan1++) {
-	for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
-	    if (*scan1 == *scan2++)
-		return (count);
-	count++;
-    }
-    return (count);
+	register char *p;
+	static char buf[50];
+
+	(void) strcpy(buf, ":");
+
+	switch (OP(op)) {
+	case BOL:
+		p = "BOL";
+		break;
+	case EOL:
+		p = "EOL";
+		break;
+	case ANY:
+		p = "ANY";
+		break;
+	case ANYOF:
+		p = "ANYOF";
+		break;
+	case ANYBUT:
+		p = "ANYBUT";
+		break;
+	case BRANCH:
+		p = "BRANCH";
+		break;
+	case EXACTLY:
+		p = "EXACTLY";
+		break;
+	case NOTHING:
+		p = "NOTHING";
+		break;
+	case BACK:
+		p = "BACK";
+		break;
+	case END:
+		p = "END";
+		break;
+	case OPEN+1:
+	case OPEN+2:
+	case OPEN+3:
+	case OPEN+4:
+	case OPEN+5:
+	case OPEN+6:
+	case OPEN+7:
+	case OPEN+8:
+	case OPEN+9:
+		sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
+		p = NULL;
+		break;
+	case CLOSE+1:
+	case CLOSE+2:
+	case CLOSE+3:
+	case CLOSE+4:
+	case CLOSE+5:
+	case CLOSE+6:
+	case CLOSE+7:
+	case CLOSE+8:
+	case CLOSE+9:
+		sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
+		p = NULL;
+		break;
+	case STAR:
+		p = "STAR";
+		break;
+	case PLUS:
+		p = "PLUS";
+		break;
+	default:
+		regerror("corrupted opcode");
+		break;
+	}
+	if (p != NULL)
+		(void) strcat(buf, p);
+	return(buf);
 }
 #endif
diff --git a/scsh/regexp/regexp.h b/scsh/regexp/regexp.h
index ef07a9f..48af08c 100644
--- a/scsh/regexp/regexp.h
+++ b/scsh/regexp/regexp.h
@@ -6,16 +6,22 @@
  */
 #define NSUBEXP  10
 typedef struct regexp {
-	char *startp[NSUBEXP];
-	char *endp[NSUBEXP];
+	const char *startp[NSUBEXP];
+	const char *endp[NSUBEXP];
 	char regstart;		/* Internal use only. */
 	char reganch;		/* Internal use only. */
-	char *regmust;		/* Internal use only. */
+	int regmust;		/* Internal use only. */
 	int regmlen;		/* Internal use only. */
 	char program[1];	/* Unwarranted chumminess with compiler. */
 } regexp;
- 
-extern regexp *regcomp();
-extern int regexec();
-extern void regsub();
-extern void regerror();
+
+extern regexp *regcomp(const char *re);
+extern int regexec(regexp *rp, const char *s);
+extern void regsub(const regexp *rp, const char *src, char *dst);
+extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
+extern size_t regsublen(const regexp *rp, const char *src);
+
+extern void regerror(char *message);
+extern size_t regcomp_len(const char *exp);
+extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
+
diff --git a/scsh/regexp/regsub.c b/scsh/regexp/regsub.c
index 7fc003c..bc98845 100644
--- a/scsh/regexp/regsub.c
+++ b/scsh/regexp/regsub.c
@@ -1,83 +1,131 @@
 /*
- * regsub @(#)regsub.c	1.3 of 2 April 86 
- *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer.  Not
- * derived from licensed software. 
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions: 
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it. 
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission. 
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. 
+ * regsub
  */
 #include <stdio.h>
-#ifdef AMIGA
-#include "regexp.h"
-#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
 #include <regexp.h>
-#endif
 #include "regmagic.h"
- 
-#ifndef CHARBITS
-#define	UCHARAT(p)	((int)*(unsigned char *)(p))
-#else
-#define	UCHARAT(p)	((int)*(p)&CHARBITS)
-#endif
- 
+
 /*
- * - regsub - perform substitutions after a regexp match 
+ - regsub - perform substitutions after a regexp match
+ */
+
+void regsub(rp, source, dest)
+const regexp *rp;
+const char *source;
+char *dest;
+{
+        regnsub(rp, source, dest, BUFSIZ);
+}
+
+
+
+/*
+ - regnsub - perform bounds-checked substitutions after a regexp match
  */
 void
-regsub(prog, source, dest)
-    regexp         *prog;
-    char           *source;
-    char           *dest;
+regnsub(rp, source, dest, destlen)
+const regexp *rp;
+const char *source;
+char *dest;
+size_t destlen;
 {
-    register char  *src;
-    register char  *dst;
-    register char   c;
-    register int    no;
-    register int    len;
-    extern char    *strncpy();
- 
-    if (prog == NULL || source == NULL || dest == NULL) {
-	regerror("NULL parm to regsub");
-	return;
+	register regexp * const prog = (regexp *)rp;
+	register const char *src = (char *)source;
+	register char *dst = dest;
+	char *dstend = dest + destlen;
+	char *odst;
+	register char c;
+	register int no;
+	register size_t len;
+
+	if (prog == NULL || source == NULL || dest == NULL) {
+		regerror("NULL parameter to regsub");
+		return;
+	}
+	if ((unsigned char)*(prog->program) != MAGIC) {
+		regerror("damaged regexp");
+		return;
+	}
+
+	while ((c = *src++) != '\0') {
+		if (c == '&')
+			no = 0;
+		else if (c == '\\' && isdigit(*src))
+			no = *src++ - '0';
+		else
+			no = -1;
+
+		if (no < 0) {	/* Ordinary character. */
+			if (c == '\\' && (*src == '\\' || *src == '&'))
+				c = *src++;
+			*dst++ = c;
+			if (dst >= dstend) 
+			{
+			    	regerror("output buffer too small");
+				return;
+			}
+		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
+			   prog->endp[no] > prog->startp[no]) {
+			len = prog->endp[no] - prog->startp[no];
+			odst = dst;
+			dst += len;
+			if (dst >= dstend) 
+			{
+			    	regerror("output buffer too small");
+				return;
+			}
+			(void) strncpy(odst, prog->startp[no], len);
+			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
+				regerror("damaged match string");
+				return;
+			}
+		}
+	}
+	*dst++ = '\0';
+}
+
+size_t regsublen(rp, source)
+const regexp *rp;
+const char *source;
+{
+    register regexp * const prog = (regexp *)rp;
+    register char *src = (char *)source;
+    register char c;
+    register int no;
+    register int len = 0;
+	
+    if (prog == NULL || source == NULL) {
+	regerror("NULL parameter to regsublen");
+	return -1;
     }
-    if (UCHARAT(prog->program) != MAGIC) {
-	regerror("damaged regexp fed to regsub");
-	return;
+    
+    if ((unsigned char)*(prog->program) != MAGIC) {
+	regerror("damaged regexp");
+	return -1;
     }
-    src = source;
-    dst = dest;
     while ((c = *src++) != '\0') {
 	if (c == '&')
 	    no = 0;
-	else if (c == '\\' && '0' <= *src && *src <= '9')
+	else if (c == '\\' && isdigit(*src))
 	    no = *src++ - '0';
 	else
 	    no = -1;
- 
 	if (no < 0) {		/* Ordinary character. */
 	    if (c == '\\' && (*src == '\\' || *src == '&'))
-		c = *src++;
-	    *dst++ = c;
-	} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
-	    len = prog->endp[no] - prog->startp[no];
-	    (void) strncpy(dst, prog->startp[no], len);
-	    dst += len;
-	    if (len != 0 && *(dst - 1) == '\0') {	/* strncpy hit NUL. */
-		regerror("damaged match string");
-		return;
+		src++;
+	    len++;
+	} else {
+	    const char *s = prog->startp[no];
+	    const char *e = prog->endp[no];
+	    if ((s != NULL) && (e != NULL) && (e > s)) {
+		len += e-s;
 	    }
 	}
     }
-    *dst++ = '\0';
+    return len+1;
 }
+
+
diff --git a/scsh/regexp/tests b/scsh/regexp/tests
new file mode 100644
index 0000000..10aa6f9
--- /dev/null
+++ b/scsh/regexp/tests
@@ -0,0 +1,127 @@
+abc	abc	y	&	abc
+abc	xbc	n	-	-
+abc	axc	n	-	-
+abc	abx	n	-	-
+abc	xabcy	y	&	abc
+abc	ababc	y	&	abc
+ab*c	abc	y	&	abc
+ab*bc	abc	y	&	abc
+ab*bc	abbc	y	&	abbc
+ab*bc	abbbbc	y	&	abbbbc
+ab+bc	abbc	y	&	abbc
+ab+bc	abc	n	-	-
+ab+bc	abq	n	-	-
+ab+bc	abbbbc	y	&	abbbbc
+ab?bc	abbc	y	&	abbc
+ab?bc	abc	y	&	abc
+ab?bc	abbbbc	n	-	-
+ab?c	abc	y	&	abc
+^abc$	abc	y	&	abc
+^abc$	abcc	n	-	-
+^abc	abcc	y	&	abc
+^abc$	aabc	n	-	-
+abc$	aabc	y	&	abc
+^	abc	y	&	
+$	abc	y	&	
+a.c	abc	y	&	abc
+a.c	axc	y	&	axc
+a.*c	axyzc	y	&	axyzc
+a.*c	axyzd	n	-	-
+a[bc]d	abc	n	-	-
+a[bc]d	abd	y	&	abd
+a[b-d]e	abd	n	-	-
+a[b-d]e	ace	y	&	ace
+a[b-d]	aac	y	&	ac
+a[-b]	a-	y	&	a-
+a[b-]	a-	y	&	a-
+[k]	ab	n	-	-
+a[b-a]	-	c	-	-
+a[]b	-	c	-	-
+a[	-	c	-	-
+a]	a]	y	&	a]
+a[]]b	a]b	y	&	a]b
+a[^bc]d	aed	y	&	aed
+a[^bc]d	abd	n	-	-
+a[^-b]c	adc	y	&	adc
+a[^-b]c	a-c	n	-	-
+a[^]b]c	a]c	n	-	-
+a[^]b]c	adc	y	&	adc
+ab|cd	abc	y	&	ab
+ab|cd	abcd	y	&	ab
+()ef	def	y	&-\1	ef-
+()*	-	c	-	-
+*a	-	c	-	-
+^*	-	c	-	-
+$*	-	c	-	-
+(*)b	-	c	-	-
+$b	b	n	-	-
+a\	-	c	-	-
+a\(b	a(b	y	&-\1	a(b-
+a\(*b	ab	y	&	ab
+a\(*b	a((b	y	&	a((b
+a\\b	a\b	y	&	a\b
+abc)	-	c	-	-
+(abc	-	c	-	-
+((a))	abc	y	&-\1-\2	a-a-a
+(a)b(c)	abc	y	&-\1-\2	abc-a-c
+a+b+c	aabbabc	y	&	abc
+a**	-	c	-	-
+a*?	-	c	-	-
+(a*)*	-	c	-	-
+(a*)+	-	c	-	-
+(a|)*	-	c	-	-
+(a*|b)*	-	c	-	-
+(a+|b)*	ab	y	&-\1	ab-b
+(a+|b)+	ab	y	&-\1	ab-b
+(a+|b)?	ab	y	&-\1	a-a
+[^ab]*	cde	y	&	cde
+(^)*	-	c	-	-
+(ab|)*	-	c	-	-
+)(	-	c	-	-
+	abc	y	&	
+abc		n	-	-
+a*		y	&	
+abcd	abcd	y	&-\&-\\&	abcd-&-\abcd
+a(bc)d	abcd	y	\1-\\1-\\\1	bc-\1-\bc
+([abc])*d	abbbcd	y	&-\1	abbbcd-c
+([abc])*bcd	abcd	y	&-\1	abcd-a
+a|b|c|d|e	e	y	&	e
+(a|b|c|d|e)f	ef	y	&-\1	ef-e
+((a*|b))*	-	c	-	-
+abcd*efg	abcdefg	y	&	abcdefg
+ab*	xabyabbbz	y	&	ab
+ab*	xayabbbz	y	&	a
+(ab|cd)e	abcde	y	&-\1	cde-cd
+[abhgefdc]ij	hij	y	&	hij
+^(ab|cd)e	abcde	n	x\1y	xy
+(abc|)ef	abcdef	y	&-\1	ef-
+(a|b)c*d	abcd	y	&-\1	bcd-b
+(ab|ab*)bc	abc	y	&-\1	abc-a
+a([bc]*)c*	abc	y	&-\1	abc-bc
+a([bc]*)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+a([bc]+)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+a([bc]*)(c+d)	abcd	y	&-\1-\2	abcd-b-cd
+a[bcd]*dcdcde	adcdcde	y	&	adcdcde
+a[bcd]+dcdcde	adcdcde	n	-	-
+(ab|a)b*c	abc	y	&-\1	abc-ab
+((a)(b)c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
+[ -~]*	abc	y	&	abc
+[ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[ -~ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+[a-zA-Z_][a-zA-Z0-9_]*	alpha	y	&	alpha
+^a(bc+|b[eh])g|.h$	abh	y	&-\1	bh-
+(bc+d$|ef*g.|h?i(j|k))	effgz	y	&-\1-\2	effgz-effgz-
+(bc+d$|ef*g.|h?i(j|k))	ij	y	&-\1-\2	ij-ij-j
+(bc+d$|ef*g.|h?i(j|k))	effg	n	-	-
+(bc+d$|ef*g.|h?i(j|k))	bcdd	n	-	-
+(bc+d$|ef*g.|h?i(j|k))	reffgz	y	&-\1-\2	effgz-effgz-
+((((((((((a))))))))))	-	c	-	-
+(((((((((a)))))))))	a	y	&	a
+multiple words of text	uh-uh	n	-	-
+multiple words	multiple words, yeah	y	&	multiple words
+(.*)c(.*)	abcde	y	&-\1-\2	abcde-ab-de
+\((.*), (.*)\)	(a, b)	y	(\2, \1)	(b, a)
diff --git a/scsh/regexp/timer.c b/scsh/regexp/timer.c
new file mode 100644
index 0000000..c104a4f
--- /dev/null
+++ b/scsh/regexp/timer.c
@@ -0,0 +1,164 @@
+/*
+ * Simple timing program for regcomp().
+ * Usage: timer ncomp nexec nsub
+ *	or
+ *	timer ncomp nexec nsub regexp string [ answer [ sub ] ]
+ *
+ * The second form is for timing repetitions of a single test case.
+ * The first form's test data is a compiled-in copy of the "tests" file.
+ * Ncomp, nexec, nsub are how many times to do each regcomp, regexec,
+ * and regsub.  The way to time an operation individually is to do something
+ * like "timer 1 50 1".
+ */
+#include <stdio.h>
+
+struct try {
+	char *re, *str, *ans, *src, *dst;
+} tests[] = {
+#include "timer.t.h"
+{ NULL, NULL, NULL, NULL, NULL }
+};
+
+#include <regexp.h>
+
+int errreport = 0;		/* Report errors via errseen? */
+char *errseen = NULL;		/* Error message. */
+
+char *progname;
+
+/* ARGSUSED */
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	int ncomp, nexec, nsub;
+	struct try one;
+	char dummy[512];
+
+	if (argc < 4) {
+		ncomp = 1;
+		nexec = 1;
+		nsub = 1;
+	} else {
+		ncomp = atoi(argv[1]);
+		nexec = atoi(argv[2]);
+		nsub = atoi(argv[3]);
+	}
+	
+	progname = argv[0];
+	if (argc > 5) {
+		one.re = argv[4];
+		one.str = argv[5];
+		if (argc > 6)
+			one.ans = argv[6];
+		else
+			one.ans = "y";
+		if (argc > 7) {	
+			one.src = argv[7];
+			one.dst = "xxx";
+		} else {
+			one.src = "x";
+			one.dst = "x";
+		}
+		errreport = 1;
+		try(one, ncomp, nexec, nsub);
+	} else
+		multiple(ncomp, nexec, nsub);
+	exit(0);
+}
+
+void
+regerror(s)
+char *s;
+{
+	if (errreport)
+		errseen = s;
+	else
+		error(s, "");
+}
+
+#ifndef ERRAVAIL
+error(s1, s2)
+char *s1;
+char *s2;
+{
+	fprintf(stderr, "regexp: ");
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, "\n");
+	exit(1);
+}
+#endif
+
+int lineno = 0;
+
+multiple(ncomp, nexec, nsub)
+int ncomp, nexec, nsub;
+{
+	register int i;
+	extern char *strchr();
+
+	errreport = 1;
+	for (i = 0; tests[i].re != NULL; i++) {
+		lineno++;
+		try(tests[i], ncomp, nexec, nsub);
+	}
+}
+
+try(fields, ncomp, nexec, nsub)
+struct try fields;
+int ncomp, nexec, nsub;
+{
+	regexp *r;
+	char dbuf[BUFSIZ];
+	register int i;
+
+	errseen = NULL;
+	r = regcomp(fields.re);
+	if (r == NULL) {
+		if (*fields.ans != 'c')
+			complain("regcomp failure in `%s'", fields.re);
+		return;
+	}
+	if (*fields.ans == 'c') {
+		complain("unexpected regcomp success in `%s'", fields.re);
+		free((char *)r);
+		return;
+	}
+	for (i = ncomp-1; i > 0; i--) {
+		free((char *)r);
+		r = regcomp(fields.re);
+	}
+	if (!regexec(r, fields.str)) {
+		if (*fields.ans != 'n')
+			complain("regexec failure in `%s'", "");
+		free((char *)r);
+		return;
+	}
+	if (*fields.ans == 'n') {
+		complain("unexpected regexec success", "");
+		free((char *)r);
+		return;
+	}
+	for (i = nexec-1; i > 0; i--)
+		(void) regexec(r, fields.str);
+	errseen = NULL;
+	for (i = nsub; i > 0; i--)
+		regsub(r, fields.src, dbuf);
+	if (errseen != NULL) {	
+		complain("regsub complaint", "");
+		free((char *)r);
+		return;
+	}
+	if (strcmp(dbuf, fields.dst) != 0)
+		complain("regsub result `%s' wrong", dbuf);
+	free((char *)r);
+}
+
+complain(s1, s2)
+char *s1;
+char *s2;
+{
+	fprintf(stderr, "try: %d: ", lineno);
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : "");
+}
diff --git a/scsh/regexp/try.c b/scsh/regexp/try.c
index 072c94e..9b6424b 100644
--- a/scsh/regexp/try.c
+++ b/scsh/regexp/try.c
@@ -1,236 +1,220 @@
 /*
- * Simple test program for regexp(3) stuff.  Knows about debugging hooks. 
+ * Simple test program for regexp(3) stuff.  Knows about debugging hooks.
+ * Usage: try re [string [output [-]]]
+ * The re is compiled and dumped, regexeced against the string, the result
+ * is applied to output using regsub().  The - triggers a running narrative
+ * from regexec().  Dumping and narrative don't happen unless DEBUG.
  *
- * Copyright (c) 1986 by University of Toronto. Written by Henry Spencer.  Not
- * derived from licensed software. 
- *
- * Permission is granted to anyone to use this software for any purpose on any
- * computer system, and to redistribute it freely, subject to the following
- * restrictions: 
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from defects in it. 
- *
- * 2. The origin of this software must not be misrepresented, either by explicit
- * claim or by omission. 
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. 
- *
- * Usage: try re [string [output [-]]] The re is compiled and dumped, regexeced
- * against the string, the result is applied to output using regsub().  The -
- * triggers a running narrative from regexec().  Dumping and narrative don't
- * happen unless DEBUG. 
- *
- * If there are no arguments, stdin is assumed to be a stream of lines with five
- * fields:  a r.e., a string to match it against, a result code, a source
- * string for regsub, and the proper result.  Result codes are 'c' for
- * compile failure, 'y' for match success, 'n' for match failure. Field
- * separator is tab. 
+ * If there are no arguments, stdin is assumed to be a stream of lines with
+ * five fields:  a r.e., a string to match it against, a result code, a
+ * source string for regsub, and the proper result.  Result codes are 'c'
+ * for compile failure, 'y' for match success, 'n' for match failure.
+ * Field separator is tab.
  */
 #include <stdio.h>
 #include <regexp.h>
- 
+
 #ifdef ERRAVAIL
-char           *progname;
-extern char    *mkprogname();
+char *progname;
+extern char *mkprogname();
 #endif
- 
+
 #ifdef DEBUG
-extern int      regnarrate;
+extern int regnarrate;
 #endif
- 
-char            buf[BUFSIZ];
- 
-int             errreport = 0;	/* Report errors via errseen? */
-char           *errseen = NULL;	/* Error message. */
-int             status = 0;	/* Exit status. */
- 
+
+char buf[BUFSIZ];
+
+int errreport = 0;		/* Report errors via errseen? */
+char *errseen = NULL;		/* Error message. */
+int status = 0;			/* Exit status. */
+
 /* ARGSUSED */
 main(argc, argv)
-    int             argc;
-    char           *argv[];
+int argc;
+char *argv[];
 {
-    regexp         *r;
-    int             i;
- 
+	regexp *r;
+	int i;
+
 #ifdef ERRAVAIL
-    progname = mkprogname(argv[0]);
+	progname = mkprogname(argv[0]);
 #endif
- 
-    if (argc == 1) {
-	multiple();
-	exit(status);
-    }
-    r = regcomp(argv[1]);
-    if (r == NULL)
-	error("regcomp failure", "");
+
+	if (argc == 1) {
+		multiple();
+		exit(status);
+	}
+
+	r = regcomp(argv[1]);
+	if (r == NULL)
+		error("regcomp failure", "");
 #ifdef DEBUG
-    regdump(r);
-    if (argc > 4)
-	regnarrate++;
+	regdump(r);
+	if (argc > 4)
+		regnarrate++;
 #endif
-    if (argc > 2) {
-	i = regexec(r, argv[2]);
-	printf("%d", i);
-	for (i = 1; i < NSUBEXP; i++)
-	    if (r->startp[i] != NULL && r->endp[i] != NULL)
-		printf(" \\%d", i);
-	printf("\n");
-    }
-    if (argc > 3) {
-	regsub(r, argv[3], buf);
-	printf("%s\n", buf);
-    }
-    exit(status);
+	if (argc > 2) {
+		i = regexec(r, argv[2]);
+		printf("%d", i);
+		for (i = 1; i < NSUBEXP; i++)
+			if (r->startp[i] != NULL && r->endp[i] != NULL)
+				printf(" \\%d", i);
+		printf("\n");
+	}
+	if (argc > 3) {
+		regsub(r, argv[3], buf);
+		printf("%s\n", buf);
+	}
+	exit(status);
 }
- 
+
 void
 regerror(s)
-    char           *s;
+char *s;
 {
-    if (errreport)
-	errseen = s;
-    else
-	error(s, "");
+	if (errreport)
+		errseen = s;
+	else
+		error(s, "");
 }
- 
+
 #ifndef ERRAVAIL
 error(s1, s2)
-    char           *s1;
-    char           *s2;
+char *s1;
+char *s2;
 {
-    fprintf(stderr, "regexp: ");
-    fprintf(stderr, s1, s2);
-    fprintf(stderr, "\n");
-    exit(1);
+	fprintf(stderr, "regexp: ");
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, "\n");
+	exit(1);
 }
 #endif
- 
-int             lineno;
- 
-regexp          badregexp;	/* Implicit init to 0. */
- 
+
+int lineno;
+
+regexp badregexp;		/* Implicit init to 0. */
+
 multiple()
 {
-    char            rbuf[BUFSIZ];
-    char           *field[5];
-    char           *scan;
-    int             i;
-    regexp         *r;
-    extern char    *strchr();
- 
-    errreport = 1;
-    lineno = 0;
-    while (fgets(rbuf, sizeof(rbuf), stdin) != NULL) {
-	rbuf[strlen(rbuf) - 1] = '\0';	/* Dispense with \n. */
-	lineno++;
-	scan = rbuf;
-	for (i = 0; i < 5; i++) {
-	    field[i] = scan;
-	    if (field[i] == NULL) {
-		complain("bad testfile format", "");
-		exit(1);
-	    }
-	    scan = strchr(scan, '\t');
-	    if (scan != NULL)
-		*scan++ = '\0';
+	char rbuf[BUFSIZ];
+	char *field[5];
+	char *scan;
+	int i;
+	regexp *r;
+	extern char *strchr();
+
+	errreport = 1;
+	lineno = 0;
+	while (fgets(rbuf, sizeof(rbuf), stdin) != NULL) {
+		rbuf[strlen(rbuf)-1] = '\0';	/* Dispense with \n. */
+		lineno++;
+		scan = rbuf;
+		for (i = 0; i < 5; i++) {
+			field[i] = scan;
+			if (field[i] == NULL) {
+				complain("bad testfile format", "");
+				exit(1);
+			}
+			scan = strchr(scan, '\t');
+			if (scan != NULL)
+				*scan++ = '\0';
+		}
+		try(field);
 	}
-	try(field);
-    }
- 
-    /* And finish up with some internal testing... */
-    lineno = 9990;
-    errseen = NULL;
-    if (regcomp((char *) NULL) != NULL || errseen == NULL)
-	complain("regcomp(NULL) doesn't complain", "");
-    lineno = 9991;
-    errseen = NULL;
-    if (regexec((regexp *) NULL, "foo") || errseen == NULL)
-	complain("regexec(NULL, ...) doesn't complain", "");
-    lineno = 9992;
-    r = regcomp("foo");
-    if (r == NULL) {
-	complain("regcomp(\"foo\") fails", "");
-	return;
-    }
-    lineno = 9993;
-    errseen = NULL;
-    if (regexec(r, (char *) NULL) || errseen == NULL)
-	complain("regexec(..., NULL) doesn't complain", "");
-    lineno = 9994;
-    errseen = NULL;
-    regsub((regexp *) NULL, "foo", rbuf);
-    if (errseen == NULL)
-	complain("regsub(NULL, ..., ...) doesn't complain", "");
-    lineno = 9995;
-    errseen = NULL;
-    regsub(r, (char *) NULL, rbuf);
-    if (errseen == NULL)
-	complain("regsub(..., NULL, ...) doesn't complain", "");
-    lineno = 9996;
-    errseen = NULL;
-    regsub(r, "foo", (char *) NULL);
-    if (errseen == NULL)
-	complain("regsub(..., ..., NULL) doesn't complain", "");
-    lineno = 9997;
-    errseen = NULL;
-    if (regexec(&badregexp, "foo") || errseen == NULL)
-	complain("regexec(nonsense, ...) doesn't complain", "");
-    lineno = 9998;
-    errseen = NULL;
-    regsub(&badregexp, "foo", rbuf);
-    if (errseen == NULL)
-	complain("regsub(nonsense, ..., ...) doesn't complain", "");
+
+	/* And finish up with some internal testing... */
+	lineno = 9990;
+	errseen = NULL;
+	if (regcomp((char *)NULL) != NULL || errseen == NULL)
+		complain("regcomp(NULL) doesn't complain", "");
+	lineno = 9991;
+	errseen = NULL;
+	if (regexec((regexp *)NULL, "foo") || errseen == NULL)
+		complain("regexec(NULL, ...) doesn't complain", "");
+	lineno = 9992;
+	r = regcomp("foo");
+	if (r == NULL) {
+		complain("regcomp(\"foo\") fails", "");
+		return;
+	}
+	lineno = 9993;
+	errseen = NULL;
+	if (regexec(r, (char *)NULL) || errseen == NULL)
+		complain("regexec(..., NULL) doesn't complain", "");
+	lineno = 9994;
+	errseen = NULL;
+	regsub((regexp *)NULL, "foo", rbuf);
+	if (errseen == NULL)
+		complain("regsub(NULL, ..., ...) doesn't complain", "");
+	lineno = 9995;
+	errseen = NULL;
+	regsub(r, (char *)NULL, rbuf);
+	if (errseen == NULL)
+		complain("regsub(..., NULL, ...) doesn't complain", "");
+	lineno = 9996;
+	errseen = NULL;
+	regsub(r, "foo", (char *)NULL);
+	if (errseen == NULL)
+		complain("regsub(..., ..., NULL) doesn't complain", "");
+	lineno = 9997;
+	errseen = NULL;
+	if (regexec(&badregexp, "foo") || errseen == NULL)
+		complain("regexec(nonsense, ...) doesn't complain", "");
+	lineno = 9998;
+	errseen = NULL;
+	regsub(&badregexp, "foo", rbuf);
+	if (errseen == NULL)
+		complain("regsub(nonsense, ..., ...) doesn't complain", "");
 }
- 
+
 try(fields)
-    char          **fields;
+char **fields;
 {
-    regexp         *r;
-    char            dbuf[BUFSIZ];
- 
-    errseen = NULL;
-    r = regcomp(fields[0]);
-    if (r == NULL) {
-	if (*fields[2] != 'c')
-	    complain("regcomp failure in `%s'", fields[0]);
-	return;
-    }
-    if (*fields[2] == 'c') {
-	complain("unexpected regcomp success in `%s'", fields[0]);
-	free((char *) r);
-	return;
-    }
-    if (!regexec(r, fields[1])) {
-	if (*fields[2] != 'n')
-	    complain("regexec failure in `%s'", "");
-	free((char *) r);
-	return;
-    }
-    if (*fields[2] == 'n') {
-	complain("unexpected regexec success", "");
-	free((char *) r);
-	return;
-    }
-    errseen = NULL;
-    regsub(r, fields[3], dbuf);
-    if (errseen != NULL) {
-	complain("regsub complaint", "");
-	free((char *) r);
-	return;
-    }
-    if (strcmp(dbuf, fields[4]) != 0)
-	complain("regsub result `%s' wrong", dbuf);
-    free((char *) r);
+	regexp *r;
+	char dbuf[BUFSIZ];
+
+	errseen = NULL;
+	r = regcomp(fields[0]);
+	if (r == NULL) {
+		if (*fields[2] != 'c')
+			complain("regcomp failure in `%s'", fields[0]);
+		return;
+	}
+	if (*fields[2] == 'c') {
+		complain("unexpected regcomp success in `%s'", fields[0]);
+		free((char *)r);
+		return;
+	}
+	if (!regexec(r, fields[1])) {
+		if (*fields[2] != 'n')
+			complain("regexec failure in `%s'", fields[0]);
+		free((char *)r);
+		return;
+	}
+	if (*fields[2] == 'n') {
+		complain("unexpected regexec success", "");
+		free((char *)r);
+		return;
+	}
+	errseen = NULL;
+	regsub(r, fields[3], dbuf);
+	if (errseen != NULL) {
+		complain("regsub complaint", "");
+		free((char *)r);
+		return;
+	}
+	if (strcmp(dbuf, fields[4]) != 0)
+		complain("regsub result `%s' wrong", dbuf);
+	free((char *)r);
 }
- 
+
 complain(s1, s2)
-    char           *s1;
-    char           *s2;
+char *s1;
+char *s2;
 {
-    fprintf(stderr, "try: %d: ", lineno);
-    fprintf(stderr, s1, s2);
-    fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : "");
-    status = 1;
+	fprintf(stderr, "try: %d: ", lineno);
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : "");
+	status = 1;
 }