1. Removed cruft (timer.c, try.c, regexp.{3,c,h}, regmagic.h, regsub.c)

that shouldn't even be in this dir -- it was from a different Spencer package, and wasn't used; it got copied in by accident at some point. 2. Removed *.ih, Makefile, and regex.h. These are derived files produced during the build. 3. Removed patch-msg, which is old & dead. 4. Updated the rest of the source to a newer version of Spencer's POSIX package (alpha3.7). Not all of these files changed, actually, but I guess the last-mod dates did, so CVS thinks they're being updated or something?
1999-07-10 20:01:52 +00:00 · 1999-07-10 20:01:52 +00:00 · 14fe107a7e
parent c23ba5b0cb
commit 14fe107a7e
23 changed files with 781 additions and 3189 deletions
--- a/scsh/regexp/COPYRIGHT
+++ b/scsh/regexp/COPYRIGHT
@ -1,19 +1,20 @@
-Copyright (c) 1986, 1993, 1995 by University of Toronto.
+Copyright 1992, 1993, 1994, 1997 Henry Spencer.  All rights reserved.
-Written by Henry Spencer.  Not derived from licensed software.
+This software is not subject to any license of the American Telephone
 and Telegraph Company or of the Regents of the University of California.
-Permission is granted to anyone to use this software for any
+Permission is granted to anyone to use this software for any purpose on
-purpose on any computer system, and to redistribute it in any way,
+any computer system, and to alter it and redistribute it, subject
-subject to the following restrictions:
+to the following restrictions:
-1. The author is not responsible for the consequences of use of
+1. The author is not responsible for the consequences of use of this
-	this software, no matter how awful, even if they arise
+   software, no matter how awful, even if they arise from flaws in it.
 	from defects in it.
-2. The origin of this software must not be misrepresented, either
+2. The origin of this software must not be misrepresented, either by
-	by explicit claim or by omission.
+   explicit claim or by omission.  Since few users ever read sources,
   credits must appear in the documentation.
-3. Altered versions must be plainly marked as such, and must not
+3. Altered versions must be plainly marked as such, and must not be
-	be misrepresented (by explicit claim or omission) as being
+   misrepresented as being the original software.  Since few users
-	the original software.
+   ever read sources, credits must appear in the documentation.
-4. This notice must not be removed or altered.
+4. This notice may not be removed or altered.
--- a/scsh/regexp/Makefile
+++ b/scsh/regexp/Makefile
--- a/scsh/regexp/Makefile.in
+++ b/scsh/regexp/Makefile.in
@ -1,118 +1,137 @@
 srcdir  = @srcdir@
 VPATH   = @srcdir@
 CC      = @CC@
 CFLAGS1 = @CFLAGS1@
 RANLIB  = @RANLIB@
-# Things you might want to put in ENV:
+# You probably want to take -DREDEBUG out of CFLAGS, and put something like
-# -DERRAVAIL		have utzoo-compatible error() function and friends
+# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
-ENV=
+# internal assertion checking and some debugging facilities).
 # Put -Dconst= in for a pre-ANSI compiler.
 # Do not take -DPOSIX_MISTAKE out.
 # REGCFLAGS isn't important to you (it's for my use in some special contexts).
 #CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
 CFLAGS=-I. -DPOSIX_MISTAKE $(REGCFLAGS) $(CFLAGS1)
-# Things you might want to put in TEST:
+# If you have a pre-ANSI compiler, put -o into MKHFLAGS.  If you want
-# -DDEBUG		debugging hooks
+# the Berkeley __P macro, put -b in.
-# -I.			regexp.h from current directory, not /usr/include
+MKHFLAGS=
 TEST=-I. -I$(srcdir)
-# Things you might want to put in PROF:
+# Flags for linking but not compiling, if any.
-# -pg			profiler
+LDFLAGS=
 # PROF=
-CFLAGS=$(CFLAGS1) $(ENV) $(TEST) $(PROF)
+# Extra libraries for linking, if any.
-LDFLAGS=$(PROF)
+LIBS=
-LIB=libregexp.a
+# Internal stuff, should not need changing.
-OBJ=regexp.o regsub.o regerror.o
+OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
-TMP=dtr.tmp
+OBJS=$(OBJPRODN) split.o debug.o main.o
 H=cclass.h cname.h regex2.h utils.h
 REGSRC=regcomp.c regerror.c regexec.c regfree.c
 ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
 # Stuff that matters only if you're trying to lint the package.
 LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
 LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
 JUNKLINT=possible pointer alignment|null effect
 # arrangements to build forward-reference header files
 .SUFFIXES:	.ih .h
 .c.ih:
 	sh ./mkh $(MKHFLAGS) -p $< >$@
 default:	r
-try:	try.o $(LIB)
+lib:	purge $(OBJPRODN)
-	$(CC) $(LDFLAGS) try.o $(LIB) -o try
+	rm -f libregex.a
 	ar crv libregex.a $(OBJPRODN)
-# Making timer will probably require putting stuff in $(PROF) and then
+purge:
-# recompiling everything; the following is just the final stage.
+	rm -f *.o
 timer:	timer.o $(LIB)
 	$(CC) $(LDFLAGS) timer.o $(LIB) -o timer
-timer.o:	timer.c timer.t.h
+# stuff to build regex.h
 REGEXH=regex.h
 REGEXHSRC=regex2.h $(REGSRC)
 $(REGEXH):	$(REGEXHSRC) mkh
 	sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
 	cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
 	rm -f regex.tmp
-timer.t.h:	tests
+# dependencies
-	sed 's/	/","/g;s/\\/&&/g;s/.*/{"&"},/' tests >timer.t.h
+$(OBJPRODN) debug.o:	utils.h regex.h regex2.h
 regcomp.o:	cclass.h cname.h regcomp.ih
 regexec.o:	engine.c engine.ih
 regerror.o:	regerror.ih
 debug.o:	debug.ih
 main.o:	main.ih
-# Regression test.
+# tester
-r:	try tests
+re:	$(OBJS)
-	./try <tests		# no news is good news...
+	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
-$(LIB):	$(OBJ)
+# regression test
-	ar cr $(LIB) $(OBJ)
+r:	re tests
-	$(RANLIB) libregexp.a
+	./re <tests
 	./re -el <tests
 	./re -er <tests
-regexp.o:	regexp.c regexp.h regmagic.h
+# 57 variants, and other stuff, for development use -- not useful to you
-regsub.o:	regsub.c regexp.h regmagic.h
+ra:	./re tests
 	-./re <tests
 	-./re -el <tests
 	-./re -er <tests
-clean:
+rx:	./re tests
-	rm -f *.o core mon.out gmon.out timer.t.h dtr copy try timer r.*
+	./re -x <tests
-	rm -f residue rs.* re.1 rm.h re.h ch.soe ch.ps j badcom fig[012]
+	./re -x -el <tests
-	rm -f ch.sml fig[12].ps $(LIB)
+	./re -x -er <tests
 	rm -rf $(TMP)
-# the rest of this is unlikely to be of use to you
+t:	./re tests
 	-time ./re <tests
 	-time ./re -cs <tests
 	-time ./re -el <tests
 	-time ./re -cs -el <tests
-BITS = r.1 rs.1 re.1 rm.h re.h
+l:	$(LINTC)
-OPT=-p -ms
+	lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
-ch.soe:	ch $(BITS)
+fullprint:
-	soelim ch >$@
+	ti README WHATSNEW notes todo | list
 	ti *.h | list
 	list *.c
 	list regex.3 regex.7
-ch.sml:	ch $(BITS) smlize splitfigs
+print:
-	splitfigs ch | soelim | smlize >$@
+	ti README WHATSNEW notes todo | list
 	ti *.h | list
 	list reg*.c engine.c
 fig0 fig1 fig2:	ch splitfigs
 	splitfigs ch >/dev/null
-f:	fig0 fig1 fig2 figs
+mf.tmp:	Makefile
-	groff -Tps -s $(OPT) figs | lpr
+	sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
-fig1.ps:	fig0 fig1
+DTRH=cclass.h cname.h regex2.h utils.h
-	( cat fig0 ; echo ".LP" ; cat fig1 ) | groff -Tps $(OPT) >$@
+PRE=COPYRIGHT README WHATSNEW
 POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
 FILES=$(PRE) Makefile $(POST)
 DTR=$(PRE) Makefile=mf.tmp $(POST)
 dtr:	$(FILES) mf.tmp
 	makedtr $(DTR) >$@
 	rm mf.tmp
-fig2.ps:	fig0 fig2
+cio:	$(FILES)
-	( cat fig0 ; echo ".LP" ; cat fig2 ) | groff -Tps $(OPT) >$@
+	cio $(FILES)
-fp:	fig1.ps fig2.ps
+rdf:	$(FILES)
 	rcsdiff -c $(FILES) 2>&1 | p
-r.1:	regexp.c splitter
+# various forms of cleanup
-	splitter regexp.c
+tidy:
 	rm -f junk* core core.* *.core dtr *.tmp lint
-rs.1:	regsub.c splitter
+clean:	tidy
-	splitter regsub.c
+	rm -f *.o *.s *.ih re libregex.a
-re.1:	regerror.c splitter
+# don't do this one unless you know what you're doing
-	splitter regerror.c
+spotless:	clean
-
+	rm -f mkh regex.h
 rm.h:	regmagic.h splitter
 	splitter regmagic.h
 re.h:	regexp.h splitter
 	splitter regexp.h
 PLAIN=COPYRIGHT README Makefile regexp.3 try.c timer.c tests
 FIX=regexp.h regexp.c regsub.c regerror.c regmagic.h
 DTR=$(PLAIN) $(FIX)
 dtr:	r $(DTR)
 	rm -rf $(TMP)
 	mkdir $(TMP)
 	cp $(PLAIN) $(TMP)
 	for f in $(FIX) ; do normalize $$f >$(TMP)/$$f ; done
 	( cd $(TMP) ; makedtr $(DTR) ) >$@
 	rm -rf $(TMP)
 ch.ps:	ch Makefile $(BITS)
 	groff -Tps $(OPT) ch >$@
 copy:	ch.soe ch.sml fp
 	makedtr REMARKS ch.sml fig*.ps ch.soe >$@
 go:	copy dtr
--- a/scsh/regexp/README
+++ b/scsh/regexp/README
@ -1,57 +1,32 @@
-This is a revision of my well-known regular-expression package, regexp(3).
+alpha3.7 release.
-It gives C programs the ability to use egrep-style regular expressions, and
+Fri Nov 21 13:25:21 EST 1997
-does it in a much cleaner fashion than the analogous routines in SysV.
+henry@zoo.toronto.edu
 It is not, alas, fully POSIX.2-compliant; that is hard.  (I'm working on
 a full reimplementation that will do that.)
-This version is the one which is examined and explained in one chapter of
+See WHATSNEW for change listing.
 "Software Solutions in C" (Dale Schumacher, ed.; AP Professional 1994;
 ISBN 0-12-632360-7), plus a couple of insignificant updates, plus one
 significant bug fix (done 10 Nov 1995).
-Although this package was inspired by the Bell V8 regexp(3), this
+installation notes:
-implementation is *NOT* AT&T/Bell code, and is not derived from licensed
+--------
-software.  Even though U of T is a V8 licensee.  This software is based on
+Read the comments at the beginning of Makefile before running.
 a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed
 here is a complete rewrite and hence is not covered by AT&T copyright).
 I admit to some familiarity with regular-expression implementations of
 the past, but the only one that this code traces any ancestry to is the
 one published in Kernighan & Plauger's "Software Tools" (from which
 this one draws ideas but not code).
-Simplistically:  put this stuff into a source directory, inspect Makefile
+Utils.h contains some things that just might have to be modified on
-for compilation options that need changing to suit your local environment,
+some systems, as well as a nested include (ugh) of <assert.h>.
 and then do "make".  This compiles the regexp(3) functions, builds a
 library containing them, compiles a test program, and runs a large set of
 regression tests.  If there are no complaints, then put regexp.h into
 /usr/include, add regexp.o, regsub.o, and regerror.o into your C library
 (or put libre.a into /usr/lib), and install regexp.3 (perhaps with slight
 modifications) in your manual-pages directory. 
-The files are:
+The "fake" directory contains quick-and-dirty fakes for some header
 files and routines that old systems may not have.  Note also that
 -DUSEBCOPY will make utils.h substitute bcopy() for memmove().
-COPYRIGHT	copyright notice
+After that, "make r" will build regcomp.o, regexec.o, regfree.o,
-README		this text
+and regerror.o (the actual routines), bundle them together into a test
-Makefile	instructions to make everything
+program, and run regression tests on them.  No output is good output.
 regexp.3	manual page
 regexp.h	header file, for /usr/include
 regexp.c	source for regcomp() and regexec()
 regsub.c	source for regsub()
 regerror.c	source for default regerror()
 regmagic.h	internal header file
 try.c		source for test program
 timer.c		source for timing program
 tests		test list for try and timer
-This implementation uses nondeterministic automata rather than the
+"make lib" builds just the .o files for the actual routines (when
-deterministic ones found in some other implementations, which makes it
+you're happy with testing and have adjusted CFLAGS for production),
-simpler, smaller, and faster at compiling regular expressions, but slower
+and puts them together into libregex.a.  You can pick up either the
-at executing them.  Many users have found the speed perfectly adequate,
+library or *.o ("make lib" makes sure there are no other .o files left
-although replacing the insides of egrep with this code would be a mistake.
+around to confuse things).
-This stuff should be pretty portable, given an ANSI C compiler and
+Main.c, debug.c, split.c are used for regression testing but are not part
-appropriate option settings.  There are no "reserved" char values except for
+of the RE routines themselves.
-NUL, and no special significance is attached to the top bit of chars.
+
-The string(3) functions are used a fair bit, on the grounds that they are
+Regex.h goes in /usr/include.  All other .h files are internal only.
-probably faster than coding the operations in line.  Some attempts at code
+--------
 tuning have been made, but this is invariably a bit machine-specific.
--- a/scsh/regexp/WHATSNEW
+++ b/scsh/regexp/WHATSNEW
@ -1,3 +1,7 @@
 New in alpha3.7:  A bit of cleanup aimed at maximizing portability,
 possibly at slight cost in efficiency.  "ul" suffixes and "unsigned long"
 no longer appear, in particular.
 New in alpha3.6:  A couple more portability glitches fixed.
 New in alpha3.5:  Active development of this code has been stopped --
--- a/scsh/regexp/debug.ih
+++ b/scsh/regexp/debug.ih
@ -1,14 +0,0 @@
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === debug.c === */
 void regprint(regex_t *r, FILE *d);
 static void s_print(register struct re_guts *g, FILE *d);
 static char *regchar(int ch);
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
--- a/scsh/regexp/engine.ih
+++ b/scsh/regexp/engine.ih
@ -1,35 +0,0 @@
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === engine.c === */
 static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
 static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
 #define	BOL	(OUT+1)
 #define	EOL	(BOL+1)
 #define	BOLEOL	(BOL+2)
 #define	NOTHING	(BOL+3)
 #define	BOW	(BOL+4)
 #define	EOW	(BOL+5)
 #define	CODEMAX	(BOL+5)		/* highest code used */
 #define	NONCHAR(c)	((c) > CHAR_MAX)
 #define	NNONCHAR	(CODEMAX-CHAR_MAX)
 #ifdef REDEBUG
 static void print(struct match *m, char *caption, states st, int ch, FILE *d);
 #endif
 #ifdef REDEBUG
 static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
 #endif
 #ifdef REDEBUG
 static char *pchar(int ch);
 #endif
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
--- a/scsh/regexp/main.ih
+++ b/scsh/regexp/main.ih
@ -1,19 +0,0 @@
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === main.c === */
 void regress(FILE *in);
 void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
 int options(int type, char *s);
 int opt(int c, char *s);
 void fixstr(register char *p);
 char *check(char *str, regmatch_t sub, char *should);
 static char *eprint(int err);
 static int efind(char *name);
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
--- a/scsh/regexp/patch-msg
+++ b/scsh/regexp/patch-msg
@ -1,803 +0,0 @@
 Date: Mon, 1 Jul 1996 23:22:47 GMT
 From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
 To: shivers@lcs.mit.edu, bdc@ai.mit.edu
 Subject: scsh patch for precompiled regexps..
 I meant to send this out months ago but I was just too hosed with work.
 Here's what I have right now:
 There are three pieces here:
 	diffs to the "core" scsh
 	diffs to Henry Spencer's latest regexp library
 	a copy of Henry Spencer's latest regexp library..
 It appears to work (it passes the same regression tests as the C library..).
 Let me know if I didn't include something needed for this to work..
 				- Bill
 diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
 *** scsh-0.4.2/scsh/re.scm	Fri Oct 27 04:58:56 1995
 --- scsh-0.4.2-regexp/scsh/re.scm	Sat Apr  6 21:07:41 1996
 ***************
 *** 34,49 ****
  ;;; Bogus stub definitions for low-level match routines:
 ! (define regexp? string?)
 ! (define (make-regexp str) str)
 ! (define (regexp-exec regexp str . maybe-start)
    (let ((start (optional-arg maybe-start 0))
  	(start-vec (make-vector 10))
  	(end-vec (make-vector 10)))
 !     (and (%regexp-match regexp str start start-vec end-vec)
 ! 	 (make-regexp-match str start-vec end-vec))))
 ! 
  ;;; Convert a string into a regex pattern that matches that string exactly --
  ;;; in other words, quote the special chars with backslashes.
 --- 34,53 ----
  ;;; Bogus stub definitions for low-level match routines:
 ! (define-record iregexp
 !   string)
 ! (define regexp? iregexp?)
 ! 
 ! (define (make-regexp str) 
 !   (make-iregexp (compile-regexp str)))
 ! 
 ! (define (regexp-exec r s . maybe-start)
    (let ((start (optional-arg maybe-start 0))
  	(start-vec (make-vector 10))
  	(end-vec (make-vector 10)))
 !     (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
 ! 	 (make-regexp-match s start-vec end-vec))))
  ;;; Convert a string into a regex pattern that matches that string exactly --
  ;;; in other words, quote the special chars with backslashes.
 ***************
 *** 58,75 ****
  		  (cons #\\ result)
  		  result))))))
 ! (define-foreign %regexp-match/errno (reg_match (string regexp)
 ! 					       (string s)
 ! 					       (integer start)
 ! 					       (vector-desc start-vec)
 ! 					       (vector-desc end-vec))
 !   static-string ; Error string or #f if all is ok.
 !   bool)		; match?
 ! 
 ! (define (%regexp-match regexp string start start-vec end-vec)
 !   (receive (err match?) (%regexp-match/errno regexp string start
 ! 					     start-vec end-vec)
 !     (if err (error err %regexp-match regexp string start) match?)))
  ;;; I do this one in C, I'm not sure why:
 --- 62,79 ----
  		  (cons #\\ result)
  		  result))))))
 ! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
 ! ;;;					       (string s)
 ! ;;;					       (integer start)
 ! ;;;					       (vector-desc start-vec)
 ! ;;;					       (vector-desc end-vec))
 ! ;;;  static-string ; Error string or #f if all is ok.
 ! ;;;  bool)		; match?
 ! 
 ! ;;;(define (%regexp-match regexp string start start-vec end-vec)
 ! ;;;  (receive (err match?) (%regexp-match/errno regexp string start
 ! ;;;					     start-vec end-vec)
 ! ;;;    (if err (error err %regexp-match regexp string start) match?)))
  ;;; I do this one in C, I'm not sure why:
 ***************
 *** 79,81 ****
 --- 83,166 ----
    (filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
    static-string	; error message -- #f if no error.
    integer)	; number of files that pass the filter.
 + 
 + ;;; precompiled regexps.
 + 
 + (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
 +   static-string
 +   integer)
 + 
 + (define-foreign %regexp-compile (reg_comp_comp (string regexp)
 + 					       (string-desc re-buf))
 +   static-string)
 + 
 + (define (%regexp-exec-1 r s start sv ev)
 +   (receive (err match?) (%regexp-exec r s start sv ev)
 + 	   (if err (error err s start)
 + 	       match?)))
 + 
 + (define-foreign %regexp-exec (reg_exec (string-desc regexp)
 + 				       (string s)
 + 				       (integer start)
 + 				       (vector-desc start-vec)
 + 				       (vector-desc end-vec))
 +   static-string
 +   bool)
 + 
 + 
 + (define (compile-regexp e)
 +   (receive (err len)
 + 	   (%regexp-compiled-length e)
 + 	   (if err (error err e)
 + 	       (let ((buf (make-string len)))
 + 		 (%regexp-compile e buf)
 + 		 buf))))
 + 
 + 
 + 
 + (define-foreign %regexp-subst (reg_subst (string-desc regexp)
 + 					 (string m)
 + 					 (string s)
 + 					 (integer start)
 + 					 (vector-desc start-vec)
 + 					 (vector-desc end-vec)
 + 					 (string-desc outbuf))
 +   static-string
 +   integer)
 + 
 + (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
 + 						 (string m)
 + 						 (string s)
 + 						 (integer start)
 + 						 (vector-desc start-vec)
 + 						 (vector-desc end-vec))
 +   static-string
 +   integer)
 + 
 + 
 + (define (regexp-subst re match replacement)
 +   (let ((cr (iregexp:string re))
 + 	(matchstr (regexp-match:string match))
 + 	(startvec (regexp-match:start match))
 + 	(endvec (regexp-match:end match)))
 +     (receive (err outlen)
 + 	     (%regexp-subst-len cr
 + 				matchstr
 + 				replacement
 + 				0
 + 				startvec
 + 				endvec)
 + 	     (if err (error err matchstr replacement)
 + 		 (let ((outbuf (make-string outlen)))
 + 		   (receive (err outlen)
 + 			    (%regexp-subst cr
 + 					   matchstr
 + 					   replacement
 + 					   0
 + 					   startvec
 + 					   endvec
 + 					   outbuf)
 + 			    (if err (error err matchstr replacement)
 + 				(substring outbuf 0 outlen))))))))
 + 
 + 		   
 \ No newline at end of file
 diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
 *** scsh-0.4.2/scsh/re1.c	Fri Oct 27 04:58:58 1995
 --- scsh-0.4.2-regexp/scsh/re1.c	Sat Apr  6 21:01:15 1996
 ***************
 *** 19,24 ****
 --- 19,150 ----
  /* Stash error msg in global. */
  void regerror(char *msg) {regexp_error = msg;}
 + /*
 + ** Return NULL normally, error string on error.
 + ** Stash number of bytes needed for compiled regexp into `*len'
 + */
 + 
 + char *reg_comp_len(const char *re, int *len)
 + {
 +     int l;
 + 
 +     regexp_error = NULL;
 +     *len = regcomp_len(re); 
 +     return regexp_error;
 + }
 + 
 + /*
 + ** Return NULL normally, error string on error.
 + ** Compile regexp into string described by `cr'.
 + */
 + 
 + char *reg_comp_comp(const char *re, scheme_value cr) 
 + {
 +     int len = STRING_LENGTH(cr);
 +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 + 
 +     regexp_error = NULL;
 +     r = regcomp_comp(re, r, len); 
 +     return regexp_error;
 + }
 + 
 + /* Return NULL normally, error string on error.
 + ** Stash match info in start_vec and end_vec.
 + ** Returns boolean match/no-match in hit.
 + */
 + 
 + char *reg_exec(scheme_value cr, const char *string, int start,
 + 	       scheme_value start_vec, scheme_value end_vec,  int *hit)
 + {
 +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 + 
 +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 + 	return "Illegal start vector";
 + 	}
 +     
 +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 + 	return "Illegal end vector";
 + 	}
 + 
 +     regexp_error = 0;
 +     *hit = 0;
 +     
 +     if( regexec(r, string+start) ) {
 + 	int i;
 + 	for(i=0; i<NSUBEXP; i++) {
 + 	    const char *s = r->startp[i];
 + 	    const char *e = r->endp[i];
 + 	    VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
 + 	    VECTOR_REF(end_vec,i)   = e?ENTER_FIXNUM(e - string):SCHFALSE;
 + 	    r->startp[i] = NULL;
 + 	    r->endp[i] = NULL;
 + 	    }
 + 	*hit = 1;
 + 	}
 +     return regexp_error;
 + }
 + 
 + char *reg_subst(scheme_value cr, const char *match,
 + 		const char *src, int start,
 + 		scheme_value start_vec, scheme_value end_vec,
 + 		scheme_value outbuf, int *len)
 + {
 +     int i;
 +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 + 
 +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 + 	return "Illegal start vector";
 + 	}
 +     
 +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 + 	return "Illegal end vector";
 + 	}
 + 
 +     for (i=0; i<NSUBEXP; i++) 
 +     {
 + 	scheme_value se = VECTOR_REF(start_vec, i);
 + 	scheme_value ee = VECTOR_REF(end_vec, i);
 + 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 + 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 +     }
 +     
 +     regexp_error = NULL;
 +     regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
 +     *len = strlen(&STRING_REF(outbuf, 0));
 +     return regexp_error;
 + }
 + 
 + char *reg_subst_len(scheme_value cr, const char *match,
 + 		    const char *src, int start,
 + 		    scheme_value start_vec, scheme_value end_vec,
 + 		    int *len)
 + {
 +     int i;
 +     regexp *r = (regexp *)&STRING_REF(cr, 0);
 + 
 +     if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
 + 	return "Illegal start vector";
 + 	}
 +     
 +     if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
 + 	return "Illegal end vector";
 + 	}
 + 
 +     for (i=0; i<NSUBEXP; i++) 
 +     {
 + 	scheme_value se = VECTOR_REF(start_vec, i);
 + 	scheme_value ee = VECTOR_REF(end_vec, i);
 + 	r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
 + 	r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
 +     }
 +     
 +     regexp_error = NULL;
 +     *len = regsublen (r, src);
 +     return regexp_error;
 + }
 + 
 + 
 + #if 0
  /* Return NULL normally, error string on error.
  ** Stash match info in start_vec and end_vec.
  ** Returns boolean match/no-match in hit.
 ***************
 *** 56,61 ****
 --- 182,188 ----
      Free(prog);
      return regexp_error;
      }
 + #endif
  char *filter_stringvec(const char *re, char const **stringvec,  int *nummatch)
 diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
 *** scsh-0.4.2/scsh/re1.h	Sun Oct 22 08:34:34 1995
 --- scsh-0.4.2-regexp/scsh/re1.h	Sat Apr  6 17:54:09 1996
 ***************
 *** 1,6 ****
 --- 1,21 ----
 + #if 0
  char *reg_match(const char *re, const char *string, int start,
  		scheme_value start_vec, scheme_value end_vec,
  		int *hit);
 + #endif
  char *filter_stringvec(const char *re, char const **stringvec,
  		       int *nummatch);
 + 
 + char *reg_comp_len(const char *re, int *len);
 + char *reg_comp_comp(const char *re, scheme_value cr);
 + 
 + char *reg_exec(scheme_value cr, const char *string, int start,
 + 	       scheme_value start_vec, scheme_value end_vec,  int *hit);
 + 
 + char *reg_subst(scheme_value cr, const char *match,
 + 		const char *src, int start,
 + 		scheme_value start_vec, scheme_value end_vec,
 + 		scheme_value outbuf, int *len);
 + 
 + 
 Only in scsh-0.4.2-regexp/scsh: re2.scm
 diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
 *** scsh-0.4.2/scsh/scsh-interfaces.scm	Tue Oct 31 19:19:30 1995
 --- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm	Sat Apr  6 18:48:12 1996
 ***************
 *** 413,418 ****
 --- 413,419 ----
  	  make-regexp
  	  regexp?
  	  regexp-exec
 + 	  regexp-subst
  	  regexp-quote))
 regexp library changes:
 *** Makefile	1996/04/06 19:24:49	1.1
 --- Makefile	1996/04/06 20:46:26
 ***************
 *** 5,11 ****
  # Things you might want to put in TEST:
  # -DDEBUG		debugging hooks
  # -I.			regexp.h from current directory, not /usr/include
 ! TEST=-I.
  # Things you might want to put in PROF:
  # -pg			profiler
 --- 5,11 ----
  # Things you might want to put in TEST:
  # -DDEBUG		debugging hooks
  # -I.			regexp.h from current directory, not /usr/include
 ! TEST=-I. -DDEBUG
  # Things you might want to put in PROF:
  # -pg			profiler
 *** regexp.c	1996/04/06 19:24:49	1.1
 --- regexp.c	1996/04/06 22:34:55
 ***************
 *** 105,110 ****
 --- 105,111 ----
   * Utility definitions.
   */
  #define	FAIL(m)		{ regerror(m); return(NULL); }
 + #define	FAILN(m)	{ regerror(m); return(-1); }
  #define	ISREPN(c)	((c) == '*' || (c) == '+' || (c) == '?')
  #define	META		"^$.[()|?+*\\"
 ***************
 *** 162,173 ****
  const char *exp;
  {
  	register regexp *r;
 ! 	register char *scan;
  	int flags;
  	struct comp co;
  	if (exp == NULL)
 ! 		FAIL("NULL argument to regcomp");
  	/* First pass: determine size, legality. */
  	co.regparse = (char *)exp;
 --- 163,193 ----
  const char *exp;
  {
  	register regexp *r;
 ! 	size_t len;
 ! 
 ! 	len = regcomp_len(exp);
 ! 	if (len <= 0)
 ! 	        return NULL;
 ! 
 ! 	/* Allocate space. */
 ! 	r = (regexp *)malloc(len);
 ! 
 ! 	if (r == NULL)
 ! 		FAIL("out of space");
 ! 	return regcomp_comp(exp, r, len);
 ! }
 ! 
 ! 
 ! size_t
 ! regcomp_len(exp)
 ! const char *exp;
 ! {
  	int flags;
 + 	register regexp *r;
  	struct comp co;
  	if (exp == NULL)
 ! 		FAILN("NULL argument to regcomp");
  	/* First pass: determine size, legality. */
  	co.regparse = (char *)exp;
 ***************
 *** 178,198 ****
  	co.regcode = co.regdummy;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
 ! 		return(NULL);
  	/* Small enough for pointer-storage convention? */
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 ! 		FAIL("regexp too big");
 ! 	/* Allocate space. */
 ! 	r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
 ! 	if (r == NULL)
 ! 		FAIL("out of space");
  	/* Second pass: emit code. */
  	co.regparse = (char *)exp;
  	co.regnpar = 1;
  	co.regcode = r->program;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
  		return(NULL);
 --- 198,228 ----
  	co.regcode = co.regdummy;
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
 ! 		return -1;
  	/* Small enough for pointer-storage convention? */
  	if (co.regsize >= 0x7fffL)	/* Probably could be 0xffffL. */
 ! 		FAILN("regexp too big");
 ! 	return (sizeof(regexp) + (size_t)co.regsize);
 ! }
 ! 
 ! 
 ! regexp *
 ! regcomp_comp(exp, r, len)
 ! const char *exp;
 ! register regexp *r;
 ! size_t len;
 ! {
 ! 	register char *scan;
 ! 	int flags;
 ! 	struct comp co;
  	/* Second pass: emit code. */
  	co.regparse = (char *)exp;
  	co.regnpar = 1;
  	co.regcode = r->program;
 + 	co.regsize = len - sizeof(regexp);
  	regc(&co, MAGIC);
  	if (reg(&co, 0, &flags) == NULL)
  		return(NULL);
 ***************
 *** 200,206 ****
  	/* Dig out information for optimizations. */
  	r->regstart = '\0';		/* Worst-case defaults. */
  	r->reganch = 0;
 ! 	r->regmust = NULL;
  	r->regmlen = 0;
  	scan = r->program+1;		/* First BRANCH. */
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 --- 230,236 ----
  	/* Dig out information for optimizations. */
  	r->regstart = '\0';		/* Worst-case defaults. */
  	r->reganch = 0;
 ! 	r->regmust = 0;
  	r->regmlen = 0;
  	scan = r->program+1;		/* First BRANCH. */
  	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
 ***************
 *** 229,235 ****
  					longest = OPERAND(scan);
  					len = strlen(OPERAND(scan));
  				}
 ! 			r->regmust = longest;
  			r->regmlen = (int)len;
  		}
  	}
 --- 259,265 ----
  					longest = OPERAND(scan);
  					len = strlen(OPERAND(scan));
  				}
 ! 			r->regmust = longest - r->program;
  			r->regmlen = (int)len;
  		}
  	}
 ***************
 *** 648,655 ****
  struct exec {
  	char *reginput;		/* String-input pointer. */
  	char *regbol;		/* Beginning of input, for ^ check. */
 ! 	char **regstartp;	/* Pointer to startp array. */
 ! 	char **regendp;		/* Ditto for endp. */
  };
  /*
 --- 678,685 ----
  struct exec {
  	char *reginput;		/* String-input pointer. */
  	char *regbol;		/* Beginning of input, for ^ check. */
 ! 	const char **regstartp;	/* Pointer to startp array. */
 ! 	const char **regendp;		/* Ditto for endp. */
  };
  /*
 ***************
 *** 690,696 ****
  	}
  	/* If there is a "must appear" string, look for it. */
 ! 	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
  		return(0);
  	/* Mark beginning of line for ^ . */
 --- 720,727 ----
  	}
  	/* If there is a "must appear" string, look for it. */
 ! 	if ((prog->regmlen > 0) &&
 ! 	    strstr(string, &prog->program[prog->regmust]) == NULL)
  		return(0);
  	/* Mark beginning of line for ^ . */
 ***************
 *** 729,736 ****
  char *string;
  {
  	register int i;
 ! 	register char **stp;
 ! 	register char **enp;
  	ep->reginput = string;
 --- 760,767 ----
  char *string;
  {
  	register int i;
 ! 	register const char **stp;
 ! 	register const char **enp;
  	ep->reginput = string;
 ***************
 *** 1004,1011 ****
  		printf("start `%c' ", r->regstart);
  	if (r->reganch)
  		printf("anchored ");
 ! 	if (r->regmust != NULL)
 ! 		printf("must have \"%s\"", r->regmust);
  	printf("\n");
  }
 --- 1035,1042 ----
  		printf("start `%c' ", r->regstart);
  	if (r->reganch)
  		printf("anchored ");
 ! 	if (r->regmlen > 0)
 ! 		printf("must have \"%s\"", &r->program[r->regmust]);
  	printf("\n");
  }
 *** regexp.h	1996/04/06 19:24:49	1.1
 --- regexp.h	1996/04/07 01:52:19
 ***************
 *** 6,16 ****
   */
  #define NSUBEXP  10
  typedef struct regexp {
 ! 	char *startp[NSUBEXP];
 ! 	char *endp[NSUBEXP];
  	char regstart;		/* Internal use only. */
  	char reganch;		/* Internal use only. */
 ! 	char *regmust;		/* Internal use only. */
  	int regmlen;		/* Internal use only. */
  	char program[1];	/* Unwarranted chumminess with compiler. */
  } regexp;
 --- 6,16 ----
   */
  #define NSUBEXP  10
  typedef struct regexp {
 ! 	const char *startp[NSUBEXP];
 ! 	const char *endp[NSUBEXP];
  	char regstart;		/* Internal use only. */
  	char reganch;		/* Internal use only. */
 ! 	int regmust;		/* Internal use only. */
  	int regmlen;		/* Internal use only. */
  	char program[1];	/* Unwarranted chumminess with compiler. */
  } regexp;
 ***************
 *** 18,21 ****
 --- 18,27 ----
  extern regexp *regcomp(const char *re);
  extern int regexec(regexp *rp, const char *s);
  extern void regsub(const regexp *rp, const char *src, char *dst);
 + extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
 + extern size_t regsublen(const regexp *rp, const char *src);
 + 
  extern void regerror(char *message);
 + extern size_t regcomp_len(const char *exp);
 + extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
 + 
 *** regsub.c	1996/04/06 19:24:49	1.1
 --- regsub.c	1996/04/07 02:10:29
 ***************
 *** 11,25 ****
  /*
   - regsub - perform substitutions after a regexp match
   */
  void
 ! regsub(rp, source, dest)
  const regexp *rp;
  const char *source;
  char *dest;
  {
  	register regexp * const prog = (regexp *)rp;
 ! 	register char *src = (char *)source;
  	register char *dst = dest;
  	register char c;
  	register int no;
  	register size_t len;
 --- 11,42 ----
  /*
   - regsub - perform substitutions after a regexp match
   */
 + 
 + void regsub(rp, source, dest)
 + const regexp *rp;
 + const char *source;
 + char *dest;
 + {
 +         regnsub(rp, source, dest, BUFSIZ);
 + }
 + 
 + 
 + 
 + /*
 +  - regnsub - perform bounds-checked substitutions after a regexp match
 +  */
  void
 ! regnsub(rp, source, dest, destlen)
  const regexp *rp;
  const char *source;
  char *dest;
 + size_t destlen;
  {
  	register regexp * const prog = (regexp *)rp;
 ! 	register const char *src = (char *)source;
  	register char *dst = dest;
 + 	char *dstend = dest + destlen;
 + 	char *odst;
  	register char c;
  	register int no;
  	register size_t len;
 ***************
 *** 45,55 ****
  			if (c == '\\' && (*src == '\\' || *src == '&'))
  				c = *src++;
  			*dst++ = c;
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 ! 					prog->endp[no] > prog->startp[no]) {
  			len = prog->endp[no] - prog->startp[no];
 ! 			(void) strncpy(dst, prog->startp[no], len);
  			dst += len;
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
  				regerror("damaged match string");
  				return;
 --- 62,83 ----
  			if (c == '\\' && (*src == '\\' || *src == '&'))
  				c = *src++;
  			*dst++ = c;
 + 			if (dst >= dstend) 
 + 			{
 + 			    	regerror("output buffer too small");
 + 				return;
 + 			}
  		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 ! 			   prog->endp[no] > prog->startp[no]) {
  			len = prog->endp[no] - prog->startp[no];
 ! 			odst = dst;
  			dst += len;
 + 			if (dst >= dstend) 
 + 			{
 + 			    	regerror("output buffer too small");
 + 				return;
 + 			}
 + 			(void) strncpy(odst, prog->startp[no], len);
  			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
  				regerror("damaged match string");
  				return;
 ***************
 *** 58,60 ****
 --- 86,131 ----
  	}
  	*dst++ = '\0';
  }
 + 
 + size_t regsublen(rp, source)
 + const regexp *rp;
 + const char *source;
 + {
 +     register regexp * const prog = (regexp *)rp;
 +     register char *src = (char *)source;
 +     register char c;
 +     register int no;
 +     register int len = 0;
 + 	
 +     if (prog == NULL || source == NULL) {
 + 	regerror("NULL parameter to regsublen");
 + 	return -1;
 +     }
 +     
 +     if ((unsigned char)*(prog->program) != MAGIC) {
 + 	regerror("damaged regexp");
 + 	return -1;
 +     }
 +     while ((c = *src++) != '\0') {
 + 	if (c == '&')
 + 	    no = 0;
 + 	else if (c == '\\' && isdigit(*src))
 + 	    no = *src++ - '0';
 + 	else
 + 	    no = -1;
 + 	if (no < 0) {		/* Ordinary character. */
 + 	    if (c == '\\' && (*src == '\\' || *src == '&'))
 + 		src++;
 + 	    len++;
 + 	} else {
 + 	    const char *s = prog->startp[no];
 + 	    const char *e = prog->endp[no];
 + 	    if ((s != NULL) && (e != NULL) && (e > s)) {
 + 		len += e-s;
 + 	    }
 + 	}
 +     }
 +     return len+1;
 + }
 + 
 + 
 Original regexp code from henry:
 [unpacked & deleted -Olin]
--- a/scsh/regexp/regcomp.ih
+++ b/scsh/regexp/regcomp.ih
@ -1,51 +0,0 @@
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === regcomp.c === */
 static void p_ere(register struct parse *p, int stop);
 static void p_ere_exp(register struct parse *p);
 static void p_str(register struct parse *p);
 static void p_bre(register struct parse *p, register int end1, register int end2);
 static int p_simp_re(register struct parse *p, int starordinary);
 static int p_count(register struct parse *p);
 static void p_bracket(register struct parse *p);
 static void p_b_term(register struct parse *p, register cset *cs);
 static void p_b_cclass(register struct parse *p, register cset *cs);
 static void p_b_eclass(register struct parse *p, register cset *cs);
 static char p_b_symbol(register struct parse *p);
 static char p_b_coll_elem(register struct parse *p, int endc);
 static char othercase(int ch);
 static void bothcases(register struct parse *p, int ch);
 static void ordinary(register struct parse *p, register int ch);
 static void nonnewline(register struct parse *p);
 static void repeat(register struct parse *p, sopno start, int from, int to);
 static int seterr(register struct parse *p, int e);
 static cset *allocset(register struct parse *p);
 static void freeset(register struct parse *p, register cset *cs);
 static int freezeset(register struct parse *p, register cset *cs);
 static int firstch(register struct parse *p, register cset *cs);
 static int nch(register struct parse *p, register cset *cs);
 static void mcadd(register struct parse *p, register cset *cs, register char *cp);
 static void mcsub(register cset *cs, register char *cp);
 static int mcin(register cset *cs, register char *cp);
 static char *mcfind(register cset *cs, register char *cp);
 static void mcinvert(register struct parse *p, register cset *cs);
 static void mccase(register struct parse *p, register cset *cs);
 static int isinsets(register struct re_guts *g, int c);
 static int samesets(register struct re_guts *g, int c1, int c2);
 static void categorize(struct parse *p, register struct re_guts *g);
 static sopno dupl(register struct parse *p, sopno start, sopno finish);
 static void doemit(register struct parse *p, sop op, size_t opnd);
 static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
 static void dofwd(register struct parse *p, sopno pos, sop value);
 static void enlarge(register struct parse *p, sopno size);
 static void stripsnug(register struct parse *p, register struct re_guts *g);
 static void findmust(register struct parse *p, register struct re_guts *g);
 static sopno pluscount(register struct parse *p, register struct re_guts *g);
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
--- a/scsh/regexp/regerror.c
+++ b/scsh/regexp/regerror.c
@ -1,18 +1,126 @@
-/*
+#include <sys/types.h>
 * regerror
 */
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <regex.h>
-void
+#include "utils.h"
-regerror(s)
+#include "regerror.ih"
-char *s;
+
 /*
 = #define	REG_OKAY	 0
 = #define	REG_NOMATCH	 1
 = #define	REG_BADPAT	 2
 = #define	REG_ECOLLATE	 3
 = #define	REG_ECTYPE	 4
 = #define	REG_EESCAPE	 5
 = #define	REG_ESUBREG	 6
 = #define	REG_EBRACK	 7
 = #define	REG_EPAREN	 8
 = #define	REG_EBRACE	 9
 = #define	REG_BADBR	10
 = #define	REG_ERANGE	11
 = #define	REG_ESPACE	12
 = #define	REG_BADRPT	13
 = #define	REG_EMPTY	14
 = #define	REG_ASSERT	15
 = #define	REG_INVARG	16
 = #define	REG_ATOI	255	// convert name to number (!)
 = #define	REG_ITOA	0400	// convert number to name (!)
 */
 static struct rerr {
 	int code;
 	char *name;
 	char *explain;
 } rerrs[] = {
 	REG_OKAY,	"REG_OKAY",	"no errors detected",
 	REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match",
 	REG_BADPAT,	"REG_BADPAT",	"invalid regular expression",
 	REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element",
 	REG_ECTYPE,	"REG_ECTYPE",	"invalid character class",
 	REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)",
 	REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number",
 	REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced",
 	REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced",
 	REG_EBRACE,	"REG_EBRACE",	"braces not balanced",
 	REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)",
 	REG_ERANGE,	"REG_ERANGE",	"invalid character range",
 	REG_ESPACE,	"REG_ESPACE",	"out of memory",
 	REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid",
 	REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression",
 	REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug",
 	REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine",
 	-1,		"",		"*** unknown regexp error code ***",
 };
 /*
 - regerror - the interface to error numbers
 = extern size_t regerror(int, const regex_t *, char *, size_t);
 */
 /* ARGSUSED */
 size_t
 regerror(errcode, preg, errbuf, errbuf_size)
 int errcode;
 const regex_t *preg;
 char *errbuf;
 size_t errbuf_size;
 {
-#ifdef ERRAVAIL
+	register struct rerr *r;
-	error("regexp: %s", s);
+	register size_t len;
-#else
+	register int target = errcode &~ REG_ITOA;
-	fprintf(stderr, "regexp(3): %s\n", s);
+	register char *s;
-	exit(EXIT_FAILURE);
+	char convbuf[50];
-#endif
+
-	/* NOTREACHED */
+	if (errcode == REG_ATOI)
 		s = regatoi(preg, convbuf);
 	else {
 		for (r = rerrs; r->code >= 0; r++)
 			if (r->code == target)
 				break;
 		if (errcode&REG_ITOA) {
 			if (r->code >= 0)
 				(void) strcpy(convbuf, r->name);
 			else
 				sprintf(convbuf, "REG_0x%x", target);
 			assert(strlen(convbuf) < sizeof(convbuf));
 			s = convbuf;
 		} else
 			s = r->explain;
 	}
 	len = strlen(s) + 1;
 	if (errbuf_size > 0) {
 		if (errbuf_size > len)
 			(void) strcpy(errbuf, s);
 		else {
 			(void) strncpy(errbuf, s, errbuf_size-1);
 			errbuf[errbuf_size-1] = '\0';
 		}
 	}
 	return(len);
 }
 /*
 - regatoi - internal routine to implement REG_ATOI
 == static char *regatoi(const regex_t *preg, char *localbuf);
 */
 static char *
 regatoi(preg, localbuf)
 const regex_t *preg;
 char *localbuf;
 {
 	register struct rerr *entry = rerrs;
 	/*
 	 * Advance until an entry's name equals the string in preg->re_endp,
 	 * or the negative-code sentinel that ends the table is reached.
 	 */
 	while (entry->code >= 0 && strcmp(entry->name, preg->re_endp) != 0)
 		entry++;
 	if (entry->code >= 0) {
 		/* Found: hand back the code as decimal text in the caller's buffer. */
 		sprintf(localbuf, "%d", entry->code);
 		return(localbuf);
 	}
 	/* Unknown name: the answer is the constant string "0". */
 	return("0");
 }
--- a/scsh/regexp/regerror.ih
+++ b/scsh/regexp/regerror.ih
@ -1,12 +0,0 @@
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === regerror.c === */
 static char *regatoi(const regex_t *preg, char *localbuf);
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
--- a/scsh/regexp/regex.h
+++ b/scsh/regexp/regex.h
@ -1,74 +0,0 @@
 #ifndef _REGEX_H_
 #define	_REGEX_H_	/* never again */
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* === regex2.h === */
 typedef off_t regoff_t;
 typedef struct {
 	int re_magic;
 	size_t re_nsub;		/* number of parenthesized subexpressions */
 	const char *re_endp;	/* end pointer for REG_PEND */
 	struct re_guts *re_g;	/* none of your business :-) */
 } regex_t;
 typedef struct {
 	regoff_t rm_so;		/* start of match */
 	regoff_t rm_eo;		/* end of match */
 } regmatch_t;
 /* === regcomp.c === */
 extern int regcomp(regex_t *, const char *, int);
 #define	REG_BASIC	0000
 #define	REG_EXTENDED	0001
 #define	REG_ICASE	0002
 #define	REG_NOSUB	0004
 #define	REG_NEWLINE	0010
 #define	REG_NOSPEC	0020
 #define	REG_PEND	0040
 #define	REG_DUMP	0200
 /* === regerror.c === */
 #define	REG_OKAY	 0
 #define	REG_NOMATCH	 1
 #define	REG_BADPAT	 2
 #define	REG_ECOLLATE	 3
 #define	REG_ECTYPE	 4
 #define	REG_EESCAPE	 5
 #define	REG_ESUBREG	 6
 #define	REG_EBRACK	 7
 #define	REG_EPAREN	 8
 #define	REG_EBRACE	 9
 #define	REG_BADBR	10
 #define	REG_ERANGE	11
 #define	REG_ESPACE	12
 #define	REG_BADRPT	13
 #define	REG_EMPTY	14
 #define	REG_ASSERT	15
 #define	REG_INVARG	16
 #define	REG_ATOI	255	/* convert name to number (!) */
 #define	REG_ITOA	0400	/* convert number to name (!) */
 extern size_t regerror(int, const regex_t *, char *, size_t);
 /* === regexec.c === */
 extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
 #define	REG_NOTBOL	00001
 #define	REG_NOTEOL	00002
 #define	REG_STARTEND	00004
 #define	REG_TRACE	00400	/* tracing of execution */
 #define	REG_LARGE	01000	/* force large representation */
 #define	REG_BACKR	02000	/* force use of backref code */
 /* === regfree.c === */
 extern void regfree(regex_t *);
 #ifdef __cplusplus
 }
 #endif
 /* ========= end header generated by ./mkh ========= */
 #endif
--- a/scsh/regexp/regex2.h
+++ b/scsh/regexp/regex2.h
@ -36,36 +36,36 @@
 * In state representations, an operator's bit is on to signify a state
 * immediately *preceding* "execution" of that operator.
 */
-typedef unsigned long sop;	/* strip operator */
+typedef long sop;		/* strip operator */
 typedef long sopno;
-#define	OPRMASK	0xf8000000
+#define	OPRMASK	0x7c000000
-#define	OPDMASK	0x07ffffff
+#define	OPDMASK	0x03ffffff
-#define	OPSHIFT	((unsigned)27)
+#define	OPSHIFT	(26)
 #define	OP(n)	((n)&OPRMASK)
 #define	OPND(n)	((n)&OPDMASK)
 #define	SOP(op, opnd)	((op)|(opnd))
 /* operators			   meaning	operand			*/
 /*						(back, fwd are offsets)	*/
-#define	OEND	(1ul<<OPSHIFT)	/* endmarker	-			*/
+#define	OEND	(1<<OPSHIFT)	/* endmarker	-			*/
-#define	OCHAR	(2ul<<OPSHIFT)	/* character	unsigned char		*/
+#define	OCHAR	(2<<OPSHIFT)	/* character	unsigned char		*/
-#define	OBOL	(3ul<<OPSHIFT)	/* left anchor	-			*/
+#define	OBOL	(3<<OPSHIFT)	/* left anchor	-			*/
-#define	OEOL	(4ul<<OPSHIFT)	/* right anchor	-			*/
+#define	OEOL	(4<<OPSHIFT)	/* right anchor	-			*/
-#define	OANY	(5ul<<OPSHIFT)	/* .		-			*/
+#define	OANY	(5<<OPSHIFT)	/* .		-			*/
-#define	OANYOF	(6ul<<OPSHIFT)	/* [...]	set number		*/
+#define	OANYOF	(6<<OPSHIFT)	/* [...]	set number		*/
-#define	OBACK_	(7ul<<OPSHIFT)	/* begin \d	paren number		*/
+#define	OBACK_	(7<<OPSHIFT)	/* begin \d	paren number		*/
-#define	O_BACK	(8ul<<OPSHIFT)	/* end \d	paren number		*/
+#define	O_BACK	(8<<OPSHIFT)	/* end \d	paren number		*/
-#define	OPLUS_	(9ul<<OPSHIFT)	/* + prefix	fwd to suffix		*/
+#define	OPLUS_	(9<<OPSHIFT)	/* + prefix	fwd to suffix		*/
-#define	O_PLUS	(10ul<<OPSHIFT)	/* + suffix	back to prefix		*/
+#define	O_PLUS	(10<<OPSHIFT)	/* + suffix	back to prefix		*/
-#define	OQUEST_	(11ul<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
+#define	OQUEST_	(11<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
-#define	O_QUEST	(12ul<<OPSHIFT)	/* ? suffix	back to prefix		*/
+#define	O_QUEST	(12<<OPSHIFT)	/* ? suffix	back to prefix		*/
-#define	OLPAREN	(13ul<<OPSHIFT)	/* (		fwd to )		*/
+#define	OLPAREN	(13<<OPSHIFT)	/* (		fwd to )		*/
-#define	ORPAREN	(14ul<<OPSHIFT)	/* )		back to (		*/
+#define	ORPAREN	(14<<OPSHIFT)	/* )		back to (		*/
-#define	OCH_	(15ul<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
+#define	OCH_	(15<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
-#define	OOR1	(16ul<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
+#define	OOR1	(16<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
-#define	OOR2	(17ul<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
+#define	OOR2	(17<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
-#define	O_CH	(18ul<<OPSHIFT)	/* end choice	back to OOR1		*/
+#define	O_CH	(18<<OPSHIFT)	/* end choice	back to OOR1		*/
-#define	OBOW	(19ul<<OPSHIFT)	/* begin word	-			*/
+#define	OBOW	(19<<OPSHIFT)	/* begin word	-			*/
-#define	OEOW	(20ul<<OPSHIFT)	/* end word	-			*/
+#define	OEOW	(20<<OPSHIFT)	/* end word	-			*/
 /*
 * Structure for [] character-set representation.  Character sets are
--- a/scsh/regexp/regexec.c
+++ b/scsh/regexp/regexec.c
@ -19,27 +19,27 @@
 static int nope = 0;		/* for use in asserts; shuts lint up */
 /* macros for manipulating states, small version */
-#define	states	long
+#define	states	unsigned
-#define	states1	states		/* for later use in regexec() decision */
+#define	states1	unsigned	/* for later use in regexec() decision */
 #define	CLEAR(v)	((v) = 0)
-#define	SET0(v, n)	((v) &= ~(1ul << (n)))
+#define	SET0(v, n)	((v) &= ~((unsigned)1 << (n)))
-#define	SET1(v, n)	((v) |= 1ul << (n))
+#define	SET1(v, n)	((v) |= (unsigned)1 << (n))
-#define	ISSET(v, n)	((v) & (1ul << (n)))
+#define	ISSET(v, n)	((v) & ((unsigned)1 << (n)))
 #define	ASSIGN(d, s)	((d) = (s))
 #define	EQ(a, b)	((a) == (b))
 #define	STATEVARS	int dummy	/* dummy version */
 #define	STATESETUP(m, n)	/* nothing */
 #define	STATETEARDOWN(m)	/* nothing */
 #define	SETUP(v)	((v) = 0)
-#define	onestate	long
+#define	onestate	unsigned
-#define	INIT(o, n)	((o) = (unsigned long)1 << (n))
+#define	INIT(o, n)	((o) = (unsigned)1 << (n))
 #define	INC(o)	((o) <<= 1)
 #define	ISSTATEIN(v, o)	((v) & (o))
 /* some abbreviations; note that some of these know variable names! */
 /* do "if I'm here, I can also be there" etc without branches */
-#define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
+#define	FWD(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) << (n))
-#define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
+#define	BACK(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) >> (n))
-#define	ISSETBACK(v, n)	((v) & ((unsigned long)here >> (n)))
+#define	ISSETBACK(v, n)	((v) & ((unsigned)here >> (n)))
 /* function names */
 #define SNAMES			/* engine.c looks after details */
--- a/scsh/regexp/regexp.3
+++ b/scsh/regexp/regexp.3
@ -1,186 +0,0 @@
 .TH REGEXP 3 "2 Sept 1995"
 .SH NAME
 regcomp, regexec, regsub, regerror \- regular expression handler
 .SH SYNOPSIS
 .ft B
 .nf
 #include <regexp.h>
 regexp *regcomp(exp)
 const char *exp;
 int regexec(prog, string)
 regexp *prog;
 const char *string;
 void regsub(prog, source, dest)
 const regexp *prog;
 const char *source;
 char *dest;
 void regerror(msg)
 char *msg;
 .SH DESCRIPTION
 These functions implement
 .IR egrep (1)-style
 regular expressions and supporting facilities.
 .PP
 .I Regcomp
 compiles a regular expression into a structure of type
 .IR regexp ,
 and returns a pointer to it.
 The space has been allocated using
 .IR malloc (3)
 and may be released by
 .IR free .
 .PP
 .I Regexec
 matches a NUL-terminated \fIstring\fR against the compiled regular expression
 in \fIprog\fR.
 It returns 1 for success and 0 for failure, and adjusts the contents of
 \fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.
 .PP
 The members of a
 .I regexp
 structure include at least the following (not necessarily in order):
 .PP
 .RS
 char *startp[NSUBEXP];
 .br
 char *endp[NSUBEXP];
 .RE
 .PP
 where
 .I NSUBEXP
 is defined (as 10) in the header file.
 Once a successful \fIregexec\fR has been done using the \fIregexp\fR,
 each \fIstartp\fR-\fIendp\fR pair describes one substring
 within the \fIstring\fR,
 with the \fIstartp\fR pointing to the first character of the substring and
 the \fIendp\fR pointing to the first character following the substring.
 The 0th substring is the substring of \fIstring\fR that matched the whole
 regular expression.
 The others are those substrings that matched parenthesized expressions
 within the regular expression, with parenthesized expressions numbered
 in left-to-right order of their opening parentheses.
 .PP
 .I Regsub
 copies \fIsource\fR to \fIdest\fR, making substitutions according to the
 most recent \fIregexec\fR performed using \fIprog\fR.
 Each instance of `&' in \fIsource\fR is replaced by the substring
 indicated by \fIstartp\fR[\fI0\fR] and
 \fIendp\fR[\fI0\fR].
 Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by
 the substring indicated by
 \fIstartp\fR[\fIn\fR] and
 \fIendp\fR[\fIn\fR].
 To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';
 to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with
 another `\e'.
 .PP
 .I Regerror
 is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,
 or \fIregsub\fR.
 The default \fIregerror\fR writes the string \fImsg\fR,
 with a suitable indicator of origin,
 on the standard
 error output
 and invokes \fIexit\fR(2).
 .I Regerror
 can be replaced by the user if other actions are desirable.
 .SH "REGULAR EXPRESSION SYNTAX"
 A regular expression is zero or more \fIbranches\fR, separated by `|'.
 It matches anything that matches one of the branches.
 .PP
 A branch is zero or more \fIpieces\fR, concatenated.
 It matches a match for the first, followed by a match for the second, etc.
 .PP
 A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.
 An atom followed by `*' matches a sequence of 0 or more matches of the atom.
 An atom followed by `+' matches a sequence of 1 or more matches of the atom.
 An atom followed by `?' matches a match of the atom, or the null string.
 .PP
 An atom is a regular expression in parentheses (matching a match for the
 regular expression), a \fIrange\fR (see below), `.'
 (matching any single character), `^' (matching the null string at the
 beginning of the input string), `$' (matching the null string at the
 end of the input string), a `\e' followed by a single character (matching
 that character), or a single character with no other significance
 (matching that character).
 .PP
 A \fIrange\fR is a sequence of characters enclosed in `[]'.
 It normally matches any single character from the sequence.
 If the sequence begins with `^',
 it matches any single character \fInot\fR from the rest of the sequence.
 If two characters in the sequence are separated by `\-', this is shorthand
 for the full list of ASCII characters between them
 (e.g. `[0-9]' matches any decimal digit).
 To include a literal `]' in the sequence, make it the first character
 (following a possible `^').
 To include a literal `\-', make it the first or last character.
 .SH AMBIGUITY
 If a regular expression could match two different parts of the input string,
 it will match the one which begins earliest.
 If both begin in the same place but match different lengths, or match
 the same length in different ways, life gets messier, as follows.
 .PP
 In general, the possibilities in a list of branches are considered in
 left-to-right order, the possibilities for `*', `+', and `?' are
 considered longest-first, nested constructs are considered from the
 outermost in, and concatenated constructs are considered leftmost-first.
 The match that will be chosen is the one that uses the earliest
 possibility in the first choice that has to be made.
 If there is more than one choice, the next will be made in the same manner
 (earliest possibility) subject to the decision on the first choice.
 And so forth.
 .PP
 For example, `(ab|a)b*c' could match `abc' in one of two ways.
 The first choice is between `ab' and `a'; since `ab' is earlier, and does
 lead to a successful overall match, it is chosen.
 Since the `b' is already spoken for,
 the `b*' must match its last possibility\(emthe empty string\(emsince
 it must respect the earlier choice.
 .PP
 In the particular case where the regular expression does not use `|'
 and does not apply `*', `+', or `?' to parenthesized subexpressions,
 the net effect is that the longest possible
 match will be chosen.
 So `ab*', presented with `xabbbby', will match `abbbb'.
 Note that if `ab*' is tried against `xabyabbbz', it
 will match `ab' just after `x', due to the begins-earliest rule.
 (In effect, the decision on where to start the match is the first choice
 to be made, hence subsequent choices must respect it even if this leads them
 to less-preferred alternatives.)
 .SH SEE ALSO
 egrep(1), expr(1)
 .SH DIAGNOSTICS
 \fIRegcomp\fR returns NULL for a failure
 (\fIregerror\fR permitting),
 where failures are syntax errors, exceeding implementation limits,
 or applying `+' or `*' to a possibly-null operand.
 .SH HISTORY
 This is a revised version.
 Both code and manual page were
 originally written by Henry Spencer at University of Toronto.
 They are intended to be compatible with the Bell V8 \fIregexp\fR(3),
 but are not derived from Bell code.
 .SH BUGS
 Empty branches and empty regular expressions are not portable
 to other, otherwise-similar, implementations.
 .PP
 The ban on
 applying `*' or `+' to a possibly-null operand is an artifact of the
 simplistic implementation.
 .PP
 The match-choice rules are complex.
 A simple ``longest match'' rule would be preferable,
 but is harder to implement.
 .PP
 Although there is a general similarity to POSIX.2 ``extended'' regular
 expressions, neither the regular-expression syntax nor the programming
 interface is an exact match.
 .PP
 Due to emphasis on
 compactness and simplicity,
 it's not strikingly fast.
 It does give some attention to handling simple cases quickly.
--- a/scsh/regexp/regexp.c
+++ b/scsh/regexp/regexp.c
--- a/scsh/regexp/regexp.h
+++ b/scsh/regexp/regexp.h
@ -1,27 +0,0 @@
 /*
 * Definitions etc. for regexp(3) routines.
 *
 * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
 * not the System V one.
 */
 #define NSUBEXP  10
 /*
  * Compiled regular expression together with the results of the most
  * recent match performed with it.
  */
 typedef struct regexp {
 	const char *startp[NSUBEXP];	/* start of each matched substring; [0] is the whole match */
 	const char *endp[NSUBEXP];	/* first character following each matched substring */
 	char regstart;		/* Internal use only; '\0' is the worst-case default. */
 	char reganch;		/* Internal use only; nonzero is reported as "anchored". */
 	int regmust;		/* Internal use only; offset into program[] of the "must appear" string. */
 	int regmlen;		/* Internal use only; length of that string, > 0 only when one exists. */
 	char program[1];	/* Unwarranted chumminess with compiler; first byte holds the MAGIC number. */
 } regexp;
 extern regexp *regcomp(const char *re);
 extern int regexec(regexp *rp, const char *s);
 extern void regsub(const regexp *rp, const char *src, char *dst);
 extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
 extern size_t regsublen(const regexp *rp, const char *src);
 extern void regerror(char *message);
 extern size_t regcomp_len(const char *exp);
 extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
--- a/scsh/regexp/regmagic.h
+++ b/scsh/regexp/regmagic.h
@ -1,5 +0,0 @@
 /*
 * The first byte of the regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.
 */
 #define	MAGIC	0234
--- a/scsh/regexp/regsub.c
+++ b/scsh/regexp/regsub.c
@ -1,131 +0,0 @@
 /*
 * regsub
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <regexp.h>
 #include "regmagic.h"
 /*
 - regsub - perform substitutions after a regexp match
 */
 void regsub(rp, source, dest)
 const regexp *rp;
 const char *source;
 char *dest;
 {
 	/*
 	 * This entry point has no length parameter, so the destination is
 	 * taken to hold BUFSIZ bytes.  Callers whose buffer has a different
 	 * size should call regnsub() with the real capacity instead.
 	 */
 	const size_t assumed_capacity = BUFSIZ;
 	regnsub(rp, source, dest, assumed_capacity);
 }
 /*
  - regnsub - perform bounds-checked substitutions after a regexp match
  *
  * Copies source to dest.  Each `&' is replaced by the text between
  * startp[0] and endp[0]; each `\' followed by a digit n is replaced by
  * the text between startp[n] and endp[n] (nothing is emitted when that
  * pair is unset or empty).  `\\' and `\&' produce a literal `\' or `&'.
  *
  * At most destlen bytes of dest are written, terminating NUL included.
  * Failures are reported through regerror().  If regerror() returns and
  * destlen is nonzero, dest is left NUL-terminated holding whatever was
  * produced before the failure.
  */
 void
 regnsub(rp, source, dest, destlen)
 const regexp *rp;
 const char *source;
 char *dest;
 size_t destlen;
 {
 	register const regexp *prog = rp;
 	register const char *src = source;
 	register char *dst = dest;
 	char *dstend;
 	register char c;
 	register int no;
 	register size_t len;
 	if (prog == NULL || source == NULL || dest == NULL) {
 		regerror("NULL parameter to regsub");
 		return;
 	}
 	if ((unsigned char)*(prog->program) != MAGIC) {
 		regerror("damaged regexp");
 		return;
 	}
 	/* A zero-length buffer cannot even take the NUL: store nothing. */
 	if (destlen == 0) {
 		regerror("output buffer too small");
 		return;
 	}
 	dstend = dest + destlen;
 	/* Invariant from here on: dst < dstend, so *dst is always writable. */
 	while ((c = *src++) != '\0') {
 		if (c == '&')
 			no = 0;
 		/* Cast: passing a negative char to isdigit() is undefined. */
 		else if (c == '\\' && isdigit((unsigned char)*src))
 			no = *src++ - '0';
 		else
 			no = -1;
 		if (no < 0) {	/* Ordinary character. */
 			if (c == '\\' && (*src == '\\' || *src == '&'))
 				c = *src++;
 			/* Need room for c and for the terminator after it. */
 			if ((size_t)(dstend - dst) < 2) {
 				*dst = '\0';
 				regerror("output buffer too small");
 				return;
 			}
 			*dst++ = c;
 		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
 			   prog->endp[no] > prog->startp[no]) {
 			len = prog->endp[no] - prog->startp[no];
 			/*
 			 * Compare lengths rather than computing dst + len
 			 * first: that pointer could lie past the buffer.
 			 */
 			if (len >= (size_t)(dstend - dst)) {
 				*dst = '\0';
 				regerror("output buffer too small");
 				return;
 			}
 			(void) strncpy(dst, prog->startp[no], len);
 			dst += len;
 			/* strncpy pads with NULs if the match text ended early. */
 			if (*(dst-1) == '\0') {	/* strncpy hit NUL. */
 				regerror("damaged match string");
 				return;
 			}
 		}
 	}
 	*dst = '\0';
 }
 /*
  - regsublen - size of the buffer a substitution of source would need
  *
  * Walks source with the same rules regnsub() applies (`&', `\' plus a
  * digit, `\\', `\&') and adds up the bytes that would be emitted, using
  * the startp/endp pairs currently stored in rp.  Returns that total plus
  * one for the terminating NUL.  On a NULL argument or a damaged regexp,
  * regerror() is called and (size_t)-1 is returned.
  */
 size_t regsublen(rp, source)
 const regexp *rp;
 const char *source;
 {
 	register const regexp *prog = rp;
 	register const char *src = source;
 	register char c;
 	register int no;
 	register size_t len = 0;	/* size_t: an int could overflow on long matches */
 	if (prog == NULL || source == NULL) {
 		regerror("NULL parameter to regsublen");
 		return (size_t)-1;
 	}
 	if ((unsigned char)*(prog->program) != MAGIC) {
 		regerror("damaged regexp");
 		return (size_t)-1;
 	}
 	while ((c = *src++) != '\0') {
 		if (c == '&')
 			no = 0;
 		/* Cast: passing a negative char to isdigit() is undefined. */
 		else if (c == '\\' && isdigit((unsigned char)*src))
 			no = *src++ - '0';
 		else
 			no = -1;
 		if (no < 0) {		/* Ordinary character. */
 			/* An escaped `\' or `&' consumes two input bytes, emits one. */
 			if (c == '\\' && (*src == '\\' || *src == '&'))
 				src++;
 			len++;
 		} else {
 			const char *s = prog->startp[no];
 			const char *e = prog->endp[no];
 			/* Unset or empty subexpressions contribute nothing. */
 			if ((s != NULL) && (e != NULL) && (e > s)) {
 				len += (size_t)(e - s);
 			}
 		}
 	}
 	return len + 1;
 }
--- a/scsh/regexp/tests
+++ b/scsh/regexp/tests
@ -1,127 +1,477 @@
-abc	abc	y	&	abc
+# regular expression test set
-abc	xbc	n	-	-
+# Lines are at least three fields, separated by one or more tabs.  "" stands
-abc	axc	n	-	-
+# for an empty field.  First field is an RE.  Second field is flags.  If
-abc	abx	n	-	-
+# C flag given, regcomp() is expected to fail, and the third field is the
-abc	xabcy	y	&	abc
+# error name (minus the leading REG_).
-abc	ababc	y	&	abc
+#
-ab*c	abc	y	&	abc
+# Otherwise it is expected to succeed, and the third field is the string to
-ab*bc	abc	y	&	abc
+# try matching it against.  If there is no fourth field, the match is
-ab*bc	abbc	y	&	abbc
+# expected to fail.  If there is a fourth field, it is the substring that
-ab*bc	abbbbc	y	&	abbbbc
+# the RE is expected to match.  If there is a fifth field, it is a comma-
-ab+bc	abbc	y	&	abbc
+# separated list of what the subexpressions should match, with - indicating
-ab+bc	abc	n	-	-
+# no match for that one.  In both the fourth and fifth fields, a (sub)field
-ab+bc	abq	n	-	-
+# starting with @ indicates that the (sub)expression is expected to match
-ab+bc	abbbbc	y	&	abbbbc
+# a null string followed by the stuff after the @; this provides a way to
-ab?bc	abbc	y	&	abbc
+# test where null strings match.  The character `N' in REs and strings
-ab?bc	abc	y	&	abc
+# is newline, `S' is space, `T' is tab, `Z' is NUL.
-ab?bc	abbbbc	n	-	-
+#
-ab?c	abc	y	&	abc
+# The full list of flags:
-^abc$	abc	y	&	abc
+#	-	placeholder, does nothing
-^abc$	abcc	n	-	-
+#	b	RE is a BRE, not an ERE
-^abc	abcc	y	&	abc
+#	&	try it as both an ERE and a BRE
-^abc$	aabc	n	-	-
+#	C	regcomp() error expected, third field is error name
-abc$	aabc	y	&	abc
+#	i	REG_ICASE
-^	abc	y	&	
+#	m	("mundane") REG_NOSPEC
-$	abc	y	&	
+#	s	REG_NOSUB (not really testable)
-a.c	abc	y	&	abc
+#	n	REG_NEWLINE
-a.c	axc	y	&	axc
+#	^	REG_NOTBOL
-a.*c	axyzc	y	&	axyzc
+#	$	REG_NOTEOL
-a.*c	axyzd	n	-	-
+#	#	REG_STARTEND (see below)
-a[bc]d	abc	n	-	-
+#	p	REG_PEND
-a[bc]d	abd	y	&	abd
+#
-a[b-d]e	abd	n	-	-
+# For REG_STARTEND, the start/end offsets are those of the substring
-a[b-d]e	ace	y	&	ace
+# enclosed in ().
-a[b-d]	aac	y	&	ac
+
-a[-b]	a-	y	&	a-
+# basics
-a[b-]	a-	y	&	a-
+a		&	a	a
-[k]	ab	n	-	-
+abc		&	abc	abc
-a[b-a]	-	c	-	-
+abc|de		-	abc	abc
-a[]b	-	c	-	-
+a|b|c		-	abc	a
-a[	-	c	-	-
+
-a]	a]	y	&	a]
+# parentheses and perversions thereof
-a[]]b	a]b	y	&	a]b
+a(b)c		-	abc	abc
-a[^bc]d	aed	y	&	aed
+a\(b\)c		b	abc	abc
-a[^bc]d	abd	n	-	-
+a(		C	EPAREN
-a[^-b]c	adc	y	&	adc
+a(		b	a(	a(
-a[^-b]c	a-c	n	-	-
+a\(		-	a(	a(
-a[^]b]c	a]c	n	-	-
+a\(		bC	EPAREN
-a[^]b]c	adc	y	&	adc
+a\(b		bC	EPAREN
-ab|cd	abc	y	&	ab
+a(b		C	EPAREN
-ab|cd	abcd	y	&	ab
+a(b		b	a(b	a(b
-()ef	def	y	&-\1	ef-
+# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
-()*	-	c	-	-
+a)		-	a)	a)
-*a	-	c	-	-
+)		-	)	)
-^*	-	c	-	-
+# end gagging (in a just world, those *should* give EPAREN)
-$*	-	c	-	-
+a)		b	a)	a)
-(*)b	-	c	-	-
+a\)		bC	EPAREN
-$b	b	n	-	-
+\)		bC	EPAREN
-a\	-	c	-	-
+a()b		-	ab	ab
-a\(b	a(b	y	&-\1	a(b-
+a\(\)b		b	ab	ab
-a\(*b	ab	y	&	ab
+
-a\(*b	a((b	y	&	a((b
+# anchoring and REG_NEWLINE
-a\\b	a\b	y	&	a\b
+^abc$		&	abc	abc
-abc)	-	c	-	-
+a^b		-	a^b
-(abc	-	c	-	-
+a^b		b	a^b	a^b
-((a))	abc	y	&-\1-\2	a-a-a
+a$b		-	a$b
-(a)b(c)	abc	y	&-\1-\2	abc-a-c
+a$b		b	a$b	a$b
-a+b+c	aabbabc	y	&	abc
+^		&	abc	@abc
-a**	-	c	-	-
+$		&	abc	@
-a*?	-	c	-	-
+^$		&	""	@
-(a*)*	-	c	-	-
+$^		-	""	@
-(a*)+	-	c	-	-
+\($\)\(^\)	b	""	@
-(a|)*	-	c	-	-
+# stop retching, those are legitimate (although disgusting)
-(a*|b)*	-	c	-	-
+^^		-	""	@
-(a+|b)*	ab	y	&-\1	ab-b
+$$		-	""	@
-(a+|b)+	ab	y	&-\1	ab-b
+b$		&	abNc
-(a+|b)?	ab	y	&-\1	a-a
+b$		&n	abNc	b
-[^ab]*	cde	y	&	cde
+^b$		&	aNbNc
-(^)*	-	c	-	-
+^b$		&n	aNbNc	b
-(ab|)*	-	c	-	-
+^$		&n	aNNb	@Nb
-)(	-	c	-	-
+^$		n	abc
-	abc	y	&	
+^$		n	abcN	@
-abc		n	-	-
+$^		n	aNNb	@Nb
-a*		y	&	
+\($\)\(^\)	bn	aNNb	@Nb
-abcd	abcd	y	&-\&-\\&	abcd-&-\abcd
+^^		n^	aNNb	@Nb
-a(bc)d	abcd	y	\1-\\1-\\\1	bc-\1-\bc
+$$		n	aNNb	@NN
-([abc])*d	abbbcd	y	&-\1	abbbcd-c
+^a		^	a
-([abc])*bcd	abcd	y	&-\1	abcd-a
+a$		$	a
-a|b|c|d|e	e	y	&	e
+^a		^n	aNb
-(a|b|c|d|e)f	ef	y	&-\1	ef-e
+^b		^n	aNb	b
-((a*|b))*	-	c	-	-
+a$		$n	bNa
-abcd*efg	abcdefg	y	&	abcdefg
+b$		$n	bNa	b
-ab*	xabyabbbz	y	&	ab
+a*(^b$)c*	-	b	b
-ab*	xayabbbz	y	&	a
+a*\(^b$\)c*	b	b	b
-(ab|cd)e	abcde	y	&-\1	cde-cd
+
-[abhgefdc]ij	hij	y	&	hij
+# certain syntax errors and non-errors
-^(ab|cd)e	abcde	n	x\1y	xy
+|		C	EMPTY
-(abc|)ef	abcdef	y	&-\1	ef-
+|		b	|	|
-(a|b)c*d	abcd	y	&-\1	bcd-b
+*		C	BADRPT
-(ab|ab*)bc	abc	y	&-\1	abc-a
+*		b	*	*
-a([bc]*)c*	abc	y	&-\1	abc-bc
+		C	BADRPT
-a([bc]*)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+?		C	BADRPT
-a([bc]+)(c*d)	abcd	y	&-\1-\2	abcd-bc-d
+""		&C	EMPTY
-a([bc]*)(c+d)	abcd	y	&-\1-\2	abcd-b-cd
+()		-	abc	@abc
-a[bcd]*dcdcde	adcdcde	y	&	adcdcde
+\(\)		b	abc	@abc
-a[bcd]+dcdcde	adcdcde	n	-	-
+a||b		C	EMPTY
-(ab|a)b*c	abc	y	&-\1	abc-ab
+|ab		C	EMPTY
-((a)(b)c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
+ab|		C	EMPTY
-[ -~]*	abc	y	&	abc
+(|a)b		C	EMPTY
-[ -~ -~]*	abc	y	&	abc
+(a|)b		C	EMPTY
-[ -~ -~ -~]*	abc	y	&	abc
+(*a)		C	BADRPT
-[ -~ -~ -~ -~]*	abc	y	&	abc
+(+a)		C	BADRPT
-[ -~ -~ -~ -~ -~]*	abc	y	&	abc
+(?a)		C	BADRPT
-[ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+({1}a)		C	BADRPT
-[ -~ -~ -~ -~ -~ -~ -~]*	abc	y	&	abc
+\(\{1\}a\)	bC	BADRPT
-[a-zA-Z_][a-zA-Z0-9_]*	alpha	y	&	alpha
+(a|*b)		C	BADRPT
-^a(bc+|b[eh])g|.h$	abh	y	&-\1	bh-
+(a|+b)		C	BADRPT
-(bc+d$|ef*g.|h?i(j|k))	effgz	y	&-\1-\2	effgz-effgz-
+(a|?b)		C	BADRPT
-(bc+d$|ef*g.|h?i(j|k))	ij	y	&-\1-\2	ij-ij-j
+(a|{1}b)	C	BADRPT
-(bc+d$|ef*g.|h?i(j|k))	effg	n	-	-
+^*		C	BADRPT
-(bc+d$|ef*g.|h?i(j|k))	bcdd	n	-	-
+^*		b	*	*
-(bc+d$|ef*g.|h?i(j|k))	reffgz	y	&-\1-\2	effgz-effgz-
+^+		C	BADRPT
-((((((((((a))))))))))	-	c	-	-
+^?		C	BADRPT
-(((((((((a)))))))))	a	y	&	a
+^{1}		C	BADRPT
-multiple words of text	uh-uh	n	-	-
+^\{1\}		bC	BADRPT
-multiple words	multiple words, yeah	y	&	multiple words
+
-(.*)c(.*)	abcde	y	&-\1-\2	abcde-ab-de
+# metacharacters, backslashes
-\((.*), (.*)\)	(a, b)	y	(\2, \1)	(b, a)
+a.c		&	abc	abc
 a[bc]d		&	abd	abd
 a\*c		&	a*c	a*c
 a\\b		&	a\b	a\b
 a\\\*b		&	a\*b	a\*b
 a\bc		&	abc	abc
 a\		&C	EESCAPE
 a\\bc		&	a\bc	a\bc
 \{		bC	BADRPT
 a\[b		&	a[b	a[b
 a[b		&C	EBRACK
 # trailing $ is a peculiar special case for the BRE code
 a$		&	a	a
 a$		&	a$
 a\$		&	a
 a\$		&	a$	a$
 a\\$		&	a
 a\\$		&	a$
 a\\$		&	a\$
 a\\$		&	a\	a\
 # back references, ugh
 a\(b\)\2c	bC	ESUBREG
 a\(b\1\)c	bC	ESUBREG
 a\(b*\)c\1d	b	abbcbbd	abbcbbd	bb
 a\(b*\)c\1d	b	abbcbd
 a\(b*\)c\1d	b	abbcbbbd
 ^\(.\)\1	b	abc
 a\([bc]\)\1d	b	abcdabbd	abbd	b
 a\(\([bc]\)\2\)*d	b	abbccd	abbccd
 a\(\([bc]\)\2\)*d	b	abbcbd
 # actually, this next one probably ought to fail, but the spec is unclear
 a\(\(b\)*\2\)*d		b	abbbd	abbbd
 # here is a case that no NFA implementation does right
 \(ab*\)[ab]*\1	b	ababaaa	ababaaa	a
 # check out normal matching in the presence of back refs
 \(a\)\1bcd	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabd	aabd
 \(a\)\1bc*d	b	aabcccd	aabcccd
 \(a\)\1bc*[ce]d	b	aabcccd	aabcccd
 ^\(a\)\1b\(c\)*cd$	b	aabcccd	aabcccd
 # ordinary repetitions
 ab*c		&	abc	abc
 ab+c		-	abc	abc
 ab?c		-	abc	abc
 a\(*\)b		b	a*b	a*b
 a\(**\)b	b	ab	ab
 a\(***\)b	bC	BADRPT
 *a		b	*a	*a
 **a		b	a	a
 ***a		bC	BADRPT
 # the dreaded bounded repetitions
 {		&	{	{
 {abc		&	{abc	{abc
 {1		C	BADRPT
 {1}		C	BADRPT
 a{b		&	a{b	a{b
 a{1}b		-	ab	ab
 a\{1\}b		b	ab	ab
 a{1,}b		-	ab	ab
 a\{1,\}b	b	ab	ab
 a{1,2}b		-	aab	aab
 a\{1,2\}b	b	aab	aab
 a{1		C	EBRACE
 a\{1		bC	EBRACE
 a{1a		C	EBRACE
 a\{1a		bC	EBRACE
 a{1a}		C	BADBR
 a\{1a\}		bC	BADBR
 a{,2}		-	a{,2}	a{,2}
 a\{,2\}		bC	BADBR
 a{,}		-	a{,}	a{,}
 a\{,\}		bC	BADBR
 a{1,x}		C	BADBR
 a\{1,x\}	bC	BADBR
 a{1,x		C	EBRACE
 a\{1,x		bC	EBRACE
 a{300}		C	BADBR
 a\{300\}	bC	BADBR
 a{1,0}		C	BADBR
 a\{1,0\}	bC	BADBR
 ab{0,0}c	-	abcac	ac
 ab\{0,0\}c	b	abcac	ac
 ab{0,1}c	-	abcac	abc
 ab\{0,1\}c	b	abcac	abc
 ab{0,3}c	-	abbcac	abbc
 ab\{0,3\}c	b	abbcac	abbc
 ab{1,1}c	-	acabc	abc
 ab\{1,1\}c	b	acabc	abc
 ab{1,3}c	-	acabc	abc
 ab\{1,3\}c	b	acabc	abc
 ab{2,2}c	-	abcabbc	abbc
 ab\{2,2\}c	b	abcabbc	abbc
 ab{2,4}c	-	abcabbc	abbc
 ab\{2,4\}c	b	abcabbc	abbc
 ((a{1,10}){1,10}){1,10}	-	a	a	a,a
 # multiple repetitions
 a**		&C	BADRPT
 a++		C	BADRPT
 a??		C	BADRPT
 a*+		C	BADRPT
 a*?		C	BADRPT
 a+*		C	BADRPT
 a+?		C	BADRPT
 a?*		C	BADRPT
 a?+		C	BADRPT
 a{1}{1}		C	BADRPT
 a*{1}		C	BADRPT
 a+{1}		C	BADRPT
 a?{1}		C	BADRPT
 a{1}*		C	BADRPT
 a{1}+		C	BADRPT
 a{1}?		C	BADRPT
 a*{b}		-	a{b}	a{b}
 a\{1\}\{1\}	bC	BADRPT
 a*\{1\}		bC	BADRPT
 a\{1\}*		bC	BADRPT
 # brackets, and numerous perversions thereof
 a[b]c		&	abc	abc
 a[ab]c		&	abc	abc
 a[^ab]c		&	adc	adc
 a[]b]c		&	a]c	a]c
 a[[b]c		&	a[c	a[c
 a[-b]c		&	a-c	a-c
 a[^]b]c		&	adc	adc
 a[^-b]c		&	adc	adc
 a[b-]c		&	a-c	a-c
 a[b		&C	EBRACK
 a[]		&C	EBRACK
 a[1-3]c		&	a2c	a2c
 a[3-1]c		&C	ERANGE
 a[1-3-5]c	&C	ERANGE
 a[[.-.]--]c	&	a-c	a-c
 a[1-		&C	ERANGE
 a[[.		&C	EBRACK
 a[[.x		&C	EBRACK
 a[[.x.		&C	EBRACK
 a[[.x.]		&C	EBRACK
 a[[.x.]]	&	ax	ax
 a[[.x,.]]	&C	ECOLLATE
 a[[.one.]]b	&	a1b	a1b
 a[[.notdef.]]b	&C	ECOLLATE
 a[[.].]]b	&	a]b	a]b
 a[[:alpha:]]c	&	abc	abc
 a[[:notdef:]]c	&C	ECTYPE
 a[[:		&C	EBRACK
 a[[:alpha	&C	EBRACK
 a[[:alpha:]	&C	EBRACK
 a[[:alpha,:]	&C	ECTYPE
 a[[:]:]]b	&C	ECTYPE
 a[[:-:]]b	&C	ECTYPE
 a[[:alph:]]	&C	ECTYPE
 a[[:alphabet:]]	&C	ECTYPE
 [[:alnum:]]+	-	-%@a0X-	a0X
 [[:alpha:]]+	-	-%@aX0-	aX
 [[:blank:]]+	-	aSSTb	SST
 [[:cntrl:]]+	-	aNTb	NT
 [[:digit:]]+	-	a019b	019
 [[:graph:]]+	-	Sa%bS	a%b
 [[:lower:]]+	-	AabC	ab
 [[:print:]]+	-	NaSbN	aSb
 [[:punct:]]+	-	S%-&T	%-&
 [[:space:]]+	-	aSNTb	SNT
 [[:upper:]]+	-	aBCd	BC
 [[:xdigit:]]+	-	p0f3Cq	0f3C
 a[[=b=]]c	&	abc	abc
 a[[=		&C	EBRACK
 a[[=b		&C	EBRACK
 a[[=b=		&C	EBRACK
 a[[=b=]		&C	EBRACK
 a[[=b,=]]	&C	ECOLLATE
 a[[=one=]]b	&	a1b	a1b
 # complexities
 a(((b)))c	-	abc	abc
 a(b|(c))d	-	abd	abd
 a(b*|c)d	-	abbd	abbd
 # just gotta have one DFA-buster, of course
 a[ab]{20}	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and an inline expansion in case somebody gets tricky
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and in case somebody just slips in an NFA...
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	-	aaaaabaaaabaaaabaaaabweeknights	aaaaabaaaabaaaabaaaabweeknights
 # fish for anomalies as the number of states passes 32
 12345678901234567890123456789	-	a12345678901234567890123456789b	12345678901234567890123456789
 123456789012345678901234567890	-	a123456789012345678901234567890b	123456789012345678901234567890
 1234567890123456789012345678901	-	a1234567890123456789012345678901b	1234567890123456789012345678901
 12345678901234567890123456789012	-	a12345678901234567890123456789012b	12345678901234567890123456789012
 123456789012345678901234567890123	-	a123456789012345678901234567890123b	123456789012345678901234567890123
 # and one really big one, beyond any plausible word width
 1234567890123456789012345678901234567890123456789012345678901234567890	-	a1234567890123456789012345678901234567890123456789012345678901234567890b	1234567890123456789012345678901234567890123456789012345678901234567890
 # fish for problems as brackets go past 8
 [ab][cd][ef][gh][ij][kl][mn]	-	xacegikmoq	acegikm
 [ab][cd][ef][gh][ij][kl][mn][op]	-	xacegikmoq	acegikmo
 [ab][cd][ef][gh][ij][kl][mn][op][qr]	-	xacegikmoqy	acegikmoq
 [ab][cd][ef][gh][ij][kl][mn][op][q]	-	xacegikmoqy	acegikmoq
 # subtleties of matching
 abc		&	xabcy	abc
 a\(b\)?c\1d	b	acd
 aBc		i	Abc	Abc
 a[Bc]*d		i	abBCcd	abBCcd
 0[[:upper:]]1	&i	0a1	0a1
 0[[:lower:]]1	&i	0A1	0A1
 a[^b]c		&i	abc
 a[^b]c		&i	aBc
 a[^b]c		&i	adc	adc
 [a]b[c]		-	abc	abc
 [a]b[a]		-	aba	aba
 [abc]b[abc]	-	abc	abc
 [abc]b[abd]	-	abd	abd
 a(b?c)+d	-	accd	accd
 (wee|week)(knights|night)	-	weeknights	weeknights
 (we|wee|week|frob)(knights|night|day)	-	weeknights	weeknights
 a[bc]d		-	xyzaaabcaababdacd	abd
 a[ab]c		-	aaabc	abc
 abc		s	abc	abc
 a*		&	b	@b
 # Let's have some fun -- try to match a C comment.
 # first the obvious, which looks okay at first glance...
 /\*.*\*/	-	/*x*/	/*x*/
 # but...
 /\*.*\*/	-	/*x*/y/*z*/	/*x*/y/*z*/
 # okay, we must not match */ inside; try to do that...
 /\*([^*]|\*[^/])*\*/	-	/*x*/	/*x*/
 /\*([^*]|\*[^/])*\*/	-	/*x*/y/*z*/	/*x*/
 # but...
 /\*([^*]|\*[^/])*\*/	-	/*x**/y/*z*/	/*x**/y/*z*/
 # and a still fancier version, which does it right (I think)...
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/y/*z*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**/y/*z*/	/*x**/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x****/y/*z*/	/*x****/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**x*/y/*z*/	/*x**x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x***x/y/*z*/	/*x***x/y/*z*/
 # subexpressions
 .*		-	abc	abc	-
 a(b)(c)d	-	abcd	abcd	b,c
 a(((b)))c	-	abc	abc	b,b,b
 a(b|(c))d	-	abd	abd	b,-
 a(b*|c|e)d	-	abbd	abbd	bb
 a(b*|c|e)d	-	acd	acd	c
 a(b*|c|e)d	-	ad	ad	@d
 a(b?)c		-	abc	abc	b
 a(b?)c		-	ac	ac	@c
 a(b+)c		-	abc	abc	b
 a(b+)c		-	abbbc	abbbc	bbb
 a(b*)c		-	ac	ac	@c
 (a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
 # the regression tester only asks for 9 subexpressions
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l	-	abcdefghijkl	abcdefghijkl	b,c,d,e,f,g,h,i,j,k
 a([bc]?)c	-	abc	abc	b
 a([bc]?)c	-	ac	ac	@c
 a([bc]+)c	-	abc	abc	b
 a([bc]+)c	-	abcc	abcc	bc
 a([bc]+)bc	-	abcbc	abcbc	bc
 a(bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abbb	abbb	bb
 a(bbb+|bb+|b)bb	-	abbb	abbb	b
 (.*).*		-	abcdef	abcdef	abcdef
 (a*)*		-	bc	@b	@b
 # do we get the right subexpression when it is used more than once?
 a(b|c)*d	-	ad	ad	-
 a(b|c)*d	-	abcd	abcd	c
 a(b|c)+d	-	abd	abd	b
 a(b|c)+d	-	abcd	abcd	c
 a(b|c?)+d	-	ad	ad	@d
 a(b|c?)+d	-	abcd	abcd	@d
 a(b|c){0,0}d	-	ad	ad	-
 a(b|c){0,1}d	-	ad	ad	-
 a(b|c){0,1}d	-	abd	abd	b
 a(b|c){0,2}d	-	ad	ad	-
 a(b|c){0,2}d	-	abcd	abcd	c
 a(b|c){0,}d	-	ad	ad	-
 a(b|c){0,}d	-	abcd	abcd	c
 a(b|c){1,1}d	-	abd	abd	b
 a(b|c){1,1}d	-	acd	acd	c
 a(b|c){1,2}d	-	abd	abd	b
 a(b|c){1,2}d	-	abcd	abcd	c
 a(b|c){1,}d	-	abd	abd	b
 a(b|c){1,}d	-	abcd	abcd	c
 a(b|c){2,2}d	-	acbd	acbd	b
 a(b|c){2,2}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcbd	abcbd	b
 a(b|c){2,4}d	-	abcbcd	abcbcd	c
 a(b|c){2,}d	-	abcd	abcd	c
 a(b|c){2,}d	-	abcbd	abcbd	b
 a(b+|((c)*))+d	-	abd	abd	@d,@d,-
 a(b+|((c)*))+d	-	abcd	abcd	@d,@d,-
 # check out the STARTEND option
 [abc]		&#	a(b)c	b
 [abc]		&#	a(d)c
 [abc]		&#	a(bc)d	b
 [abc]		&#	a(dc)d	c
 .		&#	a()c
 b.*c		&#	b(bc)c	bc
 b.*		&#	b(bc)c	bc
 .*c		&#	b(bc)c	bc
 # plain strings, with the NOSPEC flag
 abc		m	abc	abc
 abc		m	xabcy	abc
 abc		m	xyz
 a*b		m	aba*b	a*b
 a*b		m	ab
 ""		mC	EMPTY
 # cases involving NULs
 aZb		&	a	a
 aZb		&p	a
 aZb		&p#	(aZb)	aZb
 aZ*b		&p#	(ab)	ab
 a.b		&#	(aZb)	aZb
 a.*		&#	(aZb)c	aZb
 # word boundaries (ick)
 [[:<:]]a	&	a	a
 [[:<:]]a	&	ba
 [[:<:]]a	&	-a	a
 a[[:>:]]	&	a	a
 a[[:>:]]	&	ab
 a[[:>:]]	&	a-	a
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc	abc
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc-q	abc
 [[:<:]]a.c[[:>:]]	&	axc-dayc-dazce-abc	axc
 [[:<:]]b.c[[:>:]]	&	a_bxc-byc_d-bzc-q	bzc
 [[:<:]].x..[[:>:]]	&	y_xa_-_xb_y-_xc_-axdc	_xc_
 [[:<:]]a_b[[:>:]]	&	x_a_b
 # past problems, and suspected problems
 (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])	-	A1	A1
 abcdefghijklmnop	i	abcdefghijklmnop	abcdefghijklmnop
 abcdefghijklmnopqrstuv	i	abcdefghijklmnopqrstuv	abcdefghijklmnopqrstuv
 (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])	-	CC11	CC11
 CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a	-	CC11	CC11
 Char \([a-z0-9_]*\)\[.*	b	Char xyz[k	Char xyz[k	xyz
 a?b	-	ab	ab
 -\{0,1\}[0-9]*$	b	-5	-5
 a*a*a*a*a*a*a*	&	aaaaaa	aaaaaa
--- a/scsh/regexp/timer.c
+++ b/scsh/regexp/timer.c
@ -1,164 +0,0 @@
 /*
 * Simple timing program for regcomp().
 * Usage: timer ncomp nexec nsub
 *	or
 *	timer ncomp nexec nsub regexp string [ answer [ sub ] ]
 *
 * The second form is for timing repetitions of a single test case.
 * The first form's test data is a compiled-in copy of the "tests" file.
 * Ncomp, nexec, nsub are how many times to do each regcomp, regexec,
 * and regsub.  The way to time an operation individually is to do something
 * like "timer 1 50 1".
 */
 #include <stdio.h>
 /*
  * One test case.  As used by try() below: `re' is handed to regcomp(),
  * `str' to regexec(), the first character of `ans' is the expected
  * result code ('c' and 'n' are checked for), `src' is the regsub()
  * source, and `dst' is compared against the regsub() output.
  */
 struct try {
 	char *re, *str, *ans, *src, *dst;
 } tests[] = {
 /* Compiled-in copy of the "tests" file (see the header comment). */
 #include "timer.t.h"
 { NULL, NULL, NULL, NULL, NULL }	/* Sentinel: multiple() stops at re == NULL. */
 };
 #include <regexp.h>
 int errreport = 0;		/* Report errors via errseen? */
 char *errseen = NULL;		/* Error message. */
 char *progname;			/* Set to argv[0] by main(). */
 /*
  * Entry point.  With fewer than three count arguments every count
  * defaults to 1; otherwise argv[1..3] give ncomp, nexec and nsub.
  * If a regexp and a string follow (argc > 5) that single case is timed,
  * with errors recorded in errseen; an optional answer code (default "y")
  * and regsub source may follow.  Otherwise the compiled-in table is run.
  * Always exits with status 0.
  */
 /* ARGSUSED */
 int
 main(argc, argv)
 int argc;
 char *argv[];
 {
 	int ncomp, nexec, nsub;
 	struct try one;

 	if (argc < 4) {
 		ncomp = 1;
 		nexec = 1;
 		nsub = 1;
 	} else {
 		ncomp = atoi(argv[1]);
 		nexec = atoi(argv[2]);
 		nsub = atoi(argv[3]);
 	}
 	progname = argv[0];

 	if (argc > 5) {
 		/* Single test case taken from the command line. */
 		one.re = argv[4];
 		one.str = argv[5];
 		if (argc > 6)
 			one.ans = argv[6];
 		else
 			one.ans = "y";
 		if (argc > 7) {
 			one.src = argv[7];
 			one.dst = "xxx";
 		} else {
 			one.src = "x";
 			one.dst = "x";
 		}
 		errreport = 1;
 		try(one, ncomp, nexec, nsub);
 	} else
 		multiple(ncomp, nexec, nsub);
 	exit(0);
 }
 /*
  * Error hook: either remember the message in errseen (when errreport is
  * set) or hand it to error().
  */
 void
 regerror(s)
 char *s;
 {
 	if (!errreport) {
 		error(s, "");
 		return;
 	}
 	errseen = s;
 }
 #ifndef ERRAVAIL
 /*
  * Print "regexp: " followed by s1 (used as a printf format with s2 as
  * its argument) and a newline on stderr, then exit with status 1.
  */
 error(s1, s2)
 char *s1;
 char *s2;
 {
 	fputs("regexp: ", stderr);
 	fprintf(stderr, s1, s2);
 	fputc('\n', stderr);
 	exit(1);
 }
 #endif
 /* Number of the test being run: bumped by multiple(), printed by complain(). */
 int lineno = 0;
 /*
  * Run every entry of the compiled-in tests[] table through try(), with
  * the given repetition counts.  Errors are recorded in errseen rather
  * than being fatal, and lineno counts the entries as they are run.
  */
 multiple(ncomp, nexec, nsub)
 int ncomp, nexec, nsub;
 {
 	register int i;

 	errreport = 1;
 	/* The table ends at the entry whose re field is NULL. */
 	for (i = 0; tests[i].re != NULL; i++) {
 		lineno++;
 		try(tests[i], ncomp, nexec, nsub);
 	}
 }
 /*
  * Run one test case, repeating each stage for timing purposes.
  *
  * fields  the test case (see struct try)
  * ncomp   total number of regcomp() calls to make
  * nexec   total number of regexec() calls to make
  * nsub    number of regsub() calls to make
  *
  * The first character of fields.ans selects the expectation: 'c' means
  * regcomp() should fail, 'n' means regexec() should fail, anything else
  * means both should succeed and the regsub() output should equal
  * fields.dst.  Deviations are reported through complain().
  */
 try(fields, ncomp, nexec, nsub)
 struct try fields;
 int ncomp, nexec, nsub;
 {
 	regexp *r;
 	char dbuf[BUFSIZ];
 	register int i;

 	errseen = NULL;
 	r = regcomp(fields.re);
 	if (r == NULL) {
 		if (*fields.ans != 'c')
 			complain("regcomp failure in `%s'", fields.re);
 		return;
 	}
 	if (*fields.ans == 'c') {
 		complain("unexpected regcomp success in `%s'", fields.re);
 		free((char *)r);
 		return;
 	}

 	/* The first compile is already done; repeat for the remaining ncomp-1. */
 	for (i = ncomp-1; i > 0; i--) {
 		free((char *)r);
 		r = regcomp(fields.re);
 		if (r == NULL) {
 			/* Succeeded once but not again: report instead of matching with NULL. */
 			complain("regcomp failure in `%s'", fields.re);
 			return;
 		}
 	}

 	if (!regexec(r, fields.str)) {
 		if (*fields.ans != 'n')
 			/* Name the RE so the complaint is not an empty `'. */
 			complain("regexec failure in `%s'", fields.re);
 		free((char *)r);
 		return;
 	}
 	if (*fields.ans == 'n') {
 		complain("unexpected regexec success", "");
 		free((char *)r);
 		return;
 	}
 	for (i = nexec-1; i > 0; i--)
 		(void) regexec(r, fields.str);

 	errseen = NULL;
 	for (i = nsub; i > 0; i--)
 		regsub(r, fields.src, dbuf);
 	if (errseen != NULL) {
 		complain("regsub complaint", "");
 		free((char *)r);
 		return;
 	}
 	/* dbuf is only filled in when regsub() ran at least once. */
 	if (nsub > 0 && strcmp(dbuf, fields.dst) != 0)
 		complain("regsub result `%s' wrong", dbuf);
 	free((char *)r);
 }
 /*
  * Report a test failure on stderr: the current lineno, the message s1
  * (a printf format taking s2), and the pending errseen text, if any,
  * in parentheses.
  */
 complain(s1, s2)
 char *s1;
 char *s2;
 {
 	char *detail = "";

 	if (errseen != NULL)
 		detail = errseen;
 	fprintf(stderr, "try: %d: ", lineno);
 	fprintf(stderr, s1, s2);
 	fprintf(stderr, " (%s)\n", detail);
 }
--- a/scsh/regexp/try.c
+++ b/scsh/regexp/try.c
@ -1,220 +0,0 @@
 /*
 * Simple test program for regexp(3) stuff.  Knows about debugging hooks.
 * Usage: try re [string [output [-]]]
 * The re is compiled and dumped, regexeced against the string, the result
 * is applied to output using regsub().  The - triggers a running narrative
 * from regexec().  Dumping and narrative don't happen unless DEBUG.
 *
 * If there are no arguments, stdin is assumed to be a stream of lines with
 * five fields:  a r.e., a string to match it against, a result code, a
 * source string for regsub, and the proper result.  Result codes are 'c'
 * for compile failure, 'y' for match success, 'n' for match failure.
 * Field separator is tab.
 */
 #include <stdio.h>
 #include <regexp.h>
 #ifdef ERRAVAIL
 char *progname;			/* Set from mkprogname(argv[0]) in main(). */
 extern char *mkprogname();
 #endif
 #ifdef DEBUG
 extern int regnarrate;		/* Incremented by main() when argc > 4. */
 #endif
 char buf[BUFSIZ];		/* Receives the regsub() output in main(). */
 int errreport = 0;		/* Report errors via errseen? */
 char *errseen = NULL;		/* Error message. */
 int status = 0;			/* Exit status. */
 /*
  * Entry point.  With no arguments, run the test stream on stdin via
  * multiple().  Otherwise compile argv[1]; if a string is given, match
  * against it and print the regexec() result followed by the numbers of
  * the subexpressions that have both start and end pointers set; if an
  * output template is given, print the regsub() result.  Exits with
  * `status'.
  */
 /* ARGSUSED */
 main(argc, argv)
 int argc;
 char *argv[];
 {
 	regexp *compiled;
 	int matched;
 	int sub;

 #ifdef ERRAVAIL
 	progname = mkprogname(argv[0]);
 #endif

 	if (argc == 1) {
 		multiple();
 		exit(status);
 	}

 	compiled = regcomp(argv[1]);
 	if (compiled == NULL)
 		error("regcomp failure", "");
 #ifdef DEBUG
 	regdump(compiled);
 	if (argc > 4)
 		regnarrate++;
 #endif

 	if (argc > 2) {
 		matched = regexec(compiled, argv[2]);
 		printf("%d", matched);
 		for (sub = 1; sub < NSUBEXP; sub++) {
 			if (compiled->startp[sub] == NULL ||
 			    compiled->endp[sub] == NULL)
 				continue;
 			printf(" \\%d", sub);
 		}
 		printf("\n");
 	}

 	if (argc > 3) {
 		regsub(compiled, argv[3], buf);
 		printf("%s\n", buf);
 	}

 	exit(status);
 }
 /*
  * Error hook: when errreport is set the message is only remembered in
  * errseen; otherwise it is passed to error().
  */
 void
 regerror(s)
 char *s;
 {
 	if (!errreport) {
 		error(s, "");
 		return;
 	}
 	errseen = s;
 }
 #ifndef ERRAVAIL
 /*
  * Write "regexp: ", then s1 formatted with s2, then a newline to stderr,
  * and exit with status 1.
  */
 error(s1, s2)
 char *s1;
 char *s2;
 {
 	fputs("regexp: ", stderr);
 	fprintf(stderr, s1, s2);
 	fputc('\n', stderr);
 	exit(1);
 }
 #endif
 int lineno;			/* Input line (or internal test number) shown by complain(). */
 /* Deliberately invalid regexp passed to regexec()/regsub() by multiple(). */
 regexp badregexp;		/* Implicit init to 0. */
 /*
  * Read test cases from stdin, one per line, each made of five
  * tab-separated fields, and run every one through try().  A line with
  * fewer than five fields is reported and the program exits with status 1.
  * After the input is exhausted, run a fixed set of checks (numbered
  * 9990-9998 in lineno) that the library complains about NULL and
  * nonsense arguments.
  */
 multiple()
 {
 	char rbuf[BUFSIZ];
 	char *field[5];
 	char *scan;
 	int i;
 	regexp *r;
 	extern char *strchr();

 	errreport = 1;
 	lineno = 0;
 	while (fgets(rbuf, sizeof(rbuf), stdin) != NULL) {
 		/*
 		 * Dispense with \n, but only if there is one: the last line of
 		 * the input or an over-long line may come back without it, and
 		 * then the final character is real data.
 		 */
 		scan = strchr(rbuf, '\n');
 		if (scan != NULL)
 			*scan = '\0';
 		lineno++;
 		scan = rbuf;
 		for (i = 0; i < 5; i++) {
 			/* scan is NULL once the previous field had no tab after it. */
 			field[i] = scan;
 			if (field[i] == NULL) {
 				complain("bad testfile format", "");
 				exit(1);
 			}
 			scan = strchr(scan, '\t');
 			if (scan != NULL)
 				*scan++ = '\0';
 		}
 		try(field);
 	}

 	/* And finish up with some internal testing... */
 	lineno = 9990;
 	errseen = NULL;
 	if (regcomp((char *)NULL) != NULL || errseen == NULL)
 		complain("regcomp(NULL) doesn't complain", "");
 	lineno = 9991;
 	errseen = NULL;
 	if (regexec((regexp *)NULL, "foo") || errseen == NULL)
 		complain("regexec(NULL, ...) doesn't complain", "");
 	lineno = 9992;
 	r = regcomp("foo");
 	if (r == NULL) {
 		complain("regcomp(\"foo\") fails", "");
 		return;
 	}
 	lineno = 9993;
 	errseen = NULL;
 	if (regexec(r, (char *)NULL) || errseen == NULL)
 		complain("regexec(..., NULL) doesn't complain", "");
 	lineno = 9994;
 	errseen = NULL;
 	regsub((regexp *)NULL, "foo", rbuf);
 	if (errseen == NULL)
 		complain("regsub(NULL, ..., ...) doesn't complain", "");
 	lineno = 9995;
 	errseen = NULL;
 	regsub(r, (char *)NULL, rbuf);
 	if (errseen == NULL)
 		complain("regsub(..., NULL, ...) doesn't complain", "");
 	lineno = 9996;
 	errseen = NULL;
 	regsub(r, "foo", (char *)NULL);
 	if (errseen == NULL)
 		complain("regsub(..., ..., NULL) doesn't complain", "");
 	lineno = 9997;
 	errseen = NULL;
 	if (regexec(&badregexp, "foo") || errseen == NULL)
 		complain("regexec(nonsense, ...) doesn't complain", "");
 	lineno = 9998;
 	errseen = NULL;
 	regsub(&badregexp, "foo", rbuf);
 	if (errseen == NULL)
 		complain("regsub(nonsense, ..., ...) doesn't complain", "");
 }
 /*
  * Run a single test case.  fields[0] is the regexp, fields[1] the string
  * to match, fields[2] the expected result code ('c' compile failure,
  * 'n' match failure, otherwise success), fields[3] the regsub() source
  * and fields[4] the expected regsub() output.  Any deviation is reported
  * through complain().
  */
 try(fields)
 char **fields;
 {
 	regexp *compiled;
 	char result[BUFSIZ];
 	char expect;

 	errseen = NULL;
 	compiled = regcomp(fields[0]);
 	expect = *fields[2];

 	/* Nothing to release when compilation itself failed. */
 	if (compiled == NULL) {
 		if (expect != 'c')
 			complain("regcomp failure in `%s'", fields[0]);
 		return;
 	}

 	if (expect == 'c') {
 		complain("unexpected regcomp success in `%s'", fields[0]);
 		goto done;
 	}

 	if (!regexec(compiled, fields[1])) {
 		if (expect != 'n')
 			complain("regexec failure in `%s'", fields[0]);
 		goto done;
 	}
 	if (expect == 'n') {
 		complain("unexpected regexec success", "");
 		goto done;
 	}

 	/* Matched as expected; now check the substitution. */
 	errseen = NULL;
 	regsub(compiled, fields[3], result);
 	if (errseen != NULL)
 		complain("regsub complaint", "");
 	else if (strcmp(result, fields[4]) != 0)
 		complain("regsub result `%s' wrong", result);

 done:
 	free((char *)compiled);
 }
 /*
  * Report a test failure on stderr -- the current lineno, the message s1
  * (a printf format taking s2) and any pending errseen text in
  * parentheses -- and set the exit status to 1.
  */
 complain(s1, s2)
 char *s1;
 char *s2;
 {
 	char *detail = "";

 	if (errseen != NULL)
 		detail = errseen;
 	status = 1;
 	fprintf(stderr, "try: %d: ", lineno);
 	fprintf(stderr, s1, s2);
 	fprintf(stderr, " (%s)\n", detail);
 }