804 lines
21 KiB
Plaintext
804 lines
21 KiB
Plaintext
|
Date: Mon, 1 Jul 1996 23:22:47 GMT
|
||
|
From: Bill Sommerfeld <sommerfeld@orchard.medford.ma.us>
|
||
|
To: shivers@lcs.mit.edu, bdc@ai.mit.edu
|
||
|
Subject: scsh patch for precompiled regexps..
|
||
|
|
||
|
I meant to send this out months ago but I was just too hosed with work.
|
||
|
|
||
|
Here's what I have right now:
|
||
|
|
||
|
There are three pieces here:
|
||
|
diffs to the "core" scsh
|
||
|
diffs to Henry Spencer's latest regexp library
|
||
|
a copy of Henry Spencer's latest regexp library..
|
||
|
|
||
|
It appears to work (it passes the same regression tests as the C library).
|
||
|
|
||
|
Let me know if I didn't include something needed for this to work.
|
||
|
|
||
|
- Bill
|
||
|
|
||
|
diff -rc scsh-0.4.2/scsh/re.scm scsh-0.4.2-regexp/scsh/re.scm
|
||
|
*** scsh-0.4.2/scsh/re.scm Fri Oct 27 04:58:56 1995
|
||
|
--- scsh-0.4.2-regexp/scsh/re.scm Sat Apr 6 21:07:41 1996
|
||
|
***************
|
||
|
*** 34,49 ****
|
||
|
|
||
|
;;; Bogus stub definitions for low-level match routines:
|
||
|
|
||
|
! (define regexp? string?)
|
||
|
! (define (make-regexp str) str)
|
||
|
|
||
|
! (define (regexp-exec regexp str . maybe-start)
|
||
|
(let ((start (optional-arg maybe-start 0))
|
||
|
(start-vec (make-vector 10))
|
||
|
(end-vec (make-vector 10)))
|
||
|
! (and (%regexp-match regexp str start start-vec end-vec)
|
||
|
! (make-regexp-match str start-vec end-vec))))
|
||
|
!
|
||
|
|
||
|
;;; Convert a string into a regex pattern that matches that string exactly --
|
||
|
;;; in other words, quote the special chars with backslashes.
|
||
|
--- 34,53 ----
|
||
|
|
||
|
;;; Bogus stub definitions for low-level match routines:
|
||
|
|
||
|
! (define-record iregexp
|
||
|
! string)
|
||
|
|
||
|
! (define regexp? iregexp?)
|
||
|
!
|
||
|
! (define (make-regexp str)
|
||
|
! (make-iregexp (compile-regexp str)))
|
||
|
!
|
||
|
! (define (regexp-exec r s . maybe-start)
|
||
|
(let ((start (optional-arg maybe-start 0))
|
||
|
(start-vec (make-vector 10))
|
||
|
(end-vec (make-vector 10)))
|
||
|
! (and (%regexp-exec-1 (iregexp:string r) s start start-vec end-vec)
|
||
|
! (make-regexp-match s start-vec end-vec))))
|
||
|
|
||
|
;;; Convert a string into a regex pattern that matches that string exactly --
|
||
|
;;; in other words, quote the special chars with backslashes.
|
||
|
***************
|
||
|
*** 58,75 ****
|
||
|
(cons #\\ result)
|
||
|
result))))))
|
||
|
|
||
|
! (define-foreign %regexp-match/errno (reg_match (string regexp)
|
||
|
! (string s)
|
||
|
! (integer start)
|
||
|
! (vector-desc start-vec)
|
||
|
! (vector-desc end-vec))
|
||
|
! static-string ; Error string or #f if all is ok.
|
||
|
! bool) ; match?
|
||
|
!
|
||
|
! (define (%regexp-match regexp string start start-vec end-vec)
|
||
|
! (receive (err match?) (%regexp-match/errno regexp string start
|
||
|
! start-vec end-vec)
|
||
|
! (if err (error err %regexp-match regexp string start) match?)))
|
||
|
|
||
|
|
||
|
;;; I do this one in C, I'm not sure why:
|
||
|
--- 62,79 ----
|
||
|
(cons #\\ result)
|
||
|
result))))))
|
||
|
|
||
|
! ;;;(define-foreign %regexp-match/errno (reg_match (string regexp)
|
||
|
! ;;; (string s)
|
||
|
! ;;; (integer start)
|
||
|
! ;;; (vector-desc start-vec)
|
||
|
! ;;; (vector-desc end-vec))
|
||
|
! ;;; static-string ; Error string or #f if all is ok.
|
||
|
! ;;; bool) ; match?
|
||
|
!
|
||
|
! ;;;(define (%regexp-match regexp string start start-vec end-vec)
|
||
|
! ;;; (receive (err match?) (%regexp-match/errno regexp string start
|
||
|
! ;;; start-vec end-vec)
|
||
|
! ;;; (if err (error err %regexp-match regexp string start) match?)))
|
||
|
|
||
|
|
||
|
;;; I do this one in C, I'm not sure why:
|
||
|
***************
|
||
|
*** 79,81 ****
|
||
|
--- 83,166 ----
|
||
|
(filter_stringvec (string regexp) ((C "char const ** ~a") cvec))
|
||
|
static-string ; error message -- #f if no error.
|
||
|
integer) ; number of files that pass the filter.
|
||
|
+
|
||
|
+ ;;; precompiled regexps.
|
||
|
+
|
||
|
+ (define-foreign %regexp-compiled-length (reg_comp_len (string regexp))
|
||
|
+ static-string
|
||
|
+ integer)
|
||
|
+
|
||
|
+ (define-foreign %regexp-compile (reg_comp_comp (string regexp)
|
||
|
+ (string-desc re-buf))
|
||
|
+ static-string)
|
||
|
+
|
||
|
+ (define (%regexp-exec-1 r s start sv ev)
|
||
|
+ (receive (err match?) (%regexp-exec r s start sv ev)
|
||
|
+ (if err (error err s start)
|
||
|
+ match?)))
|
||
|
+
|
||
|
+ (define-foreign %regexp-exec (reg_exec (string-desc regexp)
|
||
|
+ (string s)
|
||
|
+ (integer start)
|
||
|
+ (vector-desc start-vec)
|
||
|
+ (vector-desc end-vec))
|
||
|
+ static-string
|
||
|
+ bool)
|
||
|
+
|
||
|
+
|
||
|
+ (define (compile-regexp e)
|
||
|
+ (receive (err len)
|
||
|
+ (%regexp-compiled-length e)
|
||
|
+ (if err (error err e)
|
||
|
+ (let ((buf (make-string len)))
|
||
|
+ (%regexp-compile e buf)
|
||
|
+ buf))))
|
||
|
+
|
||
|
+
|
||
|
+
|
||
|
+ (define-foreign %regexp-subst (reg_subst (string-desc regexp)
|
||
|
+ (string m)
|
||
|
+ (string s)
|
||
|
+ (integer start)
|
||
|
+ (vector-desc start-vec)
|
||
|
+ (vector-desc end-vec)
|
||
|
+ (string-desc outbuf))
|
||
|
+ static-string
|
||
|
+ integer)
|
||
|
+
|
||
|
+ (define-foreign %regexp-subst-len (reg_subst_len (string-desc regexp)
|
||
|
+ (string m)
|
||
|
+ (string s)
|
||
|
+ (integer start)
|
||
|
+ (vector-desc start-vec)
|
||
|
+ (vector-desc end-vec))
|
||
|
+ static-string
|
||
|
+ integer)
|
||
|
+
|
||
|
+
|
||
|
+ (define (regexp-subst re match replacement)
|
||
|
+ (let ((cr (iregexp:string re))
|
||
|
+ (matchstr (regexp-match:string match))
|
||
|
+ (startvec (regexp-match:start match))
|
||
|
+ (endvec (regexp-match:end match)))
|
||
|
+ (receive (err outlen)
|
||
|
+ (%regexp-subst-len cr
|
||
|
+ matchstr
|
||
|
+ replacement
|
||
|
+ 0
|
||
|
+ startvec
|
||
|
+ endvec)
|
||
|
+ (if err (error err matchstr replacement)
|
||
|
+ (let ((outbuf (make-string outlen)))
|
||
|
+ (receive (err outlen)
|
||
|
+ (%regexp-subst cr
|
||
|
+ matchstr
|
||
|
+ replacement
|
||
|
+ 0
|
||
|
+ startvec
|
||
|
+ endvec
|
||
|
+ outbuf)
|
||
|
+ (if err (error err matchstr replacement)
|
||
|
+ (substring outbuf 0 outlen))))))))
|
||
|
+
|
||
|
+
|
||
|
\ No newline at end of file
|
||
|
diff -rc scsh-0.4.2/scsh/re1.c scsh-0.4.2-regexp/scsh/re1.c
|
||
|
*** scsh-0.4.2/scsh/re1.c Fri Oct 27 04:58:58 1995
|
||
|
--- scsh-0.4.2-regexp/scsh/re1.c Sat Apr 6 21:01:15 1996
|
||
|
***************
|
||
|
*** 19,24 ****
|
||
|
--- 19,150 ----
|
||
|
/* Stash error msg in global. */
|
||
|
void regerror(char *msg) {regexp_error = msg;}
|
||
|
|
||
|
+ /*
|
||
|
+ ** Return NULL normally, error string on error.
|
||
|
+ ** Stash number of bytes needed for compiled regexp into `*len'
|
||
|
+ */
|
||
|
+
|
||
|
+ char *reg_comp_len(const char *re, int *len)
|
||
|
+ {
|
||
|
+ int l;
|
||
|
+
|
||
|
+ regexp_error = NULL;
|
||
|
+ *len = regcomp_len(re);
|
||
|
+ return regexp_error;
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ ** Return NULL normally, error string on error.
|
||
|
+ ** Compile regexp into string described by `cr'.
|
||
|
+ */
|
||
|
+
|
||
|
+ char *reg_comp_comp(const char *re, scheme_value cr)
|
||
|
+ {
|
||
|
+ int len = STRING_LENGTH(cr);
|
||
|
+ regexp *r = (regexp *)&STRING_REF(cr, 0);
|
||
|
+
|
||
|
+ regexp_error = NULL;
|
||
|
+ r = regcomp_comp(re, r, len);
|
||
|
+ return regexp_error;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Return NULL normally, error string on error.
|
||
|
+ ** Stash match info in start_vec and end_vec.
|
||
|
+ ** Returns boolean match/no-match in hit.
|
||
|
+ */
|
||
|
+
|
||
|
+ char *reg_exec(scheme_value cr, const char *string, int start,
|
||
|
+ scheme_value start_vec, scheme_value end_vec, int *hit)
|
||
|
+ {
|
||
|
+ regexp *r = (regexp *)&STRING_REF(cr, 0);
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal start vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal end vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ regexp_error = 0;
|
||
|
+ *hit = 0;
|
||
|
+
|
||
|
+ if( regexec(r, string+start) ) {
|
||
|
+ int i;
|
||
|
+ for(i=0; i<NSUBEXP; i++) {
|
||
|
+ const char *s = r->startp[i];
|
||
|
+ const char *e = r->endp[i];
|
||
|
+ VECTOR_REF(start_vec,i) = s?ENTER_FIXNUM(s - string):SCHFALSE;
|
||
|
+ VECTOR_REF(end_vec,i) = e?ENTER_FIXNUM(e - string):SCHFALSE;
|
||
|
+ r->startp[i] = NULL;
|
||
|
+ r->endp[i] = NULL;
|
||
|
+ }
|
||
|
+ *hit = 1;
|
||
|
+ }
|
||
|
+ return regexp_error;
|
||
|
+ }
|
||
|
+
|
||
|
+ char *reg_subst(scheme_value cr, const char *match,
|
||
|
+ const char *src, int start,
|
||
|
+ scheme_value start_vec, scheme_value end_vec,
|
||
|
+ scheme_value outbuf, int *len)
|
||
|
+ {
|
||
|
+ int i;
|
||
|
+ regexp *r = (regexp *)&STRING_REF(cr, 0);
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal start vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal end vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i=0; i<NSUBEXP; i++)
|
||
|
+ {
|
||
|
+ scheme_value se = VECTOR_REF(start_vec, i);
|
||
|
+ scheme_value ee = VECTOR_REF(end_vec, i);
|
||
|
+ r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
|
||
|
+ r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
|
||
|
+ }
|
||
|
+
|
||
|
+ regexp_error = NULL;
|
||
|
+ regnsub (r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
|
||
|
+ *len = strlen(&STRING_REF(outbuf, 0));
|
||
|
+ return regexp_error;
|
||
|
+ }
|
||
|
+
|
||
|
+ char *reg_subst_len(scheme_value cr, const char *match,
|
||
|
+ const char *src, int start,
|
||
|
+ scheme_value start_vec, scheme_value end_vec,
|
||
|
+ int *len)
|
||
|
+ {
|
||
|
+ int i;
|
||
|
+ regexp *r = (regexp *)&STRING_REF(cr, 0);
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(start_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal start vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ if( VECTOR_LENGTH(end_vec) != NSUBEXP ) {
|
||
|
+ return "Illegal end vector";
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i=0; i<NSUBEXP; i++)
|
||
|
+ {
|
||
|
+ scheme_value se = VECTOR_REF(start_vec, i);
|
||
|
+ scheme_value ee = VECTOR_REF(end_vec, i);
|
||
|
+ r->startp[i] = FIXNUMP(se)?(match + EXTRACT_FIXNUM(se)):NULL;
|
||
|
+ r->endp[i] = FIXNUMP(ee)? (match + EXTRACT_FIXNUM(ee)):NULL;
|
||
|
+ }
|
||
|
+
|
||
|
+ regexp_error = NULL;
|
||
|
+ *len = regsublen (r, src);
|
||
|
+ return regexp_error;
|
||
|
+ }
|
||
|
+
|
||
|
+
|
||
|
+ #if 0
|
||
|
/* Return NULL normally, error string on error.
|
||
|
** Stash match info in start_vec and end_vec.
|
||
|
** Returns boolean match/no-match in hit.
|
||
|
***************
|
||
|
*** 56,61 ****
|
||
|
--- 182,188 ----
|
||
|
Free(prog);
|
||
|
return regexp_error;
|
||
|
}
|
||
|
+ #endif
|
||
|
|
||
|
|
||
|
char *filter_stringvec(const char *re, char const **stringvec, int *nummatch)
|
||
|
diff -rc scsh-0.4.2/scsh/re1.h scsh-0.4.2-regexp/scsh/re1.h
|
||
|
*** scsh-0.4.2/scsh/re1.h Sun Oct 22 08:34:34 1995
|
||
|
--- scsh-0.4.2-regexp/scsh/re1.h Sat Apr 6 17:54:09 1996
|
||
|
***************
|
||
|
*** 1,6 ****
|
||
|
--- 1,21 ----
|
||
|
+ #if 0
|
||
|
char *reg_match(const char *re, const char *string, int start,
|
||
|
scheme_value start_vec, scheme_value end_vec,
|
||
|
int *hit);
|
||
|
+ #endif
|
||
|
|
||
|
char *filter_stringvec(const char *re, char const **stringvec,
|
||
|
int *nummatch);
|
||
|
+
|
||
|
+ char *reg_comp_len(const char *re, int *len);
|
||
|
+ char *reg_comp_comp(const char *re, scheme_value cr);
|
||
|
+
|
||
|
+ char *reg_exec(scheme_value cr, const char *string, int start,
|
||
|
+ scheme_value start_vec, scheme_value end_vec, int *hit);
|
||
|
+
|
||
|
+ char *reg_subst(scheme_value cr, const char *match,
|
||
|
+ const char *src, int start,
|
||
|
+ scheme_value start_vec, scheme_value end_vec,
|
||
|
+ scheme_value outbuf, int *len);
|
||
|
+
|
||
|
+
|
||
|
|
||
|
Only in scsh-0.4.2-regexp/scsh: re2.scm
|
||
|
diff -rc scsh-0.4.2/scsh/scsh-interfaces.scm scsh-0.4.2-regexp/scsh/scsh-interfaces.scm
|
||
|
*** scsh-0.4.2/scsh/scsh-interfaces.scm Tue Oct 31 19:19:30 1995
|
||
|
--- scsh-0.4.2-regexp/scsh/scsh-interfaces.scm Sat Apr 6 18:48:12 1996
|
||
|
***************
|
||
|
*** 413,418 ****
|
||
|
--- 413,419 ----
|
||
|
make-regexp
|
||
|
regexp?
|
||
|
regexp-exec
|
||
|
+ regexp-subst
|
||
|
regexp-quote))
|
||
|
|
||
|
|
||
|
|
||
|
regexp library changes:
|
||
|
|
||
|
*** Makefile 1996/04/06 19:24:49 1.1
|
||
|
--- Makefile 1996/04/06 20:46:26
|
||
|
***************
|
||
|
*** 5,11 ****
|
||
|
# Things you might want to put in TEST:
|
||
|
# -DDEBUG debugging hooks
|
||
|
# -I. regexp.h from current directory, not /usr/include
|
||
|
! TEST=-I.
|
||
|
|
||
|
# Things you might want to put in PROF:
|
||
|
# -pg profiler
|
||
|
--- 5,11 ----
|
||
|
# Things you might want to put in TEST:
|
||
|
# -DDEBUG debugging hooks
|
||
|
# -I. regexp.h from current directory, not /usr/include
|
||
|
! TEST=-I. -DDEBUG
|
||
|
|
||
|
# Things you might want to put in PROF:
|
||
|
# -pg profiler
|
||
|
*** regexp.c 1996/04/06 19:24:49 1.1
|
||
|
--- regexp.c 1996/04/06 22:34:55
|
||
|
***************
|
||
|
*** 105,110 ****
|
||
|
--- 105,111 ----
|
||
|
* Utility definitions.
|
||
|
*/
|
||
|
#define FAIL(m) { regerror(m); return(NULL); }
|
||
|
+ #define FAILN(m) { regerror(m); return(-1); }
|
||
|
#define ISREPN(c) ((c) == '*' || (c) == '+' || (c) == '?')
|
||
|
#define META "^$.[()|?+*\\"
|
||
|
|
||
|
***************
|
||
|
*** 162,173 ****
|
||
|
const char *exp;
|
||
|
{
|
||
|
register regexp *r;
|
||
|
! register char *scan;
|
||
|
int flags;
|
||
|
struct comp co;
|
||
|
|
||
|
if (exp == NULL)
|
||
|
! FAIL("NULL argument to regcomp");
|
||
|
|
||
|
/* First pass: determine size, legality. */
|
||
|
co.regparse = (char *)exp;
|
||
|
--- 163,193 ----
|
||
|
const char *exp;
|
||
|
{
|
||
|
register regexp *r;
|
||
|
! size_t len;
|
||
|
!
|
||
|
! len = regcomp_len(exp);
|
||
|
! if (len <= 0)
|
||
|
! return NULL;
|
||
|
!
|
||
|
! /* Allocate space. */
|
||
|
! r = (regexp *)malloc(len);
|
||
|
!
|
||
|
! if (r == NULL)
|
||
|
! FAIL("out of space");
|
||
|
! return regcomp_comp(exp, r, len);
|
||
|
! }
|
||
|
!
|
||
|
!
|
||
|
! size_t
|
||
|
! regcomp_len(exp)
|
||
|
! const char *exp;
|
||
|
! {
|
||
|
int flags;
|
||
|
+ register regexp *r;
|
||
|
struct comp co;
|
||
|
|
||
|
if (exp == NULL)
|
||
|
! FAILN("NULL argument to regcomp");
|
||
|
|
||
|
/* First pass: determine size, legality. */
|
||
|
co.regparse = (char *)exp;
|
||
|
***************
|
||
|
*** 178,198 ****
|
||
|
co.regcode = co.regdummy;
|
||
|
regc(&co, MAGIC);
|
||
|
if (reg(&co, 0, &flags) == NULL)
|
||
|
! return(NULL);
|
||
|
|
||
|
/* Small enough for pointer-storage convention? */
|
||
|
if (co.regsize >= 0x7fffL) /* Probably could be 0xffffL. */
|
||
|
! FAIL("regexp too big");
|
||
|
|
||
|
! /* Allocate space. */
|
||
|
! r = (regexp *)malloc(sizeof(regexp) + (size_t)co.regsize);
|
||
|
! if (r == NULL)
|
||
|
! FAIL("out of space");
|
||
|
|
||
|
/* Second pass: emit code. */
|
||
|
co.regparse = (char *)exp;
|
||
|
co.regnpar = 1;
|
||
|
co.regcode = r->program;
|
||
|
regc(&co, MAGIC);
|
||
|
if (reg(&co, 0, &flags) == NULL)
|
||
|
return(NULL);
|
||
|
--- 198,228 ----
|
||
|
co.regcode = co.regdummy;
|
||
|
regc(&co, MAGIC);
|
||
|
if (reg(&co, 0, &flags) == NULL)
|
||
|
! return -1;
|
||
|
|
||
|
/* Small enough for pointer-storage convention? */
|
||
|
if (co.regsize >= 0x7fffL) /* Probably could be 0xffffL. */
|
||
|
! FAILN("regexp too big");
|
||
|
|
||
|
! return (sizeof(regexp) + (size_t)co.regsize);
|
||
|
! }
|
||
|
!
|
||
|
!
|
||
|
! regexp *
|
||
|
! regcomp_comp(exp, r, len)
|
||
|
! const char *exp;
|
||
|
! register regexp *r;
|
||
|
! size_t len;
|
||
|
! {
|
||
|
! register char *scan;
|
||
|
! int flags;
|
||
|
! struct comp co;
|
||
|
|
||
|
/* Second pass: emit code. */
|
||
|
co.regparse = (char *)exp;
|
||
|
co.regnpar = 1;
|
||
|
co.regcode = r->program;
|
||
|
+ co.regsize = len - sizeof(regexp);
|
||
|
regc(&co, MAGIC);
|
||
|
if (reg(&co, 0, &flags) == NULL)
|
||
|
return(NULL);
|
||
|
***************
|
||
|
*** 200,206 ****
|
||
|
/* Dig out information for optimizations. */
|
||
|
r->regstart = '\0'; /* Worst-case defaults. */
|
||
|
r->reganch = 0;
|
||
|
! r->regmust = NULL;
|
||
|
r->regmlen = 0;
|
||
|
scan = r->program+1; /* First BRANCH. */
|
||
|
if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
|
||
|
--- 230,236 ----
|
||
|
/* Dig out information for optimizations. */
|
||
|
r->regstart = '\0'; /* Worst-case defaults. */
|
||
|
r->reganch = 0;
|
||
|
! r->regmust = 0;
|
||
|
r->regmlen = 0;
|
||
|
scan = r->program+1; /* First BRANCH. */
|
||
|
if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
|
||
|
***************
|
||
|
*** 229,235 ****
|
||
|
longest = OPERAND(scan);
|
||
|
len = strlen(OPERAND(scan));
|
||
|
}
|
||
|
! r->regmust = longest;
|
||
|
r->regmlen = (int)len;
|
||
|
}
|
||
|
}
|
||
|
--- 259,265 ----
|
||
|
longest = OPERAND(scan);
|
||
|
len = strlen(OPERAND(scan));
|
||
|
}
|
||
|
! r->regmust = longest - r->program;
|
||
|
r->regmlen = (int)len;
|
||
|
}
|
||
|
}
|
||
|
***************
|
||
|
*** 648,655 ****
|
||
|
struct exec {
|
||
|
char *reginput; /* String-input pointer. */
|
||
|
char *regbol; /* Beginning of input, for ^ check. */
|
||
|
! char **regstartp; /* Pointer to startp array. */
|
||
|
! char **regendp; /* Ditto for endp. */
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
--- 678,685 ----
|
||
|
struct exec {
|
||
|
char *reginput; /* String-input pointer. */
|
||
|
char *regbol; /* Beginning of input, for ^ check. */
|
||
|
! const char **regstartp; /* Pointer to startp array. */
|
||
|
! const char **regendp; /* Ditto for endp. */
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
***************
|
||
|
*** 690,696 ****
|
||
|
}
|
||
|
|
||
|
/* If there is a "must appear" string, look for it. */
|
||
|
! if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
|
||
|
return(0);
|
||
|
|
||
|
/* Mark beginning of line for ^ . */
|
||
|
--- 720,727 ----
|
||
|
}
|
||
|
|
||
|
/* If there is a "must appear" string, look for it. */
|
||
|
! if ((prog->regmlen > 0) &&
|
||
|
! strstr(string, &prog->program[prog->regmust]) == NULL)
|
||
|
return(0);
|
||
|
|
||
|
/* Mark beginning of line for ^ . */
|
||
|
***************
|
||
|
*** 729,736 ****
|
||
|
char *string;
|
||
|
{
|
||
|
register int i;
|
||
|
! register char **stp;
|
||
|
! register char **enp;
|
||
|
|
||
|
ep->reginput = string;
|
||
|
|
||
|
--- 760,767 ----
|
||
|
char *string;
|
||
|
{
|
||
|
register int i;
|
||
|
! register const char **stp;
|
||
|
! register const char **enp;
|
||
|
|
||
|
ep->reginput = string;
|
||
|
|
||
|
***************
|
||
|
*** 1004,1011 ****
|
||
|
printf("start `%c' ", r->regstart);
|
||
|
if (r->reganch)
|
||
|
printf("anchored ");
|
||
|
! if (r->regmust != NULL)
|
||
|
! printf("must have \"%s\"", r->regmust);
|
||
|
printf("\n");
|
||
|
}
|
||
|
|
||
|
--- 1035,1042 ----
|
||
|
printf("start `%c' ", r->regstart);
|
||
|
if (r->reganch)
|
||
|
printf("anchored ");
|
||
|
! if (r->regmlen > 0)
|
||
|
! printf("must have \"%s\"", &r->program[r->regmust]);
|
||
|
printf("\n");
|
||
|
}
|
||
|
|
||
|
*** regexp.h 1996/04/06 19:24:49 1.1
|
||
|
--- regexp.h 1996/04/07 01:52:19
|
||
|
***************
|
||
|
*** 6,16 ****
|
||
|
*/
|
||
|
#define NSUBEXP 10
|
||
|
typedef struct regexp {
|
||
|
! char *startp[NSUBEXP];
|
||
|
! char *endp[NSUBEXP];
|
||
|
char regstart; /* Internal use only. */
|
||
|
char reganch; /* Internal use only. */
|
||
|
! char *regmust; /* Internal use only. */
|
||
|
int regmlen; /* Internal use only. */
|
||
|
char program[1]; /* Unwarranted chumminess with compiler. */
|
||
|
} regexp;
|
||
|
--- 6,16 ----
|
||
|
*/
|
||
|
#define NSUBEXP 10
|
||
|
typedef struct regexp {
|
||
|
! const char *startp[NSUBEXP];
|
||
|
! const char *endp[NSUBEXP];
|
||
|
char regstart; /* Internal use only. */
|
||
|
char reganch; /* Internal use only. */
|
||
|
! int regmust; /* Internal use only. */
|
||
|
int regmlen; /* Internal use only. */
|
||
|
char program[1]; /* Unwarranted chumminess with compiler. */
|
||
|
} regexp;
|
||
|
***************
|
||
|
*** 18,21 ****
|
||
|
--- 18,27 ----
|
||
|
extern regexp *regcomp(const char *re);
|
||
|
extern int regexec(regexp *rp, const char *s);
|
||
|
extern void regsub(const regexp *rp, const char *src, char *dst);
|
||
|
+ extern void regnsub(const regexp *rp, const char *src, char *dst, size_t len);
|
||
|
+ extern size_t regsublen(const regexp *rp, const char *src);
|
||
|
+
|
||
|
extern void regerror(char *message);
|
||
|
+ extern size_t regcomp_len(const char *exp);
|
||
|
+ extern regexp *regcomp_comp(const char *exp, struct regexp *r, size_t len);
|
||
|
+
|
||
|
*** regsub.c 1996/04/06 19:24:49 1.1
|
||
|
--- regsub.c 1996/04/07 02:10:29
|
||
|
***************
|
||
|
*** 11,25 ****
|
||
|
/*
|
||
|
- regsub - perform substitutions after a regexp match
|
||
|
*/
|
||
|
void
|
||
|
! regsub(rp, source, dest)
|
||
|
const regexp *rp;
|
||
|
const char *source;
|
||
|
char *dest;
|
||
|
{
|
||
|
register regexp * const prog = (regexp *)rp;
|
||
|
! register char *src = (char *)source;
|
||
|
register char *dst = dest;
|
||
|
register char c;
|
||
|
register int no;
|
||
|
register size_t len;
|
||
|
--- 11,42 ----
|
||
|
/*
|
||
|
- regsub - perform substitutions after a regexp match
|
||
|
*/
|
||
|
+
|
||
|
+ void regsub(rp, source, dest)
|
||
|
+ const regexp *rp;
|
||
|
+ const char *source;
|
||
|
+ char *dest;
|
||
|
+ {
|
||
|
+ regnsub(rp, source, dest, BUFSIZ);
|
||
|
+ }
|
||
|
+
|
||
|
+
|
||
|
+
|
||
|
+ /*
|
||
|
+ - regnsub - perform bounds-checked substitutions after a regexp match
|
||
|
+ */
|
||
|
void
|
||
|
! regnsub(rp, source, dest, destlen)
|
||
|
const regexp *rp;
|
||
|
const char *source;
|
||
|
char *dest;
|
||
|
+ size_t destlen;
|
||
|
{
|
||
|
register regexp * const prog = (regexp *)rp;
|
||
|
! register const char *src = (char *)source;
|
||
|
register char *dst = dest;
|
||
|
+ char *dstend = dest + destlen;
|
||
|
+ char *odst;
|
||
|
register char c;
|
||
|
register int no;
|
||
|
register size_t len;
|
||
|
***************
|
||
|
*** 45,55 ****
|
||
|
if (c == '\\' && (*src == '\\' || *src == '&'))
|
||
|
c = *src++;
|
||
|
*dst++ = c;
|
||
|
} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
|
||
|
! prog->endp[no] > prog->startp[no]) {
|
||
|
len = prog->endp[no] - prog->startp[no];
|
||
|
! (void) strncpy(dst, prog->startp[no], len);
|
||
|
dst += len;
|
||
|
if (*(dst-1) == '\0') { /* strncpy hit NUL. */
|
||
|
regerror("damaged match string");
|
||
|
return;
|
||
|
--- 62,83 ----
|
||
|
if (c == '\\' && (*src == '\\' || *src == '&'))
|
||
|
c = *src++;
|
||
|
*dst++ = c;
|
||
|
+ if (dst >= dstend)
|
||
|
+ {
|
||
|
+ regerror("output buffer too small");
|
||
|
+ return;
|
||
|
+ }
|
||
|
} else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
|
||
|
! prog->endp[no] > prog->startp[no]) {
|
||
|
len = prog->endp[no] - prog->startp[no];
|
||
|
! odst = dst;
|
||
|
dst += len;
|
||
|
+ if (dst >= dstend)
|
||
|
+ {
|
||
|
+ regerror("output buffer too small");
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ (void) strncpy(odst, prog->startp[no], len);
|
||
|
if (*(dst-1) == '\0') { /* strncpy hit NUL. */
|
||
|
regerror("damaged match string");
|
||
|
return;
|
||
|
***************
|
||
|
*** 58,60 ****
|
||
|
--- 86,131 ----
|
||
|
}
|
||
|
*dst++ = '\0';
|
||
|
}
|
||
|
+
|
||
|
+ size_t regsublen(rp, source)
|
||
|
+ const regexp *rp;
|
||
|
+ const char *source;
|
||
|
+ {
|
||
|
+ register regexp * const prog = (regexp *)rp;
|
||
|
+ register char *src = (char *)source;
|
||
|
+ register char c;
|
||
|
+ register int no;
|
||
|
+ register int len = 0;
|
||
|
+
|
||
|
+ if (prog == NULL || source == NULL) {
|
||
|
+ regerror("NULL parameter to regsublen");
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ if ((unsigned char)*(prog->program) != MAGIC) {
|
||
|
+ regerror("damaged regexp");
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+ while ((c = *src++) != '\0') {
|
||
|
+ if (c == '&')
|
||
|
+ no = 0;
|
||
|
+ else if (c == '\\' && isdigit(*src))
|
||
|
+ no = *src++ - '0';
|
||
|
+ else
|
||
|
+ no = -1;
|
||
|
+ if (no < 0) { /* Ordinary character. */
|
||
|
+ if (c == '\\' && (*src == '\\' || *src == '&'))
|
||
|
+ src++;
|
||
|
+ len++;
|
||
|
+ } else {
|
||
|
+ const char *s = prog->startp[no];
|
||
|
+ const char *e = prog->endp[no];
|
||
|
+ if ((s != NULL) && (e != NULL) && (e > s)) {
|
||
|
+ len += e-s;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return len+1;
|
||
|
+ }
|
||
|
+
|
||
|
+
|
||
|
|
||
|
Original regexp code from Henry:
|
||
|
[unpacked & deleted -Olin]
|