removed old regexp stuff
This commit is contained in:
parent
734daac16f
commit
adbee59277
69
scsh/re.c
69
scsh/re.c
|
@ -1,69 +0,0 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by cig.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> /* For malloc. */
|
||||
#include "libcig.h"
|
||||
|
||||
/* Make sure foreign-function stubs interface to the C funs correctly: */
|
||||
#include "re1.h"
|
||||
|
||||
scheme_value df_re_byte_len(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_byte_len(const char *, int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(2, nargs, "re_byte_len");
|
||||
r1 = re_byte_len(cig_string_body(args[1]), &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_FIXNUM(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_compile(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_compile(const char *, scheme_value );
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
|
||||
cig_check_nargs(3, nargs, "re_compile");
|
||||
r1 = re_compile(cig_string_body(args[2]), args[1]);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_exec(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_exec(scheme_value , const char *, int , scheme_value , scheme_value , int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(6, nargs, "re_exec");
|
||||
r1 = re_exec(args[5], cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_match(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_match(const char *, const char *, int , scheme_value , scheme_value , int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(6, nargs, "re_match");
|
||||
r1 = re_match(cig_string_body(args[5]), cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
248
scsh/re.scm
248
scsh/re.scm
|
@ -1,248 +0,0 @@
|
|||
;;; Regular expression matching for scsh
|
||||
;;; Copyright (c) 1994 by Olin Shivers.
|
||||
|
||||
(foreign-source
|
||||
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
||||
"#include \"re1.h\""
|
||||
"" ""
|
||||
)
|
||||
|
||||
;;; Match data for regexp matches.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record regexp-match
|
||||
string ; The string against which we matched.
|
||||
start ; 10 elt vec
|
||||
end) ; 10 elt vec
|
||||
|
||||
(define (match:start match . maybe-index)
|
||||
(vector-ref (regexp-match:start match)
|
||||
(:optional maybe-index 0)))
|
||||
|
||||
(define (match:end match . maybe-index)
|
||||
(vector-ref (regexp-match:end match)
|
||||
(:optional maybe-index 0)))
|
||||
|
||||
(define (match:substring match . maybe-index)
|
||||
(let* ((i (:optional maybe-index 0))
|
||||
(start (vector-ref (regexp-match:start match) i)))
|
||||
(and start (substring (regexp-match:string match)
|
||||
start
|
||||
(vector-ref (regexp-match:end match) i)))))
|
||||
|
||||
;;; Compiling regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record %regexp
|
||||
string ; The string form of the regexp.
|
||||
bytes ; The compiled representation, stuffed into a Scheme string.
|
||||
((disclose self) (list "Regexp" (%regexp:string self))))
|
||||
|
||||
(define regexp? %regexp?)
|
||||
|
||||
|
||||
(define (make-regexp pattern)
|
||||
(receive (err len) (%regexp-compiled-length pattern)
|
||||
(if err (error err make-regexp pattern)
|
||||
(let ((buf (make-string len)))
|
||||
(%regexp-compile pattern buf)
|
||||
(make-%regexp pattern buf)))))
|
||||
|
||||
(define-foreign %regexp-compiled-length (re_byte_len (string pattern))
|
||||
static-string ; Error msg or #f
|
||||
integer) ; number of bytes needed to compile REGEXP.
|
||||
|
||||
(define-foreign %regexp-compile (re_compile (string pattern)
|
||||
(string-desc bytes))
|
||||
static-string) ; Error msg or #f
|
||||
|
||||
|
||||
|
||||
;;; Executing compiled regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (regexp-exec regexp str . maybe-start)
|
||||
(let ((start (:optional maybe-start 0))
|
||||
(start-vec (make-vector 10))
|
||||
(end-vec (make-vector 10)))
|
||||
(receive (err match?)
|
||||
(%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec)
|
||||
(if err (error err regexp-exec regexp str start)
|
||||
(and match?
|
||||
(make-regexp-match str start-vec end-vec))))))
|
||||
|
||||
(define-foreign %regexp-exec (re_exec (string-desc compiled-regexp)
|
||||
(string s)
|
||||
(integer start)
|
||||
(vector-desc start-vec)
|
||||
(vector-desc end-vec))
|
||||
static-string ; Error msg or #f
|
||||
bool) ; Matched?
|
||||
|
||||
|
||||
;;; Compile&match regexps in one go
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; I could do this with the separate compile and execute procedures,
|
||||
;;; but I go straight to C just for fun.
|
||||
|
||||
(define (string-match pattern string . maybe-start)
|
||||
(let ((start (:optional maybe-start 0))
|
||||
(start-vec (make-vector 10))
|
||||
(end-vec (make-vector 10)))
|
||||
(receive (err match?) (%string-match pattern string start
|
||||
start-vec end-vec)
|
||||
(if err (error err string-match pattern string start)
|
||||
(and match? (make-regexp-match string start-vec end-vec))))))
|
||||
|
||||
(define-foreign %string-match (re_match (string pattern)
|
||||
(string s)
|
||||
(integer start)
|
||||
(vector-desc start-vec)
|
||||
(vector-desc end-vec))
|
||||
static-string ; Error string or #f if all is ok.
|
||||
bool) ; match?
|
||||
|
||||
|
||||
|
||||
;;; Substitutions
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (regexp-substitute port match . items)
|
||||
(let* ((str (regexp-match:string match))
|
||||
(sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(range (lambda (item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values 0 (vector-ref sv 0)))
|
||||
((eq? 'post item) (values (vector-ref ev 0)
|
||||
(string-length str)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute))))))
|
||||
(if port
|
||||
|
||||
;; Output port case.
|
||||
(for-each (lambda (item)
|
||||
(if (string? item) (write-string item port)
|
||||
(receive (si ei) (range item)
|
||||
(write-string str port si ei))))
|
||||
items)
|
||||
|
||||
;; Here's the string case. Make two passes -- one to
|
||||
;; compute the length of the target string, one to fill it in.
|
||||
(let* ((len (reduce (lambda (i item)
|
||||
(+ i (if (string? item) (string-length item)
|
||||
(receive (si ei) (range item) (- ei si)))))
|
||||
0 items))
|
||||
(ans (make-string len)))
|
||||
|
||||
(reduce (lambda (index item)
|
||||
(cond ((string? item)
|
||||
(string-replace! ans index item)
|
||||
(+ index (string-length item)))
|
||||
(else (receive (si ei) (range item)
|
||||
(substring-replace! ans index str si ei)
|
||||
(+ index (- ei si))))))
|
||||
0 items)
|
||||
ans))))
|
||||
|
||||
|
||||
|
||||
(define (regexp-substitute/global port re str . items)
|
||||
(let ((range (lambda (start sv ev item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values start (vector-ref sv 0)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute/global)))))
|
||||
(num-posts (reduce (lambda (count item)
|
||||
(+ count (if (eq? item 'post) 1 0)))
|
||||
0 items)))
|
||||
(if (and port (< num-posts 2))
|
||||
|
||||
;; Output port case, with zero or one POST items.
|
||||
(let recur ((start 0))
|
||||
(let ((match (string-match re str start)))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(for-each (lambda (item)
|
||||
(cond ((string? item) (write-string item port))
|
||||
((procedure? item) (write-string (item match) port))
|
||||
((eq? 'post item) (recur (vector-ref ev 0)))
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(write-string str port si ei)))))
|
||||
items))
|
||||
|
||||
(write-string str port start)))) ; No match.
|
||||
|
||||
(let* ((pieces (let recur ((start 0))
|
||||
(let ((match (string-match re str start))
|
||||
(cached-post #f))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(reduce (lambda (pieces item)
|
||||
(cond ((string? item)
|
||||
(cons item pieces))
|
||||
|
||||
((procedure? item)
|
||||
(cons (item match) pieces))
|
||||
|
||||
((eq? 'post item)
|
||||
(if (not cached-post)
|
||||
(set! cached-post
|
||||
(recur (vector-ref ev 0))))
|
||||
(append cached-post pieces))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(cons (substring str si ei)
|
||||
pieces)))))
|
||||
'() items))
|
||||
|
||||
;; No match. Return str[start,end].
|
||||
(list (if (zero? start) str
|
||||
(substring str start (string-length str))))))))
|
||||
(pieces (reverse pieces)))
|
||||
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
||||
(apply string-append pieces))))))
|
||||
|
||||
|
||||
|
||||
;;; Miscellaneous
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Convert a string into a regex pattern that matches that string exactly --
|
||||
;;; in other words, quote the special chars with backslashes.
|
||||
|
||||
(define (regexp-quote string)
|
||||
(let lp ((i (- (string-length string) 1))
|
||||
(result '()))
|
||||
(if (< i 0) (list->string result)
|
||||
(lp (- i 1)
|
||||
(let* ((c (string-ref string i))
|
||||
(result (cons c result)))
|
||||
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
|
||||
(cons #\\ result)
|
||||
result))))))
|
||||
|
||||
|
||||
;;; Count the number of possible sub-matches in a regexp
|
||||
;;; (i.e., the number of left parens).
|
||||
|
||||
(define (regexp-num-submatches s)
|
||||
(let* ((len (string-length s))
|
||||
(len-1 (- len 1)))
|
||||
(let lp ((i 0) (nsm 0))
|
||||
(if (= i len) nsm
|
||||
(case (string-ref s i)
|
||||
((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm))
|
||||
((#\() (lp (+ i 1) (+ nsm 1)))
|
||||
(else (lp (+ i 1) nsm)))))))
|
194
scsh/re1.c
194
scsh/re1.c
|
@ -1,194 +0,0 @@
|
|||
/* Scheme48 interface to Henry Spencer's regular expression package.
|
||||
** Copyright (c) 1993, 1994 by Olin Shivers.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "regexp.h"
|
||||
#include "cstuff.h"
|
||||
|
||||
/* Make sure our exports match up w/the implementation: */
|
||||
#include "re1.h"
|
||||
|
||||
/* Not multi-threaded reentrant. */
|
||||
static char *regexp_error;
|
||||
|
||||
/* Stash error msg in global. */
|
||||
void regerror(char *msg) {regexp_error = msg;}
|
||||
|
||||
/*
|
||||
** Return NULL normally, error string on error.
|
||||
** Stash number of bytes needed for compiled regexp into `*len'
|
||||
*/
|
||||
|
||||
char *re_byte_len(const char *re, int *len)
|
||||
{
|
||||
int l;
|
||||
|
||||
regexp_error = 0;
|
||||
*len = regcomp_len(re);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return NULL normally, error string on error.
|
||||
** Compile regexp into string described by `cr'.
|
||||
*/
|
||||
|
||||
char *re_compile(const char *re, s48_value cr)
|
||||
{
|
||||
int len = S48_STRING_LENGTH(cr);
|
||||
regexp *r = (regexp *) &S48_STRING_REF(cr, 0);
|
||||
|
||||
regexp_error = 0;
|
||||
regcomp_comp(re, r, len);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
/* Return NULL normally, error string on error.
|
||||
** Stash match info in start_vec and end_vec.
|
||||
** Returns boolean match/no-match in hit.
|
||||
*/
|
||||
|
||||
char *re_exec(s48_value cr, const char *string, int start,
|
||||
s48_value start_vec, s48_value end_vec, int *hit)
|
||||
{
|
||||
regexp *r = (regexp *) &S48_STRING_REF(cr, 0);
|
||||
|
||||
*hit = 0;
|
||||
|
||||
if( S48_VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( S48_VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
regexp_error = 0;
|
||||
|
||||
if( regexec(r, string+start) ) {
|
||||
int i;
|
||||
for(i=0; i<NSUBEXP; i++) {
|
||||
const char *s = r->startp[i];
|
||||
const char *e = r->endp[i];
|
||||
S48_VECTOR_REF(start_vec,i) = s ? s48_enter_fixnum(s - string) : S48_FALSE;
|
||||
S48_VECTOR_REF(end_vec,i) = e ? s48_enter_fixnum(e - string) : S48_FALSE;
|
||||
r->startp[i] = 0; /* Why did Sommerfeld */
|
||||
r->endp[i] = 0; /* put these here? */
|
||||
}
|
||||
*hit = 1;
|
||||
}
|
||||
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
char *re_subst(s48_value cr, const char *match,
|
||||
const char *src, int start,
|
||||
s48_value start_vec, s48_value end_vec,
|
||||
s48_value outbuf, int *len)
|
||||
{
|
||||
int i;
|
||||
regexp *r = (regexp *) &S48_STRING_REF(cr, 0);
|
||||
|
||||
if( S48_VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( S48_VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
for (i=0; i<NSUBEXP; i++) {
|
||||
s48_value se = S48_VECTOR_REF(start_vec, i);
|
||||
s48_value ee = S48_VECTOR_REF(end_vec, i);
|
||||
r->startp[i] = S48_FIXNUM_P(se) ? (match + s48_extract_fixnum(se)) : 0;
|
||||
r->endp[i] = S48_FIXNUM_P(ee) ? (match + s48_extract_fixnum(ee)) : 0;
|
||||
}
|
||||
|
||||
regexp_error = 0;
|
||||
regnsub(r, src, &S48_STRING_REF(outbuf, 0), S48_STRING_LENGTH(outbuf));
|
||||
*len = strlen(&S48_STRING_REF(outbuf, 0));
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
char *re_subst_len(s48_value cr, const char *match,
|
||||
const char *src, int start,
|
||||
s48_value start_vec, s48_value end_vec,
|
||||
int *len)
|
||||
{
|
||||
int i;
|
||||
regexp *r = (regexp *) &S48_STRING_REF(cr, 0);
|
||||
|
||||
if( S48_VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( S48_VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
for (i=0; i<NSUBEXP; i++) {
|
||||
s48_value se = S48_VECTOR_REF(start_vec, i);
|
||||
s48_value ee = S48_VECTOR_REF(end_vec, i);
|
||||
r->startp[i] = S48_FIXNUM_P(se) ? (match + s48_extract_fixnum(se)) : 0;
|
||||
r->endp[i] = S48_FIXNUM_P(ee) ? (match + s48_extract_fixnum(ee)) : 0;
|
||||
}
|
||||
|
||||
regexp_error = 0;
|
||||
*len = regsublen(r, src);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
/* Return NULL normally, error string on error.
|
||||
** Stash match info in start_vec and end_vec.
|
||||
** Returns boolean match/no-match in hit.
|
||||
*/
|
||||
|
||||
char *re_match(const char *re, const char *string, int start,
|
||||
s48_value start_vec, s48_value end_vec, int *hit)
|
||||
{
|
||||
regexp *prog;
|
||||
|
||||
regexp_error = 0;
|
||||
*hit = 0;
|
||||
prog = regcomp(re);
|
||||
if( !prog ) return regexp_error;
|
||||
|
||||
if( S48_VECTOR_LENGTH(start_vec) != NSUBEXP ) { /* These two tests */
|
||||
Free(prog);
|
||||
return "Illegal start vector";
|
||||
}
|
||||
|
||||
if( S48_VECTOR_LENGTH(end_vec) != NSUBEXP ) { /* should never trigger. */
|
||||
Free(prog);
|
||||
return "Illegal end vector";
|
||||
}
|
||||
|
||||
if( regexec(prog, string+start) ) {
|
||||
int i;
|
||||
for(i=0; i<NSUBEXP; i++) {
|
||||
const char *s = prog->startp[i];
|
||||
const char *e = prog->endp[i];
|
||||
S48_VECTOR_REF(start_vec,i) = s ? s48_enter_fixnum(s - string) : S48_FALSE;
|
||||
S48_VECTOR_REF(end_vec,i) = e ? s48_enter_fixnum(e - string) : S48_FALSE;
|
||||
}
|
||||
*hit = 1;
|
||||
}
|
||||
|
||||
Free(prog);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
char *filter_stringvec(const char *re, char const **stringvec, int *nummatch)
|
||||
{
|
||||
regexp *prog;
|
||||
regexp_error = 0;
|
||||
|
||||
if( prog=regcomp(re) ) {
|
||||
char const **p = stringvec;
|
||||
char const **q = p;
|
||||
|
||||
while(*p) {
|
||||
if( regexec(prog, *p) ) *q++ = *p;
|
||||
p++;
|
||||
}
|
||||
Free(prog);
|
||||
*nummatch = q-stringvec;
|
||||
}
|
||||
|
||||
return regexp_error;
|
||||
}
|
Loading…
Reference in New Issue