1995-10-13 23:34:21 -04:00
|
|
|
/* Scheme48 interface to Henry Spencer's regular expression package.
|
|
|
|
** Copyright (c) 1993, 1994 by Olin Shivers.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "regexp.h"
|
|
|
|
#include "cstuff.h"
|
|
|
|
|
1995-10-22 06:05:39 -04:00
|
|
|
/* Make sure our exports match up w/the implementation: */
|
|
|
|
#include "re1.h"
|
|
|
|
|
1995-10-13 23:34:21 -04:00
|
|
|
/* Not multi-threaded reentrant. */
|
|
|
|
static char *regexp_error;
|
|
|
|
|
|
|
|
/* Stash error msg in global. */
|
|
|
|
void regerror(char *msg) {regexp_error = msg;}
|
|
|
|
|
1996-09-12 00:26:13 -04:00
|
|
|
/*
|
|
|
|
** Return NULL normally, error string on error.
|
|
|
|
** Stash number of bytes needed for compiled regexp into `*len'
|
|
|
|
*/
|
|
|
|
|
|
|
|
char *re_byte_len(const char *re, int *len)
|
|
|
|
{
|
|
|
|
int l;
|
|
|
|
|
|
|
|
regexp_error = 0;
|
|
|
|
*len = regcomp_len(re);
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Return NULL normally, error string on error.
|
|
|
|
** Compile regexp into string described by `cr'.
|
|
|
|
*/
|
|
|
|
|
|
|
|
char *re_compile(const char *re, scheme_value cr)
|
|
|
|
{
|
|
|
|
int len = STRING_LENGTH(cr);
|
|
|
|
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
|
|
|
|
|
|
|
regexp_error = 0;
|
|
|
|
r = regcomp_comp(re, r, len);
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return NULL normally, error string on error.
|
|
|
|
** Stash match info in start_vec and end_vec.
|
|
|
|
** Returns boolean match/no-match in hit.
|
|
|
|
*/
|
|
|
|
|
|
|
|
char *re_exec(scheme_value cr, const char *string, int start,
|
|
|
|
scheme_value start_vec, scheme_value end_vec, int *hit)
|
|
|
|
{
|
|
|
|
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
|
|
|
|
|
|
|
*hit = 0;
|
|
|
|
|
|
|
|
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
|
|
|
return "Illegal start vector"; /* never trigger. */
|
|
|
|
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
|
|
|
return "Illegal end vector";
|
|
|
|
|
|
|
|
regexp_error = 0;
|
|
|
|
|
|
|
|
if( regexec(r, string+start) ) {
|
|
|
|
int i;
|
|
|
|
for(i=0; i<NSUBEXP; i++) {
|
|
|
|
const char *s = r->startp[i];
|
|
|
|
const char *e = r->endp[i];
|
|
|
|
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
|
|
|
|
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
|
|
|
|
r->startp[i] = 0; /* Why did Sommerfeld */
|
|
|
|
r->endp[i] = 0; /* put these here? */
|
|
|
|
}
|
|
|
|
*hit = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
char *re_subst(scheme_value cr, const char *match,
|
|
|
|
const char *src, int start,
|
|
|
|
scheme_value start_vec, scheme_value end_vec,
|
|
|
|
scheme_value outbuf, int *len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
|
|
|
|
|
|
|
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
|
|
|
return "Illegal start vector"; /* never trigger. */
|
|
|
|
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
|
|
|
return "Illegal end vector";
|
|
|
|
|
|
|
|
for (i=0; i<NSUBEXP; i++) {
|
|
|
|
scheme_value se = VECTOR_REF(start_vec, i);
|
|
|
|
scheme_value ee = VECTOR_REF(end_vec, i);
|
|
|
|
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
|
|
|
|
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
regexp_error = 0;
|
|
|
|
regnsub(r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
|
|
|
|
*len = strlen(&STRING_REF(outbuf, 0));
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *re_subst_len(scheme_value cr, const char *match,
|
|
|
|
const char *src, int start,
|
|
|
|
scheme_value start_vec, scheme_value end_vec,
|
|
|
|
int *len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
|
|
|
|
|
|
|
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
|
|
|
return "Illegal start vector"; /* never trigger. */
|
|
|
|
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
|
|
|
return "Illegal end vector";
|
|
|
|
|
|
|
|
for (i=0; i<NSUBEXP; i++) {
|
|
|
|
scheme_value se = VECTOR_REF(start_vec, i);
|
|
|
|
scheme_value ee = VECTOR_REF(end_vec, i);
|
|
|
|
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
|
|
|
|
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
regexp_error = 0;
|
|
|
|
*len = regsublen(r, src);
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
1995-10-13 23:34:21 -04:00
|
|
|
/* Return NULL normally, error string on error.
|
|
|
|
** Stash match info in start_vec and end_vec.
|
|
|
|
** Returns boolean match/no-match in hit.
|
|
|
|
*/
|
|
|
|
|
1996-09-12 00:26:13 -04:00
|
|
|
char *re_match(const char *re, const char *string, int start,
|
|
|
|
scheme_value start_vec, scheme_value end_vec, int *hit)
|
1995-10-13 23:34:21 -04:00
|
|
|
{
|
1995-10-27 04:58:58 -04:00
|
|
|
regexp *prog;
|
1995-10-13 23:34:21 -04:00
|
|
|
|
1995-10-27 04:58:58 -04:00
|
|
|
regexp_error = 0;
|
1995-10-13 23:34:21 -04:00
|
|
|
*hit = 0;
|
1995-10-27 04:58:58 -04:00
|
|
|
prog = regcomp(re);
|
1995-10-13 23:34:21 -04:00
|
|
|
if( !prog ) return regexp_error;
|
|
|
|
|
1996-09-12 00:26:13 -04:00
|
|
|
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) { /* These two tests */
|
1995-10-13 23:34:21 -04:00
|
|
|
Free(prog);
|
|
|
|
return "Illegal start vector";
|
|
|
|
}
|
|
|
|
|
1996-09-12 00:26:13 -04:00
|
|
|
if( VECTOR_LENGTH(end_vec) != NSUBEXP ) { /* should never trigger. */
|
1995-10-13 23:34:21 -04:00
|
|
|
Free(prog);
|
|
|
|
return "Illegal end vector";
|
|
|
|
}
|
|
|
|
|
|
|
|
if( regexec(prog, string+start) ) {
|
|
|
|
int i;
|
|
|
|
for(i=0; i<NSUBEXP; i++) {
|
1996-09-12 00:26:13 -04:00
|
|
|
const char *s = prog->startp[i];
|
|
|
|
const char *e = prog->endp[i];
|
|
|
|
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
|
|
|
|
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
|
1995-10-13 23:34:21 -04:00
|
|
|
}
|
|
|
|
*hit = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
Free(prog);
|
|
|
|
return regexp_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
1995-10-22 06:05:39 -04:00
|
|
|
char *filter_stringvec(const char *re, char const **stringvec, int *nummatch)
|
1995-10-13 23:34:21 -04:00
|
|
|
{
|
1995-10-27 04:58:58 -04:00
|
|
|
regexp *prog;
|
|
|
|
regexp_error = 0;
|
1995-10-13 23:34:21 -04:00
|
|
|
|
1995-10-27 04:58:58 -04:00
|
|
|
if( prog=regcomp(re) ) {
|
|
|
|
char const **p = stringvec;
|
|
|
|
char const **q = p;
|
|
|
|
|
|
|
|
while(*p) {
|
|
|
|
if( regexec(prog, *p) ) *q++ = *p;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
Free(prog);
|
|
|
|
*nummatch = q-stringvec;
|
1995-10-13 23:34:21 -04:00
|
|
|
}
|
|
|
|
|
1995-10-27 04:58:58 -04:00
|
|
|
return regexp_error;
|
1995-10-13 23:34:21 -04:00
|
|
|
}
|