scsh-0.6/scsh/rx/regexp1.c

225 lines
6.2 KiB
C

/* Copyright (c) 1993-2000 by Richard Kelsey and Jonathan Rees.
See file COPYING. */
/*
* Scheme 48/POSIX regex interface
*/
#include <sys/types.h>
#include <regex.h> /* POSIX.2 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "scheme48.h"
extern void s48_init_posix_regex(void);
static s48_value posix_compile_regexp(s48_value pattern,
s48_value extended_p,
s48_value ignore_case_p,
s48_value submatches_p,
s48_value newline_p),
posix_regexp_match(s48_value sch_regex,
s48_value string,
s48_value submatches_p,
s48_value bol_p,
s48_value eol_p,
s48_value sch_start),
posix_regexp_error_message(s48_value pattern,
s48_value extended_p,
s48_value ignore_case_p,
s48_value submatches_p,
s48_value newline_p),
posix_free_regexp(s48_value sch_regex);
/*
* Record type imported from Scheme.
*/
static s48_value posix_regexp_match_type_binding = S48_FALSE;
/*
* Install all exported functions in Scheme48.
*/
void
s48_init_posix_regexp(void)
{
/* Export our stuff. */
S48_EXPORT_FUNCTION(posix_compile_regexp);
S48_EXPORT_FUNCTION(posix_regexp_match);
S48_EXPORT_FUNCTION(posix_regexp_error_message);
S48_EXPORT_FUNCTION(posix_free_regexp);
/* Protect and import the regex-match type. */
S48_GC_PROTECT_GLOBAL(posix_regexp_match_type_binding);
posix_regexp_match_type_binding =
s48_get_imported_binding("posix-regexp-match-type");
}
/*
* Interface to regcomp. We encode the flags, make the return value, and
* then call regcomp() to fill it in.
*/
static s48_value
posix_compile_regexp(s48_value pattern,
s48_value extended_p, s48_value ignore_case_p,
s48_value submatches_p, s48_value newline_p)
{
s48_value sch_regex;
int status;
S48_DECLARE_GC_PROTECT(1);
int flags = (S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0) |
(S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0) |
(S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB) |
(S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0);
S48_GC_PROTECT_1(pattern);
S48_CHECK_STRING(pattern);
sch_regex = S48_MAKE_VALUE(regex_t);
status = regcomp(S48_UNSAFE_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
S48_UNSAFE_EXTRACT_STRING(pattern),
flags);
S48_GC_UNPROTECT();
if (status == 0)
return sch_regex;
else
return S48_UNSAFE_ENTER_FIXNUM(status); /* not that it can do them much good */
}
/*
* Interface to regexec.
*
* Returns #f if there is no match, #t if there is a match and submatches_p
* is false, and a list of regex-match records otherwise.
*
* Most of this is making the buffer for the match structs and then translating
* them into Scheme match records.
*/
static s48_value
posix_regexp_match(s48_value sch_regex, s48_value string,
s48_value submatches_p,
s48_value bol_p, s48_value eol_p,
s48_value sch_start)
{
int status;
s48_value result;
int start, len;
/* re_nsub doesn't include the full pattern */
size_t nmatch = 1 + S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)->re_nsub;
regmatch_t *pmatch,
pmatch_buffer[32];
int flags = (S48_EXTRACT_BOOLEAN(bol_p) ? 0 : REG_NOTBOL) |
(S48_EXTRACT_BOOLEAN(eol_p) ? 0 : REG_NOTEOL);
start = s48_extract_fixnum(sch_start);
len = S48_STRING_LENGTH(string);
if ((start < 0) || (start > len))
s48_raise_range_error(sch_start,
s48_enter_fixnum(0),
s48_enter_fixnum(len));
if (nmatch <= 32)
pmatch = pmatch_buffer;
else {
pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t));
if (pmatch == NULL)
s48_raise_out_of_memory_error(); }
status = regexec(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
S48_UNSAFE_EXTRACT_STRING(string) + start,
nmatch, pmatch, flags);
if (status == REG_NOMATCH)
result = S48_FALSE;
else if (! S48_EXTRACT_BOOLEAN(submatches_p))
result = S48_TRUE;
else {
s48_value match = S48_FALSE;
s48_value matches = S48_NULL;
int i;
S48_DECLARE_GC_PROTECT(2);
S48_GC_PROTECT_2(match, matches);
for(i = nmatch - 1; i > -1; i--) {
if (pmatch[i].rm_so == -1)
match = S48_FALSE;
else {
match = s48_make_record(posix_regexp_match_type_binding);
S48_UNSAFE_RECORD_SET(match, 0,
s48_enter_fixnum(pmatch[i].rm_so + start));
S48_UNSAFE_RECORD_SET(match, 1,
s48_enter_fixnum(pmatch[i].rm_eo + start));
S48_UNSAFE_RECORD_SET(match, 2, S48_FALSE); } /* submatches */
matches = s48_cons(match, matches); }
S48_GC_UNPROTECT();
result = matches; }
if (nmatch > 32)
free(pmatch);
return result;
}
/*
* Interface to regcomp.
*
* This takes the same arguments as `compile_regexp' but returns the error
* message, if any, that `regcomp()' returns. For some reason `regerror()'
* requires both the status code and the compiled pattern buffer returned
* by `regcomp()'. `compile_regexp' only returned the status so we have to
* redo the compilation.
*
*/
static s48_value
posix_regexp_error_message(s48_value pattern,
s48_value extended_p, s48_value ignore_case_p,
s48_value submatches_p, s48_value newline_p)
{
regex_t compiled_regex;
int status;
int flags = (S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0) |
(S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0) |
(S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB) |
(S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0);
S48_CHECK_STRING(pattern);
status = regcomp(&compiled_regex, S48_UNSAFE_EXTRACT_STRING(pattern), flags);
if (status == 0)
return S48_FALSE;
else {
size_t buffer_size;
s48_value buffer;
buffer_size = regerror(status, &compiled_regex, NULL, 0);
/* For string lengths C counts the nul, Scheme doesn't. */
buffer = s48_make_string(buffer_size - 1, ' ');
regerror(status,
&compiled_regex,
S48_UNSAFE_EXTRACT_STRING(buffer),
buffer_size);
return buffer; }
}
/*
* Stub for regfree().
*/
static s48_value
posix_free_regexp(s48_value sch_regex)
{
regfree(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t));
return S48_UNSPECIFIC;
}