/* Copyright (c) 1993-2000 by Richard Kelsey and Jonathan Rees. See file COPYING. */ /* * Scheme 48/POSIX regex interface */ #include #include /* POSIX.2 */ #include #include #include "scheme48.h" extern void s48_init_posix_regex(void); static s48_value posix_compile_regexp(s48_value pattern, s48_value extended_p, s48_value ignore_case_p, s48_value submatches_p, s48_value newline_p), posix_regexp_match(s48_value sch_regex, s48_value string, s48_value submatches_p, s48_value bol_p, s48_value eol_p, s48_value sch_start), posix_regexp_error_message(s48_value pattern, s48_value extended_p, s48_value ignore_case_p, s48_value submatches_p, s48_value newline_p), posix_free_regexp(s48_value sch_regex); /* * Record type imported from Scheme. */ static s48_value posix_regexp_match_type_binding = S48_FALSE; /* * Install all exported functions in Scheme48. */ void s48_init_posix_regexp(void) { /* Export our stuff. */ S48_EXPORT_FUNCTION(posix_compile_regexp); S48_EXPORT_FUNCTION(posix_regexp_match); S48_EXPORT_FUNCTION(posix_regexp_error_message); S48_EXPORT_FUNCTION(posix_free_regexp); /* Protect and import the regex-match type. */ S48_GC_PROTECT_GLOBAL(posix_regexp_match_type_binding); posix_regexp_match_type_binding = s48_get_imported_binding("posix-regexp-match-type"); } /* * Interface to regcomp. We encode the flags, make the return value, and * then call regcomp() to fill it in. */ static s48_value posix_compile_regexp(s48_value pattern, s48_value extended_p, s48_value ignore_case_p, s48_value submatches_p, s48_value newline_p) { s48_value sch_regex; int status; S48_DECLARE_GC_PROTECT(1); int flags = S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0 | S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0 | S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB | S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0; S48_GC_PROTECT_1(pattern); S48_CHECK_STRING(pattern); sch_regex = S48_MAKE_VALUE(regex_t); status = regcomp(S48_UNSAFE_EXTRACT_VALUE_POINTER(sch_regex, regex_t), S48_UNSAFE_EXTRACT_STRING(pattern), flags); S48_GC_UNPROTECT(); if (status == 0) return sch_regex; else return S48_UNSAFE_ENTER_FIXNUM(status); /* not that it can do them much good */ } /* * Interface to regexec. * * Returns #f if there is no match, #t if there is a match and submatches_p * is false, and a list of regex-match records otherwise. * * Most of this is making the buffer for the match structs and then translating * them into Scheme match records. */ static s48_value posix_regexp_match(s48_value sch_regex, s48_value string, s48_value submatches_p, s48_value bol_p, s48_value eol_p, s48_value sch_start) { int status; s48_value result; int start, len; /* re_nsub doesn't include the full pattern */ size_t nmatch = 1 + S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)->re_nsub; regmatch_t *pmatch, pmatch_buffer[32]; int flags = S48_EXTRACT_BOOLEAN(bol_p) ? 0 : REG_NOTBOL | S48_EXTRACT_BOOLEAN(eol_p) ? 0 : REG_NOTEOL; start = s48_extract_fixnum(sch_start); len = S48_STRING_LENGTH(string); if ((start < 0) || (start > len)) s48_raise_range_error(sch_start, s48_enter_fixnum(0), s48_enter_fixnum(len)); if (nmatch <= 32) pmatch = pmatch_buffer; else { pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t)); if (pmatch == NULL) s48_raise_out_of_memory_error(); } status = regexec(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t), S48_UNSAFE_EXTRACT_STRING(string) + start, nmatch, pmatch, flags); if (status == REG_NOMATCH) result = S48_FALSE; else if (! S48_EXTRACT_BOOLEAN(submatches_p)) result = S48_TRUE; else { s48_value match = S48_FALSE; s48_value matches = S48_NULL; int i; S48_DECLARE_GC_PROTECT(2); S48_GC_PROTECT_2(match, matches); for(i = nmatch - 1; i > -1; i--) { if (pmatch[i].rm_so == -1) match = S48_FALSE; else { match = s48_make_record(posix_regexp_match_type_binding); S48_UNSAFE_RECORD_SET(match, 0, s48_enter_fixnum(pmatch[i].rm_so + start)); S48_UNSAFE_RECORD_SET(match, 1, s48_enter_fixnum(pmatch[i].rm_eo + start)); S48_UNSAFE_RECORD_SET(match, 2, S48_FALSE); } /* submatches */ matches = s48_cons(match, matches); } S48_GC_UNPROTECT(); result = matches; } if (nmatch > 32) free(pmatch); return result; } /* * Interface to regcomp. * * This takes the same arguments as `compile_regexp' but returns the error * message, if any, that `regcomp()' returns. For some reason `regerror()' * requires both the status code and the compiled pattern buffer returned * by `regcomp()'. `compile_regexp' only returned the status so we have to * redo the compilation. * */ static s48_value posix_regexp_error_message(s48_value pattern, s48_value extended_p, s48_value ignore_case_p, s48_value submatches_p, s48_value newline_p) { regex_t compiled_regex; int status; int flags = S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0 | S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0 | S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB | S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0; S48_CHECK_STRING(pattern); status = regcomp(&compiled_regex, S48_UNSAFE_EXTRACT_STRING(pattern), flags); if (status == 0) return S48_FALSE; else { size_t buffer_size; s48_value buffer; buffer_size = regerror(status, &compiled_regex, NULL, 0); /* For string lengths C counts the nul, Scheme doesn't. */ buffer = s48_make_string(buffer_size - 1, ' '); regerror(status, &compiled_regex, S48_UNSAFE_EXTRACT_STRING(buffer), buffer_size); return buffer; } } /* * Stub for regfree(). */ static s48_value posix_free_regexp(s48_value sch_regex) { regfree(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)); return S48_UNSPECIFIC; }