From 3e8d64fdb7678b6b17c0c6ec2ae1188e55038b1f Mon Sep 17 00:00:00 2001
From: Yuichi Nishiwaki
Date: Mon, 31 Mar 2014 20:43:36 +0900
Subject: [PATCH] initial regexp support

---
 CMakeLists.txt                |   1 +
 cmake/FindREGEX.cmake         |  64 +++++++
 contrib/CMakeLists.txt        |   3 +
 contrib/regexp/CMakeLists.txt |  13 ++
 contrib/regexp/src/regexp.c   | 305 ++++++++++++++++++++++++++++++++++
 include/config.h              |   7 +
 src/CMakeLists.txt            |   6 +-
 src/init.c                    |   8 +
 8 files changed, 404 insertions(+), 3 deletions(-)
 create mode 100644 cmake/FindREGEX.cmake
 create mode 100644 contrib/CMakeLists.txt
 create mode 100644 contrib/regexp/CMakeLists.txt
 create mode 100644 contrib/regexp/src/regexp.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 802af85e..da567df5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,7 @@ set(CMAKE_C_FLAGS_DEBUG "-g -DDEBUG=1")
 include_directories(include extlib)
 
 # build picrin
+include(contrib/CMakeLists.txt)
 include(src/CMakeLists.txt)
 include(tools/CMakeLists.txt)
 
diff --git a/cmake/FindREGEX.cmake b/cmake/FindREGEX.cmake
new file mode 100644
index 00000000..bcae6f94
--- /dev/null
+++ b/cmake/FindREGEX.cmake
@@ -0,0 +1,64 @@
+# -*- cmake -*-
+#
+# FindREGEX.cmake: Try to find Regex
+#
+# Copyright (C) 2005-2013 EDF-EADS-Phimeca
+#
+# This library is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library.  If not, see <http://www.gnu.org/licenses/>.
+#
+# @author dutka
+# @date 2010-02-04 16:44:49 +0100 (Thu, 04 Feb 2010)
+#
+#
+# - Try to find Regex
+# Once done this will define
+#
+# REGEX_FOUND - System has Regex
+# REGEX_INCLUDE_DIR - The Regex include directory
+# REGEX_LIBRARIES - The libraries needed to use Regex
+# REGEX_DEFINITIONS - Compiler switches required for using Regex
+
+IF (REGEX_INCLUDE_DIR AND REGEX_LIBRARIES)
+  # in cache already
+  SET(REGEX_FIND_QUIETLY TRUE)
+ENDIF (REGEX_INCLUDE_DIR AND REGEX_LIBRARIES)
+
+#IF (NOT WIN32)
+# # use pkg-config to get the directories and then use these values
+# # in the FIND_PATH() and FIND_LIBRARY() calls
+# FIND_PACKAGE(PkgConfig)
+# PKG_CHECK_MODULES(PC_REGEX regex)
+# SET(REGEX_DEFINITIONS ${PC_REGEX_CFLAGS_OTHER})
+#ENDIF (NOT WIN32)
+
+FIND_PATH(REGEX_INCLUDE_DIR regex.h
+  HINTS
+  ${REGEX_INCLUDEDIR}
+  ${PC_REGEX_INCLUDE_DIRS}
+  PATH_SUFFIXES regex
+  )
+
+FIND_LIBRARY(REGEX_LIBRARIES NAMES c regex
+  HINTS
+  ${PC_REGEX_LIBDIR}
+  ${PC_REGEX_LIBRARY_DIRS}
+  )
+
+INCLUDE(FindPackageHandleStandardArgs)
+
+# handle the QUIETLY and REQUIRED arguments and set REGEX_FOUND to TRUE if
+# all listed variables are TRUE
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(REGEX DEFAULT_MSG REGEX_LIBRARIES REGEX_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(REGEX_INCLUDE_DIR REGEX_LIBRARIES)
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
new file mode 100644
index 00000000..f206ac80
--- /dev/null
+++ b/contrib/CMakeLists.txt
@@ -0,0 +1,3 @@
+include(contrib/regexp/CMakeLists.txt)
+
+add_definitions("-DPIC_CONTRIB_INITS=${PICRIN_CONTRIB_INITS}")
diff --git a/contrib/regexp/CMakeLists.txt b/contrib/regexp/CMakeLists.txt
new file mode 100644
index 00000000..0e28d430
--- /dev/null
+++ b/contrib/regexp/CMakeLists.txt
@@ -0,0 +1,13 @@
+# regex
+find_package(REGEX)
+
+if (REGEX_FOUND)
+  add_definitions(${REGEX_DEFINITIONS})
+  include_directories(${REGEX_INCLUDE_DIR})
+
+  file(GLOB PICRIN_REGEX_SOURCES ${PROJECT_SOURCE_DIR}/contrib/regexp/src/*.c)
+
+  list(APPEND PICRIN_CONTRIB_INITS "void pic_init_regexp(pic_state *)\; pic_init_regexp(pic)\;")
+  list(APPEND PICRIN_CONTRIB_LIBRARIES ${REGEX_LIBRARIES})
+  list(APPEND PICRIN_CONTRIB_SOURCES ${PICRIN_REGEX_SOURCES})
+endif()
diff --git a/contrib/regexp/src/regexp.c b/contrib/regexp/src/regexp.c
new file mode 100644
index 00000000..5ee5d477
--- /dev/null
+++ b/contrib/regexp/src/regexp.c
@@ -0,0 +1,305 @@
+#include "picrin.h"
+#include "picrin/data.h"
+#include "picrin/pair.h"
+#include "picrin/string.h"
+#include "picrin/cont.h"
+
+#include <string.h>
+#include <regex.h>
+
+/** upper bound on the number of (sub)matches regexp-match reports */
+#define PIC_REGEXP_MAX_MATCH 100
+
+struct pic_regexp_t {
+  regex_t reg;
+  int cflags;                   /* cflags the pattern was compiled with */
+  int global;                   /* non-zero iff the g flag was given */
+};
+
+/** destructor registered in regexp_type: frees the pattern and its box */
+static void
+regexp_dtor(pic_state *pic, void *data)
+{
+  struct pic_regexp_t *preg;
+
+  preg = data;
+  regfree(&preg->reg);
+  pic_free(pic, data);
+}
+
+static const pic_data_type regexp_type = { "regexp", regexp_dtor };
+
+#define pic_regexp_p(o) (pic_data_type_p((o), &regexp_type))
+#define pic_regexp_data_ptr(o) ((struct pic_regexp_t *)pic_data_ptr(o)->data)
+
+/**
+ * eflags for a search that resumes at cur inside the subject starting at
+ * head: `^' may match at cur only if cur is the start of the subject or,
+ * for patterns compiled with REG_NEWLINE, directly follows a newline.
+ */
+static int
+regexp_eflags(struct pic_regexp_t *preg, const char *head, const char *cur)
+{
+  if (cur == head) {
+    return 0;
+  }
+  if ((preg->cflags & REG_NEWLINE) != 0 && cur[-1] == '\n') {
+    return 0;
+  }
+  return REG_NOTBOL;
+}
+
+/**
+ * (regexp pattern [flags])
+ *
+ * Compiles pattern as a POSIX extended regular expression.  Recognized
+ * flag characters (either case): g = global matching in regexp-match,
+ * i = ignore case (REG_ICASE), m = newline-sensitive (REG_NEWLINE).
+ * Any other character is ignored.  Raises an error when the pattern does
+ * not compile.
+ */
+static pic_value
+pic_regexp_regexp(pic_state *pic)
+{
+  const char *ptrn, *flags = "";
+  int cflags, global, err;
+  struct pic_regexp_t *reg;
+
+  pic_get_args(pic, "z|z", &ptrn, &flags);
+
+  cflags = REG_EXTENDED;
+  global = 0;
+
+  while (*flags) {
+    switch (*flags++) {
+    case 'g':
+    case 'G':
+      global = 1;
+      break;
+    case 'i':
+    case 'I':
+      cflags |= REG_ICASE;
+      break;
+    case 'm':
+    case 'M':
+      cflags |= REG_NEWLINE;
+      break;
+    }
+  }
+
+  /* keep the parsed flags, not the argument string: the loop above has
+     consumed `flags' and the string itself is not owned by the regexp */
+  reg = pic_alloc(pic, sizeof(struct pic_regexp_t));
+  reg->cflags = cflags;
+  reg->global = global;
+
+  if ((err = regcomp(&reg->reg, ptrn, cflags)) != 0) {
+    char errbuf[regerror(err, &reg->reg, NULL, 0)];
+
+    regerror(err, &reg->reg, errbuf, sizeof errbuf);
+    /* reg->reg is undefined after a failed regcomp, so no regfree here */
+    pic_free(pic, reg);
+
+    pic_errorf(pic, "regexp compilation error: %s", errbuf);
+  }
+
+  return pic_obj_value(pic_data_alloc(pic, &regexp_type, reg));
+}
+
+/** (regexp? obj): #t iff obj is a regexp object */
+static pic_value
+pic_regexp_regexp_p(pic_state *pic)
+{
+  pic_value obj;
+
+  pic_get_args(pic, "o", &obj);
+
+  return pic_bool_value(pic_regexp_p(obj));
+}
+
+/**
+ * (regexp-match regexp input)
+ *
+ * Returns two values: the list of matched strings and the list of their
+ * byte offsets in input, or #f and #f when nothing matches.  For a global
+ * regexp every successive match of the whole pattern is listed; otherwise
+ * the first match is listed followed by its parenthesized groups, with #f
+ * standing in for a group that did not take part in the match.
+ */
+static pic_value
+pic_regexp_regexp_match(pic_state *pic)
+{
+  pic_value reg;
+  const char *input, *cur;
+  regmatch_t match[PIC_REGEXP_MAX_MATCH];
+  pic_value matches, positions;
+  struct pic_regexp_t *preg;
+  pic_str *str;
+  size_t i, n;
+  int pos;
+
+  pic_get_args(pic, "oz", &reg, &input);
+
+  pic_assert_type(pic, reg, regexp);
+
+  preg = pic_regexp_data_ptr(reg);
+
+  matches = pic_nil_value();
+  positions = pic_nil_value();
+
+  if (preg->global) {
+    /* global search */
+
+    cur = input;
+    while (regexec(&preg->reg, cur, 1, match, regexp_eflags(preg, input, cur)) == 0) {
+      /* rm_so/rm_eo are relative to cur, not to the start of input */
+      pos = (int)(cur - input) + (int)match[0].rm_so;
+      str = pic_str_new(pic, cur + match[0].rm_so, match[0].rm_eo - match[0].rm_so);
+      pic_push(pic, pic_obj_value(str), matches);
+      pic_push(pic, pic_int_value(pos), positions);
+
+      cur += match[0].rm_eo;
+      if (match[0].rm_eo == match[0].rm_so) {
+        /* empty match: step over one byte, or stop at the end of input,
+           so that the scan always makes progress */
+        if (*cur == '\0') {
+          break;
+        }
+        ++cur;
+      }
+    }
+  } else {
+    /* local search */
+
+    if (regexec(&preg->reg, input, PIC_REGEXP_MAX_MATCH, match, 0) == 0) {
+      n = preg->reg.re_nsub + 1;
+      if (n > PIC_REGEXP_MAX_MATCH) {
+        n = PIC_REGEXP_MAX_MATCH;
+      }
+      for (i = 0; i < n; ++i) {
+        if (match[i].rm_so == -1) {
+          /* unused group: keep its slot so that later groups stay aligned */
+          pic_push(pic, pic_false_value(), matches);
+          pic_push(pic, pic_false_value(), positions);
+          continue;
+        }
+        str = pic_str_new(pic, input + match[i].rm_so, match[i].rm_eo - match[i].rm_so);
+        pic_push(pic, pic_obj_value(str), matches);
+        pic_push(pic, pic_int_value(match[i].rm_so), positions);
+      }
+    }
+  }
+
+  if (pic_nil_p(matches)) {
+    matches = pic_false_value();
+    positions = pic_false_value();
+  } else {
+    matches = pic_reverse(pic, matches);
+    positions = pic_reverse(pic, positions);
+  }
+  return pic_values2(pic, matches, positions);
+}
+
+/**
+ * (regexp-split regexp input)
+ *
+ * Returns the list of the pieces of input found between successive matches
+ * of regexp.  An empty match separates two adjacent bytes but never yields
+ * an empty piece of its own.
+ */
+static pic_value
+pic_regexp_regexp_split(pic_state *pic)
+{
+  pic_value reg;
+  const char *input, *piece, *cur, *so, *eo;
+  regmatch_t match;
+  struct pic_regexp_t *preg;
+  pic_value output = pic_nil_value();
+
+  pic_get_args(pic, "oz", &reg, &input);
+
+  pic_assert_type(pic, reg, regexp);
+
+  preg = pic_regexp_data_ptr(reg);
+
+  piece = cur = input;          /* piece: start of the pending piece */
+  while (regexec(&preg->reg, cur, 1, &match, regexp_eflags(preg, input, cur)) == 0) {
+    so = cur + match.rm_so;
+    eo = cur + match.rm_eo;
+
+    if (so == eo) {
+      /* empty match: resume one byte further so that the scan advances */
+      if (*so == '\0') {
+        break;
+      }
+      cur = so + 1;
+      if (so == piece) {
+        continue;               /* nothing before it to cut off */
+      }
+    } else {
+      cur = eo;
+    }
+    pic_push(pic, pic_obj_value(pic_str_new(pic, piece, so - piece)), output);
+    piece = eo;
+  }
+
+  pic_push(pic, pic_obj_value(pic_str_new_cstr(pic, piece)), output);
+
+  return pic_reverse(pic, output);
+}
+
+/**
+ * (regexp-replace regexp input txt)
+ *
+ * Returns a copy of input in which every match of regexp is replaced by
+ * txt, regardless of the g flag.
+ */
+static pic_value
+pic_regexp_regexp_replace(pic_state *pic)
+{
+  pic_value reg;
+  const char *input, *cur;
+  regmatch_t match;
+  struct pic_regexp_t *preg;
+  pic_str *txt, *output = pic_str_new(pic, NULL, 0);
+
+  pic_get_args(pic, "ozs", &reg, &input, &txt);
+
+  pic_assert_type(pic, reg, regexp);
+
+  preg = pic_regexp_data_ptr(reg);
+
+  cur = input;
+  while (regexec(&preg->reg, cur, 1, &match, regexp_eflags(preg, input, cur)) == 0) {
+    output = pic_strcat(pic, output, pic_str_new(pic, cur, match.rm_so));
+    output = pic_strcat(pic, output, txt);
+
+    cur += match.rm_eo;
+    if (match.rm_eo == match.rm_so) {
+      /* empty match: copy one byte through so that the scan advances */
+      if (*cur == '\0') {
+        break;
+      }
+      output = pic_strcat(pic, output, pic_str_new(pic, cur, 1));
+      ++cur;
+    }
+  }
+
+  output = pic_strcat(pic, output, pic_str_new(pic, cur, strlen(cur)));
+
+  return pic_obj_value(output);
+}
+
+/** registers the procedures of the (picrin regexp) library */
+void
+pic_init_regexp(pic_state *pic)
+{
+  pic_deflibrary ("(picrin regexp)") {
+    pic_defun(pic, "regexp", pic_regexp_regexp);
+    pic_defun(pic, "regexp?", pic_regexp_regexp_p);
+    pic_defun(pic, "regexp-match", pic_regexp_regexp_match);
+    /* pic_defun(pic, "regexp-search", pic_regexp_regexp_search); */
+    pic_defun(pic, "regexp-split", pic_regexp_regexp_split);
+    pic_defun(pic, "regexp-replace", pic_regexp_regexp_replace);
+  }
+}
diff --git a/include/config.h b/include/config.h
index b289d9f6..2d5fd2a2 100644
--- a/include/config.h
+++ b/include/config.h
@@ -2,6 +2,9 @@
  * See Copyright Notice in picrin.h
  */
 
+/** contribution libraries */
+/* #define PIC_CONTRIB_INITS */
+
 /** switch normal VM and direct threaded VM */
 /* #define PIC_DIRECT_THREADED_VM 1 */
 
@@ -42,6 +45,10 @@
 /* #define GC_DEBUG 1 */
 /* #define GC_DEBUG_DETAIL 1 */
 
+#ifndef PIC_CONTRIB_INITS
+# define PIC_CONTRIB_INITS
+#endif
+
 #ifndef PIC_DIRECT_THREADED_VM
 # if defined(__GNUC__) || defined(__CLANG__)
 # define PIC_DIRECT_THREADED_VM 1
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6a9705c0..93af4e22 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -7,6 +7,6 @@ set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${PROJECT_SOURCE
 set(XFILE_SOURCES extlib/xfile/xfile.c)
 
 # build!
-file(GLOB C_SOURCES ${PROJECT_SOURCE_DIR}/src/*.c)
-add_library(picrin SHARED ${C_SOURCES} ${FLEX_scan_OUTPUTS} ${XFILE_SOURCES})
-target_link_libraries(picrin m)
+file(GLOB PICRIN_SOURCES ${PROJECT_SOURCE_DIR}/src/*.c)
+add_library(picrin SHARED ${PICRIN_SOURCES} ${FLEX_scan_OUTPUTS} ${XFILE_SOURCES} ${PICRIN_CONTRIB_SOURCES})
+target_link_libraries(picrin m ${PICRIN_CONTRIB_LIBRARIES})
diff --git a/src/init.c b/src/init.c
index 537dbdf6..57a48c55 100644
--- a/src/init.c
+++ b/src/init.c
@@ -51,6 +51,12 @@ pic_load_stdlib(pic_state *pic)
 
 }
 
+void
+pic_init_contrib(pic_state *pic)
+{
+  PIC_CONTRIB_INITS
+}
+
 #define PUSH_SYM(pic, lst, name) \
   lst = pic_cons(pic, pic_symbol_value(pic_intern_cstr(pic, name)), lst)
 
@@ -109,6 +115,8 @@ pic_init_core(pic_state *pic)
 
     pic_load_stdlib(pic); DONE;
 
+    pic_init_contrib(pic); DONE;
+
     pic_defun(pic, "features", pic_features);
 
   }