Merge branch 'regexp'
This commit is contained in:
commit
d0072a1406
|
@ -21,6 +21,7 @@ set(CMAKE_C_FLAGS_DEBUG "-g -DDEBUG=1")
|
||||||
include_directories(include extlib)
|
include_directories(include extlib)
|
||||||
|
|
||||||
# build picrin
|
# build picrin
|
||||||
|
include(contrib/CMakeLists.txt)
|
||||||
include(src/CMakeLists.txt)
|
include(src/CMakeLists.txt)
|
||||||
include(tools/CMakeLists.txt)
|
include(tools/CMakeLists.txt)
|
||||||
|
|
||||||
|
|
14
README.md
14
README.md
|
@ -47,6 +47,20 @@ Picrin is a lightweight scheme implementation intended to comply with full R7RS
|
||||||
|
|
||||||
Explicit renaming macro family.
|
Explicit renaming macro family.
|
||||||
|
|
||||||
|
- `(picrin regexp)`
|
||||||
|
|
||||||
|
- `(regexp? obj)`
|
||||||
|
- `(regexp ptrn [flags])`
|
||||||
|
|
||||||
|
Compiles pattern string into a regexp object. A string `flags` may contain any of #\g, #\i, #\m.
|
||||||
|
|
||||||
|
- `(regexp-match re input)`
|
||||||
|
|
||||||
|
Returns two values: a list of match strings, and a list of match indices.
|
||||||
|
|
||||||
|
- `(regexp-replace re input txt)`
|
||||||
|
- `(regexp-split re input)`
|
||||||
|
|
||||||
- `(picrin user)`
|
- `(picrin user)`
|
||||||
|
|
||||||
When you start the REPL, you are dropped in here.
|
When you start the REPL, you are dropped in here.
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
# -*- cmake -*-
|
||||||
|
#
|
||||||
|
# FindRegex.cmake: Try to find Regex
|
||||||
|
#
|
||||||
|
# Copyright (C) 2005-2013 EDF-EADS-Phimeca
|
||||||
|
#
|
||||||
|
# This library is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This library is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with this library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# @author dutka
|
||||||
|
# @date 2010-02-04 16:44:49 +0100 (Thu, 04 Feb 2010)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# - Try to find Regex
|
||||||
|
# Once done this will define
|
||||||
|
#
|
||||||
|
# REGEX_FOUND - System has Regex
|
||||||
|
# REGEX_INCLUDE_DIR - The Regex include directory
|
||||||
|
# REGEX_LIBRARIES - The libraries needed to use Regex
|
||||||
|
# REGEX_DEFINITIONS - Compiler switches required for using Regex
|
||||||
|
|
||||||
|
# When both results are already in the cache, suppress the status message
# that find_package_handle_standard_args would otherwise print again.
if(REGEX_INCLUDE_DIR AND REGEX_LIBRARIES)
  set(Regex_FIND_QUIETLY TRUE)
endif()

# The pkg-config lookup below is disabled, so the PC_REGEX_* hint variables
# used further down are empty unless the caller sets them.
#IF (NOT WIN32)
#  # use pkg-config to get the directories and then use these values
#  # in the FIND_PATH() and FIND_LIBRARY() calls
#  FIND_PACKAGE(PkgConfig)
#  PKG_CHECK_MODULES(PC_REGEX regex)
#  SET(REGEX_DEFINITIONS ${PC_REGEX_CFLAGS_OTHER})
#ENDIF (NOT WIN32)

# Locate regex.h.  The hints are the regex-specific variables; the previous
# ${PC_LIBXML_INCLUDE_DIRS} entry belonged to a different package.
# ${REGEX_INCLUDEDIR} is kept so existing callers that set it still work.
find_path(REGEX_INCLUDE_DIR regex.h
  HINTS
    ${REGEX_INCLUDEDIR}
    ${PC_REGEX_INCLUDEDIR}
    ${PC_REGEX_INCLUDE_DIRS}
  PATH_SUFFIXES regex
)

# Locate the library providing the regex functions: the C library itself is
# tried first, then a separate "regex" library.
find_library(REGEX_LIBRARIES NAMES c regex
  HINTS
    ${PC_REGEX_LIBDIR}
    ${PC_REGEX_LIBRARY_DIRS}
)

include(FindPackageHandleStandardArgs)

# handle the QUIETLY and REQUIRED arguments and set REGEX_FOUND to TRUE if
# all listed variables are TRUE
find_package_handle_standard_args(Regex DEFAULT_MSG REGEX_LIBRARIES REGEX_INCLUDE_DIR)

mark_as_advanced(REGEX_INCLUDE_DIR REGEX_LIBRARIES)
|
|
@ -0,0 +1,3 @@
|
||||||
|
# Pull in the regexp contribution; when regex support is found it appends its
# sources, link libraries and init snippet to the PICRIN_CONTRIB_* lists.
include(contrib/regexp/CMakeLists.txt)
|
||||||
|
|
||||||
|
# Pass the collected init snippet to the compiler as the PIC_CONTRIB_INITS
# macro, which pic_init_contrib expands as its body.
add_definitions("-DPIC_CONTRIB_INITS=${PICRIN_CONTRIB_INITS}")
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Optional regexp contribution: everything below is skipped when no regex
# implementation is found, so the build still succeeds without it.
find_package(REGEX)

if (REGEX_FOUND)
  # Every C file under contrib/regexp/src belongs to the extension.
  file(GLOB PICRIN_REGEX_SOURCES ${PROJECT_SOURCE_DIR}/contrib/regexp/src/*.c)

  # Header search path and compiler switches reported by the find module.
  include_directories(${REGEX_INCLUDE_DIR})
  add_definitions(${REGEX_DEFINITIONS})

  # Register the extension with the main build: its sources, the library to
  # link against, and the C statements that declare and call its initializer.
  list(APPEND PICRIN_CONTRIB_SOURCES ${PICRIN_REGEX_SOURCES})
  list(APPEND PICRIN_CONTRIB_LIBRARIES ${REGEX_LIBRARIES})
  list(APPEND PICRIN_CONTRIB_INITS "void pic_init_regexp(pic_state *)\; pic_init_regexp(pic)\;")
endif()
|
|
@ -0,0 +1,193 @@
|
||||||
|
#include "picrin.h"
|
||||||
|
#include "picrin/data.h"
|
||||||
|
#include "picrin/pair.h"
|
||||||
|
#include "picrin/string.h"
|
||||||
|
#include "picrin/cont.h"
|
||||||
|
|
||||||
|
#include <regex.h>
|
||||||
|
|
||||||
|
/* Payload of a regexp data object: the compiled pattern and its flag string. */
struct pic_regexp_t {
|
||||||
|
regex_t reg; /* compiled POSIX pattern; released with regfree() in regexp_dtor */
|
||||||
|
const char *flags; /* flag string recorded by pic_regexp_regexp; regexp-match searches it for 'g' */
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
regexp_dtor(pic_state *pic, void *data)
|
||||||
|
{
|
||||||
|
struct pic_regexp_t *preg;
|
||||||
|
|
||||||
|
preg = data;
|
||||||
|
regfree(&preg->reg);
|
||||||
|
pic_free(pic, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Type descriptor for regexp data objects: type name plus destructor. */
static const pic_data_type regexp_type = { "regexp", regexp_dtor };

/* True when `o` is a data object tagged with regexp_type. */
#define pic_regexp_p(o) (pic_data_type_p((o), &regexp_type))

/* Payload of `o` viewed as struct pic_regexp_t; performs no type check itself. */
#define pic_regexp_data_ptr(o) ((struct pic_regexp_t *)pic_data_ptr(o)->data)
|
||||||
|
|
||||||
|
static pic_value
|
||||||
|
pic_regexp_regexp(pic_state *pic)
|
||||||
|
{
|
||||||
|
const char *ptrn, *flags = "";
|
||||||
|
int cflags, err;
|
||||||
|
struct pic_regexp_t *reg;
|
||||||
|
|
||||||
|
pic_get_args(pic, "z|z", &ptrn, &flags);
|
||||||
|
|
||||||
|
cflags = REG_EXTENDED;
|
||||||
|
|
||||||
|
while (*flags) {
|
||||||
|
switch (*flags++) {
|
||||||
|
case 'g':
|
||||||
|
case 'G':
|
||||||
|
/* pass */
|
||||||
|
break;
|
||||||
|
case 'i':
|
||||||
|
case 'I':
|
||||||
|
cflags |= REG_ICASE;
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
case 'M':
|
||||||
|
cflags |= REG_NEWLINE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reg = pic_alloc(pic, sizeof(struct pic_regexp_t));
|
||||||
|
reg->flags = flags;
|
||||||
|
|
||||||
|
if ((err = regcomp(®->reg, ptrn, cflags)) != 0) {
|
||||||
|
char errbuf[regerror(err, ®->reg, NULL, 0)];
|
||||||
|
|
||||||
|
regerror(err, ®->reg, errbuf, sizeof errbuf);
|
||||||
|
regexp_dtor(pic, ®->reg);
|
||||||
|
|
||||||
|
pic_errorf(pic, "regexp compilation error: %s", errbuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
return pic_obj_value(pic_data_alloc(pic, ®exp_type, reg));
|
||||||
|
}
|
||||||
|
|
||||||
|
static pic_value
|
||||||
|
pic_regexp_regexp_p(pic_state *pic)
|
||||||
|
{
|
||||||
|
pic_value obj;
|
||||||
|
|
||||||
|
pic_get_args(pic, "o", &obj);
|
||||||
|
|
||||||
|
return pic_bool_value(pic_regexp_p(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
static pic_value
|
||||||
|
pic_regexp_regexp_match(pic_state *pic)
|
||||||
|
{
|
||||||
|
pic_value reg;
|
||||||
|
const char *input;
|
||||||
|
regmatch_t match[100];
|
||||||
|
pic_value matches, positions;
|
||||||
|
pic_str *str;
|
||||||
|
int i, offset;
|
||||||
|
|
||||||
|
pic_get_args(pic, "oz", ®, &input);
|
||||||
|
|
||||||
|
pic_assert_type(pic, reg, regexp);
|
||||||
|
|
||||||
|
matches = pic_nil_value();
|
||||||
|
positions = pic_nil_value();
|
||||||
|
|
||||||
|
if (strchr(pic_regexp_data_ptr(reg)->flags, 'g') != NULL) {
|
||||||
|
/* global search */
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
while (regexec(&pic_regexp_data_ptr(reg)->reg, input, 1, match, 0) != REG_NOMATCH) {
|
||||||
|
pic_push(pic, pic_obj_value(pic_str_new(pic, input, match[0].rm_eo - match[0].rm_so)), matches);
|
||||||
|
pic_push(pic, pic_int_value(offset), positions);
|
||||||
|
|
||||||
|
offset += match[0].rm_eo;
|
||||||
|
input += match[0].rm_eo;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* local search */
|
||||||
|
|
||||||
|
if (regexec(&pic_regexp_data_ptr(reg)->reg, input, 100, match, 0) == 0) {
|
||||||
|
for (i = 0; i < 100; ++i) {
|
||||||
|
if (match[i].rm_so == -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
str = pic_str_new(pic, input + match[i].rm_so, match[i].rm_eo - match[i].rm_so);
|
||||||
|
pic_push(pic, pic_obj_value(str), matches);
|
||||||
|
pic_push(pic, pic_int_value(match[i].rm_so), positions);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pic_nil_p(matches)) {
|
||||||
|
matches = pic_false_value();
|
||||||
|
positions = pic_false_value();
|
||||||
|
} else {
|
||||||
|
matches = pic_reverse(pic, matches);
|
||||||
|
positions = pic_reverse(pic, positions);
|
||||||
|
}
|
||||||
|
return pic_values2(pic, matches, positions);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pic_value
|
||||||
|
pic_regexp_regexp_split(pic_state *pic)
|
||||||
|
{
|
||||||
|
pic_value reg;
|
||||||
|
const char *input;
|
||||||
|
regmatch_t match;
|
||||||
|
pic_value output = pic_nil_value();
|
||||||
|
|
||||||
|
pic_get_args(pic, "oz", ®, &input);
|
||||||
|
|
||||||
|
pic_assert_type(pic, reg, regexp);
|
||||||
|
|
||||||
|
while (regexec(&pic_regexp_data_ptr(reg)->reg, input, 1, &match, 0) != REG_NOMATCH) {
|
||||||
|
pic_push(pic, pic_obj_value(pic_str_new(pic, input, match.rm_so)), output);
|
||||||
|
|
||||||
|
input += match.rm_eo;
|
||||||
|
}
|
||||||
|
|
||||||
|
pic_push(pic, pic_obj_value(pic_str_new_cstr(pic, input)), output);
|
||||||
|
|
||||||
|
return pic_reverse(pic, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pic_value
|
||||||
|
pic_regexp_regexp_replace(pic_state *pic)
|
||||||
|
{
|
||||||
|
pic_value reg;
|
||||||
|
const char *input;
|
||||||
|
regmatch_t match;
|
||||||
|
pic_str *txt, *output = pic_str_new(pic, NULL, 0);
|
||||||
|
|
||||||
|
pic_get_args(pic, "ozs", ®, &input, &txt);
|
||||||
|
|
||||||
|
pic_assert_type(pic, reg, regexp);
|
||||||
|
|
||||||
|
while (regexec(&pic_regexp_data_ptr(reg)->reg, input, 1, &match, 0) != REG_NOMATCH) {
|
||||||
|
output = pic_strcat(pic, output, pic_str_new(pic, input, match.rm_so));
|
||||||
|
output = pic_strcat(pic, output, txt);
|
||||||
|
|
||||||
|
input += match.rm_eo;
|
||||||
|
}
|
||||||
|
|
||||||
|
output = pic_strcat(pic, output, pic_str_new(pic, input, strlen(input)));
|
||||||
|
|
||||||
|
return pic_obj_value(output);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pic_init_regexp(pic_state *pic)
|
||||||
|
{
|
||||||
|
pic_deflibrary ("(picrin regexp)") {
|
||||||
|
pic_defun(pic, "regexp", pic_regexp_regexp);
|
||||||
|
pic_defun(pic, "regexp?", pic_regexp_regexp_p);
|
||||||
|
pic_defun(pic, "regexp-match", pic_regexp_regexp_match);
|
||||||
|
/* pic_defun(pic, "regexp-search", pic_regexp_regexp_search); */
|
||||||
|
pic_defun(pic, "regexp-split", pic_regexp_regexp_split);
|
||||||
|
pic_defun(pic, "regexp-replace", pic_regexp_regexp_replace);
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,6 +2,9 @@
|
||||||
* See Copyright Notice in picrin.h
|
* See Copyright Notice in picrin.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** contribution libraries */
|
||||||
|
/* #define PIC_CONTRIB_INITS */
|
||||||
|
|
||||||
/** switch normal VM and direct threaded VM */
|
/** switch normal VM and direct threaded VM */
|
||||||
/* #define PIC_DIRECT_THREADED_VM 1 */
|
/* #define PIC_DIRECT_THREADED_VM 1 */
|
||||||
|
|
||||||
|
@ -42,6 +45,10 @@
|
||||||
/* #define GC_DEBUG 1 */
|
/* #define GC_DEBUG 1 */
|
||||||
/* #define GC_DEBUG_DETAIL 1 */
|
/* #define GC_DEBUG_DETAIL 1 */
|
||||||
|
|
||||||
|
#ifndef PIC_CONTRIB_INITS
|
||||||
|
# define PIC_CONTRIB_INITS
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PIC_DIRECT_THREADED_VM
|
#ifndef PIC_DIRECT_THREADED_VM
|
||||||
# if defined(__GNUC__) || defined(__CLANG__)
|
# if defined(__GNUC__) || defined(__CLANG__)
|
||||||
# define PIC_DIRECT_THREADED_VM 1
|
# define PIC_DIRECT_THREADED_VM 1
|
||||||
|
|
|
@ -7,6 +7,6 @@ set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${PROJECT_SOURCE
|
||||||
set(XFILE_SOURCES extlib/xfile/xfile.c)
|
set(XFILE_SOURCES extlib/xfile/xfile.c)
|
||||||
|
|
||||||
# build!
|
# build!
|
||||||
file(GLOB C_SOURCES ${PROJECT_SOURCE_DIR}/src/*.c)
|
file(GLOB PICRIN_SOURCES ${PROJECT_SOURCE_DIR}/src/*.c)
|
||||||
add_library(picrin SHARED ${C_SOURCES} ${FLEX_scan_OUTPUTS} ${XFILE_SOURCES})
|
add_library(picrin SHARED ${PICRIN_SOURCES} ${FLEX_scan_OUTPUTS} ${XFILE_SOURCES} ${PICRIN_CONTRIB_SOURCES})
|
||||||
target_link_libraries(picrin m)
|
target_link_libraries(picrin m ${PICRIN_CONTRIB_LIBRARIES})
|
||||||
|
|
|
@ -51,6 +51,12 @@ pic_load_stdlib(pic_state *pic)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Initializes the contribution libraries.
 *
 * The whole body is the PIC_CONTRIB_INITS macro: the build passes the init
 * statements through -DPIC_CONTRIB_INITS, and the configuration header
 * defines it as empty when nothing was supplied.
 */
void
|
||||||
|
pic_init_contrib(pic_state *pic)
|
||||||
|
{
|
||||||
|
PIC_CONTRIB_INITS
|
||||||
|
}
|
||||||
|
|
||||||
#define PUSH_SYM(pic, lst, name) \
|
#define PUSH_SYM(pic, lst, name) \
|
||||||
lst = pic_cons(pic, pic_symbol_value(pic_intern_cstr(pic, name)), lst)
|
lst = pic_cons(pic, pic_symbol_value(pic_intern_cstr(pic, name)), lst)
|
||||||
|
|
||||||
|
@ -109,6 +115,8 @@ pic_init_core(pic_state *pic)
|
||||||
|
|
||||||
pic_load_stdlib(pic); DONE;
|
pic_load_stdlib(pic); DONE;
|
||||||
|
|
||||||
|
pic_init_contrib(pic); DONE;
|
||||||
|
|
||||||
pic_defun(pic, "features", pic_features);
|
pic_defun(pic, "features", pic_features);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue