just a snapshot
This commit is contained in:
parent
c9f54e79da
commit
4e935c24a5
139
Makefile
139
Makefile
|
@ -14,7 +14,7 @@ INSTALL = /usr/local/bin/install -c
|
|||
INSTALL_PROGRAM = ${INSTALL}
|
||||
INSTALL_DATA = ${INSTALL} -m 644
|
||||
|
||||
LDFLAGS =
|
||||
LDFLAGS = -g
|
||||
LIBOBJS =
|
||||
|
||||
RM = rm -f
|
||||
|
@ -41,10 +41,10 @@ mandir = $(prefix)/man/man$(manext)
|
|||
# LDFLAGS = -N
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CPPFLAGS) $(DEFS) -I$(srcdir)/c $(CFLAGS) -o $@ $<
|
||||
$(CC) -c $(CPPFLAGS) $(DEFS) -I$(srcdir)/c -I$(srcdir)/cig $(CFLAGS) -o $@ $<
|
||||
|
||||
# You might want to change RUNNABLE to "s48"
|
||||
RUNNABLE = s4853
|
||||
RUNNABLE = scsh06
|
||||
MANPAGE = $(RUNNABLE).$(manext)
|
||||
LIB = $(libdir)/$(RUNNABLE)
|
||||
|
||||
|
@ -107,9 +107,12 @@ CIGOBJS = cig/libcig.o cig/libcig1.o
|
|||
|
||||
#JMG: the scsh-lib
|
||||
LIBSCSH = scsh/lib$(VM).a
|
||||
SCSHVMHACKS = proc2.o
|
||||
SCSHVMHACKS = scsh/proc2.o
|
||||
|
||||
#JMG: and its object files
|
||||
#JMG left out: scsh/re.o scsh/re1.o scsh/regexp/regexp.o scsh/regexp/regsub.o
|
||||
# scsh/fdports1.o
|
||||
# scsh/rdelim.o
|
||||
SCSHOBJS = \
|
||||
scsh/dirstuff1.o \
|
||||
scsh/flock.o scsh/flock1.o \
|
||||
|
@ -119,25 +122,21 @@ SCSHOBJS = \
|
|||
scsh/machine/libansi.o \
|
||||
scsh/network.o scsh/network1.o \
|
||||
scsh/putenv.o \
|
||||
scsh/re.o scsh/re1.o \
|
||||
scsh/regexp/regexp.o \
|
||||
scsh/regexp/regsub.o \
|
||||
scsh/rx/re-low.o scsh/rx/re1.o \
|
||||
scsh/select.o scsh/select1.o \
|
||||
scsh/sleep1.o \
|
||||
scsh/syscalls.o scsh/syscalls1.o \
|
||||
scsh/time.o scsh/time1.o \
|
||||
scsh/tty.o scsh/tty1.o \
|
||||
scsh/userinfo1.o \
|
||||
scsh/rdelim.o \
|
||||
scsh/fdports1.o \
|
||||
scsh/sighandlers1.o scsh/sighandlers.o
|
||||
scsh/sighandlers1.o scsh/sighandlers.o \
|
||||
scsh/regexp/libregex.a
|
||||
|
||||
UNIX_OBJS = c/unix/misc.o c/unix/io.o c/unix/fd-io.o c/unix/event.o
|
||||
|
||||
#JMG: I omit process_args.o and SCSHVMHACKS at the moment
|
||||
OBJS = c/scheme48vm.o c/scheme48heap.o c/extension.o c/external.o \
|
||||
$(CIGOBJS)
|
||||
#$(SCSHOBJS)
|
||||
#JMG: process_args.o and SCSHVMHACKS are no longer omitted
|
||||
S48OBJS = c/scheme48vm.o c/scheme48heap.o c/extension.o c/external.o
|
||||
OBJS = scsh/process_args.o $(S48OBJS) $(CIGOBJS) $(SCSHOBJS) $(SCSHVMHACKS)
|
||||
|
||||
FAKEHS = c/fake/dlfcn.h c/fake/sigact.h c/fake/strerror.h \
|
||||
c/fake/sys-select.h
|
||||
|
@ -159,9 +158,16 @@ enough: $(VM) $(IMAGE) go $(LIBCIG) scsh $(LIBSCSH)
|
|||
# External code to include in the VM
|
||||
# After changing any of these you should delete `scheme48vm' and remake it.
|
||||
|
||||
CIGGED = flock network select syscalls tty time sighandlers
|
||||
#re rdelim
|
||||
CIGGEDOBJ = $(patsubst %,scsh/%.o, $(CIGGED))
|
||||
CIGGEDINIT = $(patsubst %,s48_init_%, $(CIGGED))
|
||||
|
||||
EXTERNAL_OBJECTS = $(SOCKET_OBJECTS) $(LOOKUP_OBJECTS)
|
||||
EXTERNAL_FLAGS = $(SOCKET_FLAGS)
|
||||
EXTERNAL_INITIALIZERS = $(SOCKET_INITIALIZERS) $(LOOKUP_INITIALIZERS) ciginit
|
||||
EXTERNAL_INITIALIZERS = $(SOCKET_INITIALIZERS) $(LOOKUP_INITIALIZERS) s48_init_cig \
|
||||
$(CIGGEDINIT)
|
||||
|
||||
|
||||
# Rules for any external code.
|
||||
|
||||
|
@ -197,6 +203,7 @@ JMG: scsh stuff
|
|||
# This says how to process .scm files with cig to make .c stubs.
|
||||
.SUFFIXES: .scm
|
||||
.scm.c:
|
||||
# $(srcdir)/cig/cigscript $*
|
||||
$(srcdir)/$(VM) -o $(srcdir)/$(VM) -i $(CIG) < $< > $*.c
|
||||
|
||||
# These .h files mediate between the code exported from foo1.c
|
||||
|
@ -205,6 +212,7 @@ JMG: scsh stuff
|
|||
scsh/dirstuff1.o: scsh/dirstuff1.h
|
||||
scsh/rdelim.o: scsh/fdports1.h
|
||||
scsh/userinfo1.o: scsh/userinfo1.h
|
||||
|
||||
scsh/fdports1.o scsh/fdports.o: scsh/fdports1.h
|
||||
scsh/flock1.o scsh/flock.o: scsh/flock1.h
|
||||
scsh/network1.o scsh/network.o: scsh/network1.h
|
||||
|
@ -214,11 +222,17 @@ scsh/syscalls1.o scsh/syscalls.o: scsh/syscalls1.h
|
|||
scsh/time1.o scsh/time.o: scsh/time1.h
|
||||
scsh/tty1.o scsh/tty.o: scsh/tty1.h
|
||||
|
||||
scsh/rx/re1.o scsh/rx/re-low.o: scsh/rx/re1.h
|
||||
|
||||
scsh/syscalls.o: scsh/syscalls1.h scsh/dirstuff1.h scsh/fdports1.h \
|
||||
scsh/select1.h scsh/userinfo1.h
|
||||
|
||||
scsh/sighandlers1.o scsh/sighandlers.o: scsh/sighandlers1.h
|
||||
|
||||
# Not really, but making regexp/libregex.a makes the regexp/regex.h file that
|
||||
# re-low.c actually does need.
|
||||
scsh/rx/re-low.o: scsh/regexp/libregex.a
|
||||
|
||||
include $(srcdir)/scsh/machine/Makefile.inc
|
||||
# Berkeley make wants to see this instead: (or use GNU make on BSD. -bri)
|
||||
#.include "$(srcdir)/scsh/machine/Makefile.inc"
|
||||
|
@ -233,16 +247,19 @@ $(VM): c/main.o $(OBJS) $(UNIX_OBJS) $(LIBOBJS) $(EXTERNAL_OBJECTS)
|
|||
$(EXTERNAL_OBJECTS) $(EXTERNAL_LD_FLAGS) && \
|
||||
rm -f /tmp/s48_external_$$$$.c
|
||||
|
||||
|
||||
|
||||
|
||||
#JMG: again cig and scsh-lib
|
||||
$(LIBCIG): c/main.o $(OBJS)
|
||||
# $(CC) -r -o $@ main.o $(OBJS)
|
||||
$(RM) $@
|
||||
$(AR) $@ c/main.o $(OBJS)
|
||||
$(AR) $@ c/main.o $(OBJS)
|
||||
$(RANLIB) $@
|
||||
|
||||
$(LIBSCSH): smain.o $(OBJS)
|
||||
$(LIBSCSH): c/smain.o $(OBJS)
|
||||
$(RM) $@
|
||||
$(AR) $@ smain.o $(OBJS)
|
||||
$(AR) $@ c/smain.o $(OBJS)
|
||||
$(RANLIB) $@
|
||||
|
||||
c/main.o: c/main.c c/scheme48vm.h c/scheme48heap.h
|
||||
|
@ -275,8 +292,10 @@ c/fake/strerror.o: c/fake/strerror.h
|
|||
$(IMAGE): $(VM) scheme/env/init-defpackage.scm scheme/more-interfaces.scm \
|
||||
scheme/link-packages.scm scheme/more-packages.scm \
|
||||
$(usual-files) build/initial.debug build/build-usual-image
|
||||
build/build-usual-image . "`pwd`/scheme" '$(IMAGE)' './$(VM)' \
|
||||
'$(INITIAL)'
|
||||
sh $(srcdir)/build/build-usual-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
|
||||
"$(VM)" "$(INITIAL)"
|
||||
# build/build-usual-image . "`pwd`/scheme" '$(IMAGE)' './$(VM)' \
|
||||
# '$(INITIAL)'
|
||||
|
||||
### Fake targets: all clean install man dist
|
||||
|
||||
|
@ -361,14 +380,19 @@ clean: clean-cig clean-scsh
|
|||
-rm -f $(VM) *.o c/unix/*.o c/*.o c/fake/*.o \
|
||||
TAGS $(IMAGE) \
|
||||
build/*.tmp $(MANPAGE) build/linker.image \
|
||||
scheme/debug/*.image scheme/debug/*.debug config.cache \
|
||||
scheme/debug/*.image scheme/debug/*.debug \
|
||||
scheme/vm/scheme48vm.c scheme/vm/scheme48heap.c \
|
||||
go $(distname)
|
||||
clean-cig:
|
||||
-rm -f cig/*.o $(CIG) $(CIG).image $(LIBCIG)
|
||||
|
||||
clean-scm2c:
|
||||
rm -f scsh/flock.c scsh/network.c scsh/rdelim.c \
|
||||
scsh/re.c scsh/select.c scsh/syscalls.c scsh/tty.c scsh/time.c
|
||||
|
||||
#JMG: moved config.cache to distclean
|
||||
distclean: clean
|
||||
rm -f Makefile config.log config.status c/sysdep.h
|
||||
rm -f Makefile config.log config.status c/sysdep.h config.cache
|
||||
|
||||
check: $(VM) $(IMAGE) scheme/debug/check.scm
|
||||
( \
|
||||
|
@ -564,9 +588,9 @@ scheme/debug/medium.image: $(LINKER_IMAGE) $(CONFIG_FILES)
|
|||
# The following have not been updated for the new directory organization
|
||||
|
||||
c/smain.o: c/main.c
|
||||
$(CC) -c $(CPPFLAGS) $(DEFS) $(CFLAGS) -DSTATIC_AREAS -o $@ c/main.c
|
||||
$(CC) -c $(CPPFLAGS) $(DEFS) $(CFLAGS) -DSTATIC_AREAS -o $@ $(srcdir)/c/main.c
|
||||
|
||||
mini: mini-heap.o smain.o
|
||||
mini: mini-heap.o c/smain.o
|
||||
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ c/smain.o mini-heap.o $(OBJS) $(LIBS)
|
||||
|
||||
mini-heap.o: mini-heap.c
|
||||
|
@ -634,6 +658,7 @@ i-know-what-i-am-doing:
|
|||
mv ../scheme/vm/scheme48vm.c ../scheme/vm/scheme48heap.c ../c
|
||||
cig: $(CIG) $(CIG).image $(LIBCIG)
|
||||
|
||||
|
||||
$(CIG): $(VM) $(IMAGE) $(srcdir)/cig/cig.scm $(srcdir)/cig/libcig.scm
|
||||
(echo ",batch"; \
|
||||
echo ",translate =scheme48/ $(srcdir)/scheme/"; \
|
||||
|
@ -646,6 +671,7 @@ $(CIG): $(VM) $(IMAGE) $(srcdir)/cig/cig.scm $(srcdir)/cig/libcig.scm
|
|||
| ./$(VM) -i ./$(IMAGE)
|
||||
$(srcdir)/cig/image2script $(LIB)/$(VM) </tmp/cig > $(CIG)
|
||||
-chmod +x $(CIG)
|
||||
# mv /tmp/cig $(srcdir)/cig/standalone.image
|
||||
$(RM) /tmp/cig
|
||||
|
||||
$(CIG)2:
|
||||
|
@ -713,7 +739,6 @@ SCHEME =scsh/awk.scm \
|
|||
scsh/procobj.scm \
|
||||
scsh/pty.scm \
|
||||
scsh/rdelim.scm \
|
||||
scsh/re.scm \
|
||||
scsh/rw.scm \
|
||||
scsh/scsh-condition.scm \
|
||||
scsh/scsh-interfaces.scm \
|
||||
|
@ -744,7 +769,6 @@ scsh/flock.c: scsh/flock.scm
|
|||
scsh/jcontrol2.c: scsh/jcontrol2.scm
|
||||
scsh/network.c: scsh/network.scm
|
||||
scsh/rdelim.c: scsh/rdelim.scm
|
||||
scsh/re.c: scsh/re.scm
|
||||
scsh/select.c: scsh/select.scm
|
||||
scsh/syscalls.c: scsh/syscalls.scm
|
||||
scsh/tty.c: scsh/tty.scm
|
||||
|
@ -756,19 +780,55 @@ scsh/scsh: scsh/scsh-tramp.c
|
|||
-DIMAGE=\"$(LIB)/scsh.image\" \
|
||||
scsh/scsh-tramp.c
|
||||
|
||||
#JMG : should be ,open external-calls
|
||||
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
|
||||
$(srcdir)/scsh/machine/packages.scm \
|
||||
$(srcdir)/scsh/rx/packages.scm \
|
||||
$(srcdir)/scsh/rx/cond-package.scm \
|
||||
$(srcdir)/scsh/scsh-package.scm \
|
||||
$(srcdir)/scsh/lib/string-pack.scm \
|
||||
$(srcdir)/scsh/lib/list-pack.scm \
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm
|
||||
|
||||
#JMG : no longer appropriate
|
||||
#echo ",open external-calls"; \
|
||||
# echo "(lookup-all-externals)"; \
|
||||
|
||||
bs: build/build-scsh-image
|
||||
sh $(srcdir)/build/build-scsh-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
|
||||
"$(VM)" cig/cig.image
|
||||
|
||||
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
|
||||
$(srcdir)/scsh/machine/packages.scm \
|
||||
$(srcdir)/scsh/rx/packages.scm \
|
||||
$(srcdir)/scsh/rx/cond-package.scm \
|
||||
$(srcdir)/scsh/scsh-package.scm \
|
||||
$(srcdir)/scsh/lib/string-pack.scm \
|
||||
$(srcdir)/scsh/lib/list-pack.scm \
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm
|
||||
|
||||
scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
||||
(echo ",translate =scheme48/ $(srcdir)/scheme/"; \
|
||||
echo ",batch on"; \
|
||||
echo ",load-package floatnums"; \
|
||||
echo ",open externals"; \
|
||||
echo "(lookup-all-externals)"; \
|
||||
echo ",config"; \
|
||||
echo ",load $(srcdir)/scsh/let-opt.scm"; \
|
||||
echo ",load $(srcdir)/scsh/scsh-interfaces.scm"; \
|
||||
echo ",load $(srcdir)/scsh/machine/packages.scm"; \
|
||||
echo ",load $(srcdir)/scsh/scsh-package.scm") \
|
||||
| ./$(VM) -o ./$(VM) -h 5000000 -i $(CIG).image
|
||||
echo ",load $(loads)"; \
|
||||
echo ",load-package scsh"; \
|
||||
echo ",load-package events"; \
|
||||
echo ",load-package scsh-here-string-hax"; \
|
||||
echo ",translate =scheme48/ $(LIB)/"; \
|
||||
echo ",load-package list-lib"; \
|
||||
echo ",load-package string-lib"; \
|
||||
echo ",load-package ccp-lib"; \
|
||||
echo ",in scsh-level-0"; \
|
||||
echo "(init-scsh-signal)";\
|
||||
echo ",user"; \
|
||||
echo ",open floatnums"; \
|
||||
echo ",open scsh"; \
|
||||
echo ",open list-lib string-lib ccp-lib"; \
|
||||
echo ",dump scsh06.image"; \
|
||||
echo "(dump-scsh \"scsh/scsh.image\")" \
|
||||
echo ",batch off") \
|
||||
| ./$(VM) -o ./$(VM) -i $(CIG).image -h 10000000
|
||||
|
||||
#scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
||||
# (echo ",translate =scheme48/ $(srcdir)/"; \
|
||||
|
@ -803,8 +863,8 @@ scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
|||
# < scsh/scsh.image > $@
|
||||
# -chmod +x $@
|
||||
|
||||
scsh/regexp/libregexp.a:
|
||||
cd ./scsh/regexp; $(MAKE)
|
||||
scsh/regexp/libregex.a:
|
||||
cd ./scsh/regexp; $(MAKE) lib
|
||||
|
||||
scsh/scsh.vm: $(LIBSCSH) $(VM) scsh/scsh.image
|
||||
./$(VM) -o ./$(VM) -h 8000000 -i scsh/scsh.image \
|
||||
|
@ -826,6 +886,9 @@ install-scsh: scsh
|
|||
do $(INSTALL_DATA) $$f $(LIB)/scsh/; done
|
||||
|
||||
clean-scsh:
|
||||
$(RM) scsh/*.o scsh/regexp/*.o scsh/machine/*.o scsh/*.image scsh/scsh
|
||||
$(RM) $(LIBSCSH) scsh/scsh.vm
|
||||
$(RM) scsh/*.o scsh/regexp/*.o scsh/rx/*.o scsh/machine/*.o
|
||||
$(RM) scsh/*.image
|
||||
$(RM) $(LIBSCSH) scsh/scsh$(EXEEXT) scsh/scsh.vm
|
||||
-cd scsh/regexp; $(MAKE) clean
|
||||
|
||||
|
||||
|
|
51
Makefile.in
51
Makefile.in
|
@ -122,13 +122,15 @@ SCSHOBJS = \
|
|||
scsh/machine/libansi.o \
|
||||
scsh/network.o scsh/network1.o \
|
||||
scsh/putenv.o \
|
||||
scsh/rx/re-low.o scsh/rx/re1.o \
|
||||
scsh/select.o scsh/select1.o \
|
||||
scsh/sleep1.o \
|
||||
scsh/syscalls.o scsh/syscalls1.o \
|
||||
scsh/time.o scsh/time1.o \
|
||||
scsh/tty.o scsh/tty1.o \
|
||||
scsh/userinfo1.o \
|
||||
scsh/sighandlers1.o scsh/sighandlers.o
|
||||
scsh/sighandlers1.o scsh/sighandlers.o \
|
||||
scsh/regexp/libregex.a
|
||||
|
||||
UNIX_OBJS = c/unix/misc.o c/unix/io.o c/unix/fd-io.o c/unix/event.o
|
||||
|
||||
|
@ -227,6 +229,8 @@ scsh/syscalls.o: scsh/syscalls1.h scsh/dirstuff1.h scsh/fdports1.h \
|
|||
|
||||
scsh/sighandlers1.o scsh/sighandlers.o: scsh/sighandlers1.h
|
||||
|
||||
# Not really, but making regexp/libregex.a makes the regexp/regex.h file that
|
||||
# re-low.c actually does need.
|
||||
scsh/rx/re-low.o: scsh/regexp/libregex.a
|
||||
|
||||
include $(srcdir)/scsh/machine/Makefile.inc
|
||||
|
@ -244,17 +248,7 @@ $(VM): c/main.o $(OBJS) $(UNIX_OBJS) $(LIBOBJS) $(EXTERNAL_OBJECTS)
|
|||
rm -f /tmp/s48_external_$$$$.c
|
||||
|
||||
|
||||
$(CIGVM): c/main.o $(S48OBJS) $(CIGOBJS) $(UNIX_OBJS) $(LIBOBJS) \
|
||||
$(LOOKUP_OBJECTS)
|
||||
rm -f /tmp/s48_external_$$$$.c && \
|
||||
build/build-external-modules /tmp/s48_external_$$$$.c \
|
||||
$(LOOKUP_INITIALIZERS) s48_init_cig && \
|
||||
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ c/main.o $(LOOKUP_OBJECTS) \
|
||||
$(S48OBJS) $(CIGOBJS) $(UNIX_OBJS) \
|
||||
/tmp/s48_external_$$$$.c \
|
||||
$(LIBOBJS) $(LIBS) \
|
||||
$(EXTERNAL_LD_FLAGS) && \
|
||||
rm -f /tmp/s48_external_$$$$.c
|
||||
|
||||
|
||||
#JMG: again cig and scsh-lib
|
||||
$(LIBCIG): c/main.o $(OBJS)
|
||||
|
@ -745,7 +739,6 @@ SCHEME =scsh/awk.scm \
|
|||
scsh/procobj.scm \
|
||||
scsh/pty.scm \
|
||||
scsh/rdelim.scm \
|
||||
scsh/re.scm \
|
||||
scsh/rw.scm \
|
||||
scsh/scsh-condition.scm \
|
||||
scsh/scsh-interfaces.scm \
|
||||
|
@ -773,10 +766,9 @@ SCHEME =scsh/awk.scm \
|
|||
############################################################
|
||||
cig/libcig.c: cig/libcig.scm
|
||||
scsh/flock.c: scsh/flock.scm
|
||||
#scsh/jcontrol2.c: scsh/jcontrol2.scm
|
||||
scsh/jcontrol2.c: scsh/jcontrol2.scm
|
||||
scsh/network.c: scsh/network.scm
|
||||
scsh/rdelim.c: scsh/rdelim.scm
|
||||
#scsh/re.c: scsh/re.scm
|
||||
scsh/select.c: scsh/select.scm
|
||||
scsh/syscalls.c: scsh/syscalls.scm
|
||||
scsh/tty.c: scsh/tty.scm
|
||||
|
@ -805,31 +797,38 @@ bs: build/build-scsh-image
|
|||
sh $(srcdir)/build/build-scsh-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
|
||||
"$(VM)" cig/cig.image
|
||||
|
||||
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
|
||||
$(srcdir)/scsh/machine/packages.scm \
|
||||
$(srcdir)/scsh/rx/packages.scm \
|
||||
$(srcdir)/scsh/rx/cond-package.scm \
|
||||
$(srcdir)/scsh/scsh-package.scm \
|
||||
$(srcdir)/scsh/lib/string-pack.scm \
|
||||
$(srcdir)/scsh/lib/list-pack.scm \
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm
|
||||
|
||||
scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
||||
# sh $(srcdir)/build/build-scsh-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
|
||||
# "$(VM)" cig/cig.image
|
||||
(echo ",translate =scheme48/ $(srcdir)/scheme/"; \
|
||||
echo ",batch on"; \
|
||||
echo ",load-package floatnums"; \
|
||||
echo ",config"; \
|
||||
echo ",load $(srcdir)/scsh/let-opt.scm"; \
|
||||
echo ",load $(srcdir)/scsh/scsh-interfaces.scm"; \
|
||||
echo ",load $(srcdir)/scsh/machine/packages.scm"; \
|
||||
echo ",load $(srcdir)/scsh/scsh-package.scm"; \
|
||||
echo ",load $(loads)"; \
|
||||
echo ",load-package scsh"; \
|
||||
echo ",load-package events"; \
|
||||
echo ",load-package scsh-here-string-hax"; \
|
||||
echo ",translate =scheme48/ $(LIB)/"; \
|
||||
echo ",load-package list-lib"; \
|
||||
echo ",load-package string-lib"; \
|
||||
echo ",load-package ccp-lib"; \
|
||||
echo ",in scsh-level-0"; \
|
||||
echo "(init-scsh-signal)";\
|
||||
echo ",user"; \
|
||||
echo ",open floatnums"; \
|
||||
echo ",open scsh"; \
|
||||
echo ",open list-lib string-lib ccp-lib"; \
|
||||
echo ",dump scsh06.image"; \
|
||||
echo "(dump-scsh \"scsh/scsh.image\")" \
|
||||
echo ",batch off") \
|
||||
| ./$(VM) -o ./$(VM) -i $(CIG).image -h 5000000
|
||||
| ./$(VM) -o ./$(VM) -i $(CIG).image -h 10000000
|
||||
|
||||
#scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
||||
# (echo ",translate =scheme48/ $(srcdir)/"; \
|
||||
|
@ -887,7 +886,9 @@ install-scsh: scsh
|
|||
do $(INSTALL_DATA) $$f $(LIB)/scsh/; done
|
||||
|
||||
clean-scsh:
|
||||
$(RM) scsh/*.o scsh/regexp/*.o scsh/machine/*.o scsh/*.image scsh/scsh
|
||||
$(RM) $(LIBSCSH) scsh/scsh.vm
|
||||
# -cd scsh/regexp; $(MAKE) clean JMG: no extra regexp
|
||||
$(RM) scsh/*.o scsh/regexp/*.o scsh/rx/*.o scsh/machine/*.o
|
||||
$(RM) scsh/*.image
|
||||
$(RM) $(LIBSCSH) scsh/scsh$(EXEEXT) scsh/scsh.vm
|
||||
-cd scsh/regexp; $(MAKE) clean
|
||||
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ vm=$4
|
|||
initial=$5
|
||||
USER=${USER-`logname 2>/dev/null || echo '*GOK*'`}
|
||||
|
||||
$vm -i $initial -a batch <<EOF
|
||||
./$vm -o ./$vm -i $initial batch <<EOF
|
||||
,load $srcdir/scheme/env/init-defpackage.scm
|
||||
((*structure-ref filenames 'set-translation!)
|
||||
"=scheme48/" "$srcdir/scheme/")
|
||||
|
|
7715
build/initial.debug
7715
build/initial.debug
File diff suppressed because it is too large
Load Diff
Binary file not shown.
28
c/main.c
28
c/main.c
|
@ -7,9 +7,12 @@
|
|||
#include "scheme48vm.h"
|
||||
#include "scheme48heap.h"
|
||||
|
||||
/* I bumped this up from 1.5 Mcell because the debugging info put us over
|
||||
** the top. -Olin
|
||||
*/
|
||||
#if !defined(DEFAULT_HEAP_SIZE)
|
||||
/* 1.5 megacell = 6 megabytes (3 meg per semispace) */
|
||||
#define DEFAULT_HEAP_SIZE 1500000L
|
||||
/* 4 megacell = 16 megabytes (8 meg per semispace) */
|
||||
#define DEFAULT_HEAP_SIZE 4000000L
|
||||
#endif
|
||||
|
||||
#if !defined(DEFAULT_STACK_SIZE)
|
||||
|
@ -34,10 +37,18 @@ extern void s48_initialize_external_modules(void);
|
|||
char *s48_object_file; /* specified via a command line argument */
|
||||
char *s48_reloc_file; /* dynamic loading will set this */
|
||||
|
||||
char *prog_name;
|
||||
|
||||
char ** process_args(char **argv,
|
||||
long *heap_size,
|
||||
long *stack_size,
|
||||
char **object_file,
|
||||
char **image_name);
|
||||
int
|
||||
main(argc, argv)
|
||||
int argc; char **argv;
|
||||
{
|
||||
char **argp; //JMG
|
||||
char *image_name = DEFAULT_IMAGE_NAME;
|
||||
long heap_size = DEFAULT_HEAP_SIZE; /* in numbers of cells */
|
||||
long stack_size = DEFAULT_STACK_SIZE; /* in numbers of cells */
|
||||
|
@ -57,10 +68,15 @@ main(argc, argv)
|
|||
|
||||
long vm_argc = 0;
|
||||
char *me = *argv; /* Save program name. */
|
||||
prog_name = *argv++;
|
||||
|
||||
s48_object_file = s48_reloc_file = NULL;
|
||||
argv=process_args(argv,
|
||||
&heap_size, &stack_size,
|
||||
&s48_object_file, &image_name);
|
||||
for(argc=0, argp=argv; *argp; argc++, argp++); /* Recompute argc. */
|
||||
|
||||
argv++; argc--; /* Skip program name. */
|
||||
/* argv++; argc--; Skip program name.
|
||||
|
||||
for (; argc > 0; argc--, argv++)
|
||||
if (argv[0][0] == '-')
|
||||
|
@ -84,7 +100,7 @@ main(argc, argv)
|
|||
break;
|
||||
case 'a':
|
||||
argc--;
|
||||
vm_argc = argc; /* remaining args are passed to the VM */
|
||||
vm_argc = argc; remaining args are passed to the VM
|
||||
argc = 0;
|
||||
break;
|
||||
case 'o':
|
||||
|
@ -114,7 +130,7 @@ Options: -h <total heap size in words>\n\
|
|||
-u [don't warn on unbound external identifiers]",
|
||||
me);
|
||||
return 1;
|
||||
}
|
||||
} */
|
||||
|
||||
s48_sysdep_init();
|
||||
s48_heap_init();
|
||||
|
@ -171,7 +187,7 @@ Options: -h <total heap size in words>\n\
|
|||
if (warn_undefined_imported_bindings_p)
|
||||
s48_warn_about_undefined_imported_bindings();
|
||||
|
||||
return_value = s48_call_startup_procedure(argv, vm_argc);
|
||||
return_value = s48_call_startup_procedure(argv, argc);
|
||||
|
||||
if (s48_reloc_file != NULL)
|
||||
if (0 != unlink(s48_reloc_file))
|
||||
|
|
26
c/scheme48.h
26
c/scheme48.h
|
@ -292,12 +292,12 @@ extern void s48_check_record_type(s48_value record, s48_value type_binding);
|
|||
#define S48_CAR_OFFSET 0
|
||||
#define S48_CAR(x) (s48_stob_ref((x), S48_STOBTYPE_PAIR, 0))
|
||||
#define S48_UNSAFE_CAR(x) (S48_STOB_REF((x), 0))
|
||||
#define S48_SET_CAR(x, v) (s48_stob_ref((x), S48_STOBTYPE_PAIR, 0, (v)))
|
||||
#define S48_SET_CAR(x, v) (s48_stob_set((x), S48_STOBTYPE_PAIR, 0, (v)))
|
||||
#define S48_UNSAFE_SET_CAR(x, v) S48_STOB_SET((x), 0, (v))
|
||||
#define S48_CDR_OFFSET 1
|
||||
#define S48_CDR(x) (s48_stob_ref((x), S48_STOBTYPE_PAIR, 1))
|
||||
#define S48_UNSAFE_CDR(x) (S48_STOB_REF((x), 1))
|
||||
#define S48_SET_CDR(x, v) (s48_stob_ref((x), S48_STOBTYPE_PAIR, 1, (v)))
|
||||
#define S48_SET_CDR(x, v) (s48_stob_set((x), S48_STOBTYPE_PAIR, 1, (v)))
|
||||
#define S48_UNSAFE_SET_CDR(x, v) S48_STOB_SET((x), 1, (v))
|
||||
#define S48_SYMBOL_TO_STRING_OFFSET 0
|
||||
#define S48_SYMBOL_TO_STRING(x) (s48_stob_ref((x), S48_STOBTYPE_SYMBOL, 0))
|
||||
|
@ -305,12 +305,12 @@ extern void s48_check_record_type(s48_value record, s48_value type_binding);
|
|||
#define S48_LOCATION_ID_OFFSET 0
|
||||
#define S48_LOCATION_ID(x) (s48_stob_ref((x), S48_STOBTYPE_LOCATION, 0))
|
||||
#define S48_UNSAFE_LOCATION_ID(x) (S48_STOB_REF((x), 0))
|
||||
#define S48_SET_LOCATION_ID(x, v) (s48_stob_ref((x), S48_STOBTYPE_LOCATION, 0, (v)))
|
||||
#define S48_SET_LOCATION_ID(x, v) (s48_stob_set((x), S48_STOBTYPE_LOCATION, 0, (v)))
|
||||
#define S48_UNSAFE_SET_LOCATION_ID(x, v) S48_STOB_SET((x), 0, (v))
|
||||
#define S48_CONTENTS_OFFSET 1
|
||||
#define S48_CONTENTS(x) (s48_stob_ref((x), S48_STOBTYPE_LOCATION, 1))
|
||||
#define S48_UNSAFE_CONTENTS(x) (S48_STOB_REF((x), 1))
|
||||
#define S48_SET_CONTENTS(x, v) (s48_stob_ref((x), S48_STOBTYPE_LOCATION, 1, (v)))
|
||||
#define S48_SET_CONTENTS(x, v) (s48_stob_set((x), S48_STOBTYPE_LOCATION, 1, (v)))
|
||||
#define S48_UNSAFE_SET_CONTENTS(x, v) S48_STOB_SET((x), 1, (v))
|
||||
#define S48_CLOSURE_TEMPLATE_OFFSET 0
|
||||
#define S48_CLOSURE_TEMPLATE(x) (s48_stob_ref((x), S48_STOBTYPE_CLOSURE, 0))
|
||||
|
@ -330,7 +330,7 @@ extern void s48_check_record_type(s48_value record, s48_value type_binding);
|
|||
#define S48_SHARED_BINDING_REF_OFFSET 2
|
||||
#define S48_SHARED_BINDING_REF(x) (s48_stob_ref((x), S48_STOBTYPE_SHARED_BINDING, 2))
|
||||
#define S48_UNSAFE_SHARED_BINDING_REF(x) (S48_STOB_REF((x), 2))
|
||||
#define S48_SHARED_BINDING_SET(x, v) (s48_stob_ref((x), S48_STOBTYPE_SHARED_BINDING, 2, (v)))
|
||||
#define S48_SHARED_BINDING_SET(x, v) (s48_stob_set((x), S48_STOBTYPE_SHARED_BINDING, 2, (v)))
|
||||
#define S48_UNSAFE_SHARED_BINDING_SET(x, v) S48_STOB_SET((x), 2, (v))
|
||||
#define S48_PORT_HANDLER_OFFSET 0
|
||||
#define S48_PORT_HANDLER(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 0))
|
||||
|
@ -338,42 +338,42 @@ extern void s48_check_record_type(s48_value record, s48_value type_binding);
|
|||
#define S48_PORT_STATUS_OFFSET 1
|
||||
#define S48_PORT_STATUS(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 1))
|
||||
#define S48_UNSAFE_PORT_STATUS(x) (S48_STOB_REF((x), 1))
|
||||
#define S48_SET_PORT_STATUS(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 1, (v)))
|
||||
#define S48_SET_PORT_STATUS(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 1, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_STATUS(x, v) S48_STOB_SET((x), 1, (v))
|
||||
#define S48_PORT_LOCK_OFFSET 2
|
||||
#define S48_PORT_LOCK(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 2))
|
||||
#define S48_UNSAFE_PORT_LOCK(x) (S48_STOB_REF((x), 2))
|
||||
#define S48_SET_PORT_LOCK(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 2, (v)))
|
||||
#define S48_SET_PORT_LOCK(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 2, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_LOCK(x, v) S48_STOB_SET((x), 2, (v))
|
||||
#define S48_PORT_LOCKEDP_OFFSET 3
|
||||
#define S48_PORT_LOCKEDP(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 3))
|
||||
#define S48_UNSAFE_PORT_LOCKEDP(x) (S48_STOB_REF((x), 3))
|
||||
#define S48_SET_PORT_LOCKEDP(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 3, (v)))
|
||||
#define S48_SET_PORT_LOCKEDP(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 3, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_LOCKEDP(x, v) S48_STOB_SET((x), 3, (v))
|
||||
#define S48_PORT_DATA_OFFSET 4
|
||||
#define S48_PORT_DATA(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 4))
|
||||
#define S48_UNSAFE_PORT_DATA(x) (S48_STOB_REF((x), 4))
|
||||
#define S48_SET_PORT_DATA(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 4, (v)))
|
||||
#define S48_SET_PORT_DATA(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 4, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_DATA(x, v) S48_STOB_SET((x), 4, (v))
|
||||
#define S48_PORT_BUFFER_OFFSET 5
|
||||
#define S48_PORT_BUFFER(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 5))
|
||||
#define S48_UNSAFE_PORT_BUFFER(x) (S48_STOB_REF((x), 5))
|
||||
#define S48_SET_PORT_BUFFER(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 5, (v)))
|
||||
#define S48_SET_PORT_BUFFER(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 5, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_BUFFER(x, v) S48_STOB_SET((x), 5, (v))
|
||||
#define S48_PORT_INDEX_OFFSET 6
|
||||
#define S48_PORT_INDEX(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 6))
|
||||
#define S48_UNSAFE_PORT_INDEX(x) (S48_STOB_REF((x), 6))
|
||||
#define S48_SET_PORT_INDEX(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 6, (v)))
|
||||
#define S48_SET_PORT_INDEX(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 6, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_INDEX(x, v) S48_STOB_SET((x), 6, (v))
|
||||
#define S48_PORT_LIMIT_OFFSET 7
|
||||
#define S48_PORT_LIMIT(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 7))
|
||||
#define S48_UNSAFE_PORT_LIMIT(x) (S48_STOB_REF((x), 7))
|
||||
#define S48_SET_PORT_LIMIT(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 7, (v)))
|
||||
#define S48_SET_PORT_LIMIT(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 7, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_LIMIT(x, v) S48_STOB_SET((x), 7, (v))
|
||||
#define S48_PORT_PENDING_EOFP_OFFSET 8
|
||||
#define S48_PORT_PENDING_EOFP(x) (s48_stob_ref((x), S48_STOBTYPE_PORT, 8))
|
||||
#define S48_UNSAFE_PORT_PENDING_EOFP(x) (S48_STOB_REF((x), 8))
|
||||
#define S48_SET_PORT_PENDING_EOFP(x, v) (s48_stob_ref((x), S48_STOBTYPE_PORT, 8, (v)))
|
||||
#define S48_SET_PORT_PENDING_EOFP(x, v) (s48_stob_set((x), S48_STOBTYPE_PORT, 8, (v)))
|
||||
#define S48_UNSAFE_SET_PORT_PENDING_EOFP(x, v) S48_STOB_SET((x), 8, (v))
|
||||
#define S48_CHANNEL_STATUS_OFFSET 0
|
||||
#define S48_CHANNEL_STATUS(x) (s48_stob_ref((x), S48_STOBTYPE_CHANNEL, 0))
|
||||
|
|
172
c/unix/event.c
172
c/unix/event.c
|
@ -23,6 +23,12 @@
|
|||
static void when_keyboard_interrupt();
|
||||
static void when_alarm_interrupt();
|
||||
static void when_sigpipe_interrupt();
|
||||
|
||||
//JMG:
|
||||
static void when_child_interrupt();
|
||||
static void when_hup_interrupt();
|
||||
|
||||
|
||||
bool s48_setcatcher(int signum, void (*catcher)(int));
|
||||
void s48_start_alarm_interrupts(void);
|
||||
|
||||
|
@ -38,6 +44,15 @@ s48_sysdep_init(void)
|
|||
errno);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//JMG: for scsh
|
||||
if (!s48_setcatcher(SIGCHLD, when_child_interrupt)
|
||||
|| !s48_setcatcher(SIGHUP, when_hup_interrupt)) {
|
||||
fprintf(stderr,
|
||||
"Failed to install signal handler for SIGCHLD, errno = %d\n",
|
||||
errno);
|
||||
exit(1);
|
||||
}
|
||||
s48_start_alarm_interrupts();
|
||||
}
|
||||
|
||||
|
@ -106,6 +121,8 @@ when_alarm_interrupt(int ign)
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define USEC_PER_POLL (1000000 / POLLS_PER_SECOND)
|
||||
|
||||
/* delta is in ticks, 0 cancels current alarm */
|
||||
|
@ -279,10 +296,10 @@ s48_get_next_event(long *ready_fd, long *status)
|
|||
/* fprintf(stderr, "[alarm]\n"); */
|
||||
return (ALARM_EVENT);
|
||||
}
|
||||
/*
|
||||
//JMG: scsh should handle this
|
||||
if (s48_os_signal_pending())
|
||||
return (OS_SIGNAL_EVENT);
|
||||
*/
|
||||
|
||||
block_interrupts();
|
||||
if ((keyboard_interrupt_count == 0)
|
||||
&& (alarm_time == -1 || s48_current_time < alarm_time)
|
||||
|
@ -587,3 +604,154 @@ queue_ready_ports(bool wait, long seconds, long ticks)
|
|||
return errno;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//JMG: for scsh
|
||||
static int child_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_child_interrupt(int ign)
|
||||
{
|
||||
child_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
|
||||
static int hup_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_hup_interrupt(int ign)
|
||||
{
|
||||
hup_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
|
||||
static int cont_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_cont_interrupt(int ign)
|
||||
{
|
||||
cont_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
|
||||
static int quit_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_quit_interrupt(int ign)
|
||||
{
|
||||
quit_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
|
||||
static int term_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_term_interrupt(int ign)
|
||||
{
|
||||
term_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
static int tstp_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_tstp_interrupt(int ign)
|
||||
{
|
||||
tstp_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
static int usr1_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_usr1_interrupt(int ign)
|
||||
{
|
||||
usr1_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
static int usr2_interrupt_count = 0;
|
||||
|
||||
static void
|
||||
when_usr2_interrupt(int ign)
|
||||
{
|
||||
usr2_interrupt_count += 1;
|
||||
NOTE_EVENT;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* This procedure is called periodically by the VM (if you uncomment the
|
||||
* call to it in s48_get_next_event() in c/unix/event.c).
|
||||
*
|
||||
* s48_set_os_signal() is a VM procedure. The two arguments are the type
|
||||
* of interrupt and one other value which can be used to return whatever
|
||||
* associated information is desired. The two values, along with the
|
||||
* current enabled-interrupts mask, are passed to the handler for os-signal
|
||||
* interrupts.
|
||||
*
|
||||
* A handler can be installed by doing
|
||||
(set-interrupt-handler! (enum interrupt os-signal)
|
||||
(lambda (type arg enabled-interrupts)
|
||||
(display type)
|
||||
(newline)
|
||||
(display arg)
|
||||
(newline)
|
||||
(display enabled-interrupts)
|
||||
(newline)))
|
||||
* The handler is called with all interrupts disabled. They are
|
||||
* reenabled when the handler returns (or if done by hand).
|
||||
*/
|
||||
enum scsh_os_signal{
|
||||
scsh_os_signal_io_completion,
|
||||
scsh_os_signal_post_gc,
|
||||
scsh_os_signal_keyboard,
|
||||
scsh_os_signal_alarm,
|
||||
scsh_os_signal_chld ,
|
||||
scsh_os_signal_cont,
|
||||
scsh_os_signal_hup,
|
||||
scsh_os_signal_quit,
|
||||
scsh_os_signal_term,
|
||||
scsh_os_signal_tstp,
|
||||
scsh_os_signal_usr1,
|
||||
scsh_os_signal_usr2,
|
||||
scsh_os_signal_info,
|
||||
scsh_os_signal_io,
|
||||
scsh_os_signal_poll,
|
||||
scsh_os_signal_prof,
|
||||
scsh_os_signal_pwr,
|
||||
scsh_os_signal_urg,
|
||||
scsh_os_signal_vtalrm,
|
||||
scsh_os_signal_winch,
|
||||
scsh_os_signal_xcpu,
|
||||
scsh_os_signal_xfsz
|
||||
};
|
||||
|
||||
int
|
||||
s48_os_signal_pending(void) {
|
||||
if (child_interrupt_count > 0) {
|
||||
fprintf(stderr, "cld c %d \n", child_interrupt_count);
|
||||
block_interrupts();
|
||||
--child_interrupt_count;
|
||||
allow_interrupts();
|
||||
s48_set_os_signal(S48_UNSAFE_ENTER_FIXNUM(scsh_os_signal_chld),
|
||||
S48_UNSAFE_ENTER_FIXNUM(SIGCHLD));
|
||||
return TRUE;
|
||||
}
|
||||
else if (hup_interrupt_count > 0){
|
||||
fprintf(stderr, "hup c %d \n", hup_interrupt_count);
|
||||
block_interrupts();
|
||||
--hup_interrupt_count;
|
||||
allow_interrupts();
|
||||
s48_set_os_signal(S48_UNSAFE_ENTER_FIXNUM(scsh_os_signal_hup),
|
||||
S48_UNSAFE_ENTER_FIXNUM(SIGHUP));
|
||||
return TRUE;
|
||||
}
|
||||
else return FALSE;
|
||||
}
|
||||
|
|
56
cig/cig.scm
56
cig/cig.scm
|
@ -773,7 +773,7 @@
|
|||
(define cfile-header-boilerplate
|
||||
"/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
step 4
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -795,6 +795,7 @@ step 3
|
|||
(define (define-foreign-process-form form oport)
|
||||
; c-names will be the list of c-names of all define-foreign-forms
|
||||
(define c-names '())
|
||||
(define init-name #f)
|
||||
(define (define-foreign-process-form2 form)
|
||||
(if (pair? form)
|
||||
(case (car form)
|
||||
|
@ -820,6 +821,10 @@ step 3
|
|||
(set! c-names (cons c-name c-names))
|
||||
(display (define-foreign->C-stub form) oport))))
|
||||
|
||||
((foreign-init-name)
|
||||
(let ((name (cdr form)))
|
||||
(set! init-name (car name))))
|
||||
|
||||
((foreign-source)
|
||||
(let ((forms (cdr form)))
|
||||
(if (pair? forms)
|
||||
|
@ -829,22 +834,39 @@ step 3
|
|||
(display x oport))
|
||||
(cdr forms)))))))))
|
||||
(define-foreign-process-form2 form)
|
||||
(reverse c-names))
|
||||
(values (reverse c-names) init-name))
|
||||
|
||||
; Frank: end
|
||||
;;; Write the registration code for a generated stub file to OPORT:
;;; s48-init-boilerplate is filled in with INIT-NAME and with one
;;; S48_EXPORT_FUNCTION line per entry of C-NAMES.
;;; Signals an error when INIT-NAME is #f, i.e. when no
;;; foreign-init-name form was seen.
(define (display-register c-names init-name oport)
  ;; Text of the export statement for a single C function name.
  (define (export-line c-name)
    (format #f "~% S48_EXPORT_FUNCTION(~a);" c-name))
  (cond ((not init-name)
         (error "no foreign-init-name statement found"))
        (else
         (format oport
                 s48-init-boilerplate
                 init-name
                 (apply string-append (map export-line c-names))))))
|
||||
|
||||
(define (process-define-foreign-stream iport oport)
|
||||
(display cfile-header-boilerplate oport)
|
||||
(let lp ()
|
||||
(let lp ((c-names '()) (init-name #f))
|
||||
(let ((form (read iport)))
|
||||
(cond ((not (eof-object? form))
|
||||
(define-foreign-process-form form oport)
|
||||
(lp))))))
|
||||
(if (eof-object? form)
|
||||
(display-register c-names init-name oport)
|
||||
(receive (new-c-names maybe-init-name)
|
||||
(define-foreign-process-form form oport)
|
||||
(let ((init-name (if maybe-init-name
|
||||
(if init-name
|
||||
(error "multiple foreign-init-name definitions")
|
||||
maybe-init-name)
|
||||
init-name)))
|
||||
(lp (append c-names new-c-names) init-name)))))))
|
||||
|
||||
; Frank: begin
|
||||
; (process-define-foreign-file fname) scans file fname.scm and produces a c-stub for every
|
||||
; scanned define-foreign form and places git in file fname.c.
|
||||
(define (process-define-foreign-file fname)
|
||||
; scanned define-foreign form and places it in file fname.c.
|
||||
(define (process-define-foreign-file fname init-name)
|
||||
(call-with-input-file (string-append fname ".scm")
|
||||
(lambda (iport)
|
||||
(call-with-output-file (string-append fname ".c")
|
||||
|
@ -860,7 +882,7 @@ step 3
|
|||
(map (lambda (c-name)
|
||||
(format #f "~% S48_EXPORT_FUNCTION(~a);" c-name))
|
||||
c-names))))
|
||||
(format oport s48-init-boilerplate (file-name-nondirectory fname) register-txt))
|
||||
(format oport s48-init-boilerplate init-name register-txt))
|
||||
(lp (append c-names (define-foreign-process-form form oport))))))))))))
|
||||
; Frank: end
|
||||
|
||||
|
@ -888,12 +910,9 @@ step 3
|
|||
(lp (- i 1))
|
||||
i))))
|
||||
|
||||
(define (cig-standalone-toplevel fname) ; ignore your args no longer.
|
||||
(display "This is cig standalone\n")
|
||||
(display "processing ")
|
||||
(display fname)
|
||||
(newline)
|
||||
(process-define-foreign-file (car fname))
|
||||
(define (cig-standalone-toplevel f-and-init-name) ; ignore your args no
|
||||
(process-define-foreign-stream (current-input-port)
|
||||
(current-output-port))
|
||||
0)
|
||||
|
||||
;;; This section defines the Scheme-side macro processor.
|
||||
|
@ -1056,7 +1075,8 @@ step 3
|
|||
|
||||
|
||||
(define-structure define-foreign-syntax (export (define-foreign :syntax)
|
||||
(foreign-source :syntax))
|
||||
(foreign-source :syntax)
|
||||
(foreign-init-name :syntax))
|
||||
(open scheme external-calls structure-refs cig-aux)
|
||||
(access signals) ; for ERROR
|
||||
(for-syntax (open scheme define-foreign-syntax-support))
|
||||
|
@ -1070,6 +1090,10 @@ step 3
|
|||
(syntax-rules ()
|
||||
((foreign-source stuff ...) #f)))
|
||||
|
||||
(define-syntax foreign-init-name
|
||||
(syntax-rules ()
|
||||
((foreign-init-name name) (lambda () name))))
|
||||
|
||||
(define (check-arg pred obj proc)
|
||||
(if (not (pred obj))
|
||||
(error "check-arg" pred obj proc)
|
||||
|
|
|
@ -331,8 +331,7 @@
|
|||
(set-exit-status! #f)
|
||||
(steal-port! (command-input))
|
||||
(steal-port! (command-output))
|
||||
; (steal-port! (command-error-output))))
|
||||
))
|
||||
(steal-port! (command-error-output))))
|
||||
(run-threads
|
||||
(round-robin-event-handler (command-level-queue level)
|
||||
command-quantum
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
||||
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@
|
|||
(c-define "S48_UNSAFE_~A(x) (S48_STOB_REF((x), ~D))" name i))
|
||||
(if (not (null? (cdar accs)))
|
||||
(let ((name (upcase (cadar accs))))
|
||||
(c-define "S48_~A(x, v) (s48_stob_ref((x), S48_STOBTYPE_~A, ~D, (v)))"
|
||||
(c-define "S48_~A(x, v) (s48_stob_set((x), S48_STOBTYPE_~A, ~D, (v)))"
|
||||
name type i)
|
||||
(c-define "S48_UNSAFE_~A(x, v) S48_STOB_SET((x), ~D, (v))" name i))))))
|
||||
stob-data)
|
||||
|
|
|
@ -79,7 +79,8 @@
|
|||
(decrement-channel-wait-count!)
|
||||
(channel-abort channel))
|
||||
(else
|
||||
(warn "channel in use by other than port owner" channel)
|
||||
(warn "channel in use by other than port owner"
|
||||
channel thread owner)
|
||||
(enqueue-thread! queue thread)
|
||||
#f)))
|
||||
#f)))
|
||||
|
|
|
@ -29,6 +29,22 @@
|
|||
(interaction-environment)
|
||||
(car package-option))))
|
||||
(really-load-into filename package #t)))
|
||||
; JMG For scsh.
|
||||
|
||||
(define (load-into-port port p)
|
||||
(compile-and-run-port port p
|
||||
(lambda (template)
|
||||
(invoke-template template p))
|
||||
(current-noise-port)
|
||||
#t)); JMG whatever #t means...
|
||||
|
||||
;;; Load forms from PORT via load-into-port.  The optional second
;;; argument is the package to load into; when it is omitted the
;;; interaction environment is used.  The load runs inside
;;; noting-undefined-variables for that package.
(define (load-port port . package-option)
  (let ((target (if (pair? package-option)
                    (car package-option)
                    (interaction-environment))))
    (noting-undefined-variables
     target
     (lambda () (load-into-port port target)))))
|
||||
|
||||
;----------------
|
||||
|
||||
|
|
|
@ -119,5 +119,4 @@
|
|||
; For scsh.
|
||||
|
||||
(define (interrupt-handlers-vector)
|
||||
(/ 1 0)
|
||||
(session-data-ref interrupt-handlers))
|
||||
|
|
|
@ -18,6 +18,29 @@
|
|||
|
||||
(define preferred-case (lambda (x) x))
|
||||
|
||||
;;; Sharp-macro handler for #! ... !# script headers.
;;;
;;; Skips input up to and including a line consisting of "!#", then
;;; returns the next datum read from PORT.  Hitting end-of-file first
;;; is a reading error.
;;;
;;; STATE counts how much of the terminator  newline ! # newline  has
;;; been matched so far:
;;;   0  nothing yet           1  found \n
;;;   2  found \n!             3  found \n!#
;;;   4  found \n!#\n -- done
(define (script-skip c port)
  ;; Discard one character first -- presumably the `!' that selected
  ;; this macro; confirm against the sharp-macro calling convention.
  (read-char port)
  (let lp ((state 0))
    ;; Read one character and continue in the state it leads to.
    (define (advance-if look-for)
      (let ((ch (read-char port)))
        (cond ((eof-object? ch)
               (reading-error port
                 "EOF inside block comment -- #! missing a closing !#"))
              ;; The expected character: one step further.
              ((char=? ch look-for) (lp (+ state 1)))
              ;; Any other newline starts a fresh candidate line.
              ((char=? ch #\newline) (lp 1))
              ;; CR (a constant defined elsewhere in this file) leaves
              ;; the state untouched.
              ((char=? ch cr) (lp state))
              (else (lp 0)))))
    (case state
      ((0 3) (advance-if #\newline))
      ((1)   (advance-if #\!))
      ((2)   (advance-if #\#))
      ((4)   (read port))
      (else  (reading-error port "case other")))))
|
||||
; was sub-read ^
|
||||
|
||||
|
||||
; scsh stop
|
||||
|
||||
(define (read . port-option)
|
||||
|
@ -233,7 +256,9 @@
|
|||
(for-each (lambda (c)
|
||||
(define-sharp-macro c number-sharp-macro))
|
||||
'(#\b #\o #\d #\x #\i #\e)))
|
||||
|
||||
|
||||
(define-sharp-macro #\! script-skip)
|
||||
|
||||
; Tokens
|
||||
|
||||
(define (sub-read-token c port)
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; You must have already loaded Pre-Scheme.
|
||||
;
|
||||
; Then, for example,
|
||||
; (start-vm "=scheme48/../build/initial.image" 4000000 20000 '#())
|
||||
; (start-vm "=scheme48/../../build/initial.image" 4000000 20000 '#())
|
||||
; in the user package will start up the VM with the initial image.
|
||||
; Be patient. It will take a while. Running the initial image as
|
||||
; above on a SGI Indy (100 mhz R4000) it took over 70 minutes to
|
||||
|
|
|
@ -43,3 +43,35 @@
|
|||
(register-interrupt count)
|
||||
(apply old-handler stuff)))
|
||||
(loop (+ count 1))))))
|
||||
|
||||
|
||||
;;; extensions by JMG
|
||||
|
||||
;;; Return the first event after PRE-EVENT whose type is a member of
;;; SET (tested with memq).  Events of other types are skipped.
;;; NOTE(review): next-event presumably blocks until a successor of
;;; its argument exists -- confirm.
(define (wait-interrupt-set set pre-event)
  (let ((event (next-event pre-event)))
    (if (memq (event-type event) set)
        event
        ;; Recurse into this procedure, not wait-interrupt: the latter
        ;; appears to wait for a single event type (compare
        ;; nonblockwait-interrupt, which uses eq?), so handing it SET
        ;; would stop applying the membership test after the first
        ;; non-matching event.
        (wait-interrupt-set set event))))
|
||||
|
||||
|
||||
; would need placeholder-queue exported..
|
||||
|
||||
(define (placeholder-value-set? placeholder)
|
||||
(not (placeholder-queue placeholder)))
|
||||
|
||||
(define (most-recent-event? event)
|
||||
(eq? event most-recent-event))
|
||||
|
||||
(define (nonblockwait-interrupt type event )
|
||||
(general-nonblockwait-interrupt type event eq?))
|
||||
|
||||
(define (nonblockwait-interrupt-set set event )
|
||||
(general-nonblockwait-interrupt set event memq))
|
||||
|
||||
;;; Non-blocking search for an event after EVENT.
;;;
;;; WAITING-FOR is what to look for and COMPARE is called as
;;; (COMPARE event-type WAITING-FOR) to test each candidate: eq? with a
;;; single type, memq with a list of types.
;;;
;;; Returns the first matching event after EVENT, or #f once EVENT is
;;; the most recent event, i.e. there is nothing further to inspect.
(define (general-nonblockwait-interrupt waiting-for event compare)
  (if (most-recent-event? event)
      #f
      (let ((next (next-event event)))
        (if (compare (event-type next) waiting-for)
            next
            ;; Continue with the same WAITING-FOR and COMPARE so that
            ;; the set variant keeps using memq.
            (general-nonblockwait-interrupt waiting-for next compare)))))
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -249,9 +250,9 @@ s48_value df_scheme_host_address2host_info(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = scheme_host_address2host_info(g1, &r2, &r3, &r4);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,2),(long) r4);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,2),(long) r4);//simple-assign
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -267,9 +268,9 @@ s48_value df_scheme_host_name2host_info(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = scheme_host_name2host_info(s48_extract_string(g1), &r2, &r3, &r4);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,2),(long) r4);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,2),(long) r4);//simple-assign
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -284,8 +285,8 @@ s48_value df_scheme_net_address2net_info(s48_value g1, s48_value g2, s48_value m
|
|||
|
||||
r1 = scheme_net_address2net_info(g1, g2, &r2, &r3);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -300,8 +301,8 @@ s48_value df_scheme_net_name2net_info(s48_value g1, s48_value g2, s48_value mv_v
|
|||
|
||||
r1 = scheme_net_name2net_info(s48_extract_string(g1), g2, &r2, &r3);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -318,10 +319,10 @@ s48_value df_scheme_serv_port2serv_info(s48_value g1, s48_value g2, s48_value mv
|
|||
|
||||
r1 = scheme_serv_port2serv_info(s48_extract_fixnum(g1), s48_extract_string(g2), &r2, &r3, &r4, &r5);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,3),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,3)),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -338,10 +339,10 @@ s48_value df_scheme_serv_name2serv_info(s48_value g1, s48_value g2, s48_value mv
|
|||
|
||||
r1 = scheme_serv_name2serv_info(s48_extract_string(g1), s48_extract_string(g2), &r2, &r3, &r4, &r5);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,3),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,3)),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -357,8 +358,8 @@ s48_value df_scheme_proto_num2proto_info(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = scheme_proto_num2proto_info(s48_extract_fixnum(g1), &r2, &r3, &r4);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
return ret1;
|
||||
}
|
||||
|
@ -375,8 +376,8 @@ s48_value df_scheme_proto_name2proto_info(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = scheme_proto_name2proto_info(s48_extract_string(g1), &r2, &r3, &r4);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
return ret1;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,83 @@
|
|||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
extern char *prog_name;
|
||||
|
||||
#define streq(a,b) (strcmp((a),(b))==0)
|
||||
|
||||
static void usage(void) {
|
||||
fprintf(stderr, "Usage: %s [meta-arg] [vm-option+] [end-option scheme-args]\n"
|
||||
"meta-arg: \\ <script file name>\n"
|
||||
"\n"
|
||||
"vm-option: -h <total heap size in words>\n"
|
||||
" -s <stack size in words>\n"
|
||||
" -o <object file name>\n"
|
||||
"\n"
|
||||
"end-option: -i <image file name>\n"
|
||||
" -- (Terminates vm args.)\n"
|
||||
" -a (Terminates vm args. Obsolete.)\n",
|
||||
prog_name);}
|
||||
|
||||
static void bad_args(int nr) {
|
||||
fprintf(stderr, "reason : %d\n", nr);
|
||||
usage();
|
||||
exit(1); }
|
||||
|
||||
char ** process_args(char **argv,
|
||||
long *pheap_size,
|
||||
long *pstack_size,
|
||||
char **pobject_file,
|
||||
char **pimage_name) {
|
||||
extern char **process_meta_arg(char **);
|
||||
char ** deb = argv;
|
||||
|
||||
/* Handle an initial \ <fname> meta-arg expansion. */
|
||||
while ( *argv && streq(*argv, "\\") ) {
|
||||
argv++;
|
||||
if( !*argv ) bad_args(0); /* die */
|
||||
argv = process_meta_arg(argv);
|
||||
if( !argv ) {
|
||||
fprintf(stderr, "%s: \\ <fname> expansion failed.\n",
|
||||
prog_name);
|
||||
exit(1);}}
|
||||
|
||||
for (; *argv; argv++)
|
||||
if( argv[0][0] != '-' )
|
||||
bad_args(1); /* die */
|
||||
else
|
||||
switch (argv[0][1]) {
|
||||
default:
|
||||
bad_args(2); /* die */
|
||||
break;
|
||||
|
||||
case 'h': /* heapsize */
|
||||
argv++;
|
||||
if( !*argv ) bad_args(3); /* die */
|
||||
*pheap_size = atoi(*argv);
|
||||
if( *pheap_size <= 0 ) bad_args(4);
|
||||
break;
|
||||
|
||||
case 's':
|
||||
argv++;
|
||||
if( !*argv ) bad_args(5); /* die */
|
||||
*pstack_size = atoi(*argv);
|
||||
if (*pstack_size <= 0) bad_args(6);
|
||||
break;
|
||||
|
||||
case 'o': /* object file */
|
||||
argv++;
|
||||
if( !*argv ) bad_args(7); /* die */
|
||||
*pobject_file = *argv;
|
||||
break;
|
||||
|
||||
/* These switches terminate arg scanning. */
|
||||
case 'i':
|
||||
argv++;
|
||||
if( !*argv ) bad_args(8); /* die */
|
||||
*pimage_name = *argv++;
|
||||
return argv;
|
||||
|
||||
case '-':
|
||||
case 'a':
|
||||
argv++;
|
||||
return argv;}
|
||||
return argv;}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
;;; Copyright (c) 1993, 1994, 1995 by Olin Shivers.
|
||||
|
||||
;;; This is a GC'd abstraction for Unix process id's.
|
||||
;;; The problem with Unix pids is (a) they clutter up the kernel
|
||||
; ;; The problem with Unix pids is (a) they clutter up the kernel
|
||||
;;; process table until you wait(2) them, and (b) you can only
|
||||
;;; wait(2) them once. Scsh's process objects are similar, but
|
||||
;;; allow the storage to be allocated in the scsh address space,
|
||||
|
@ -64,7 +64,7 @@
|
|||
|
||||
;;; Is X a pid or a proc?
|
||||
|
||||
(define (pid/proc? x) (or (proc? x) (and (integer? x) (>= pid 0))))
|
||||
(define (pid/proc? x) (or (proc? x) (and (integer? x) (>= x 0))))
|
||||
|
||||
|
||||
;;; Process reaping
|
||||
|
@ -106,14 +106,14 @@
|
|||
(error "Illegal autoreap policy." new-policy))
|
||||
(else (set! *autoreap-policy* new-policy)
|
||||
(if (eq? new-policy 'early)
|
||||
(set-interrupt-handler interrupt/chld
|
||||
(set! procobj-handler
|
||||
(lambda (enabled-ints) (reap-zombies))))))))
|
||||
old-policy))
|
||||
|
||||
;;; New (scsh 0.6)
|
||||
|
||||
(define (install-autoreaping)
|
||||
(set-interrupt-handler interrupt/chld
|
||||
(set! procobj-handler
|
||||
(lambda (enabled-ints)
|
||||
(reap-zombies))))
|
||||
|
||||
|
@ -122,6 +122,8 @@
|
|||
;;; Return true if no more outstanding children; #f if some still live.
|
||||
|
||||
(define (reap-zombies)
|
||||
(display "reap-zombies was called" (current-error-port))
|
||||
(newline)
|
||||
(let lp ()
|
||||
(receive (pid status) (%wait-any (bitwise-ior wait/poll wait/stopped-children))
|
||||
(if pid
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
|
||||
This software is not subject to any license of the American Telephone
|
||||
and Telegraph Company or of the Regents of the University of California.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose on
|
||||
any computer system, and to alter it and redistribute it, subject
|
||||
to the following restrictions:
|
||||
|
||||
1. The author is not responsible for the consequences of use of this
|
||||
software, no matter how awful, even if they arise from flaws in it.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission. Since few users ever read sources,
|
||||
credits must appear in the documentation.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software. Since few users
|
||||
ever read sources, credits must appear in the documentation.
|
||||
|
||||
4. This notice may not be removed or altered.
|
|
@ -0,0 +1,138 @@
|
|||
# Generated automatically from Makefile.in by configure.
|
||||
CC = gcc
|
||||
CFLAGS1 = -g -O2
|
||||
|
||||
RANLIB = ranlib
|
||||
|
||||
# You probably want to take -DREDEBUG out of CFLAGS, and put something like
|
||||
# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
|
||||
# internal assertion checking and some debugging facilities).
|
||||
# Put -Dconst= in for a pre-ANSI compiler.
|
||||
# Do not take -DPOSIX_MISTAKE out.
|
||||
# REGCFLAGS isn't important to you (it's for my use in some special contexts).
|
||||
#CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
|
||||
CFLAGS=-I. -DPOSIX_MISTAKE $(REGCFLAGS) $(CFLAGS1)
|
||||
|
||||
# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
|
||||
# the Berkeley __P macro, put -b in.
|
||||
MKHFLAGS=
|
||||
|
||||
# Flags for linking but not compiling, if any.
|
||||
LDFLAGS=
|
||||
|
||||
# Extra libraries for linking, if any.
|
||||
LIBS=
|
||||
|
||||
# Internal stuff, should not need changing.
|
||||
OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
|
||||
OBJS=$(OBJPRODN) split.o debug.o main.o
|
||||
H=cclass.h cname.h regex2.h utils.h
|
||||
REGSRC=regcomp.c regerror.c regexec.c regfree.c
|
||||
ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
|
||||
|
||||
# Stuff that matters only if you're trying to lint the package.
|
||||
LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
|
||||
LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
|
||||
JUNKLINT=possible pointer alignment|null effect
|
||||
|
||||
# arrangements to build forward-reference header files
|
||||
.SUFFIXES: .ih .h
|
||||
.c.ih:
|
||||
sh ./mkh $(MKHFLAGS) -p $< >$@
|
||||
|
||||
default: r
|
||||
|
||||
lib: purge $(OBJPRODN)
|
||||
rm -f libregex.a
|
||||
ar crv libregex.a $(OBJPRODN)
|
||||
$(RANLIB) libregex.a
|
||||
|
||||
purge:
|
||||
rm -f *.o
|
||||
|
||||
# stuff to build regex.h
|
||||
REGEXH=regex.h
|
||||
REGEXHSRC=regex2.h $(REGSRC)
|
||||
$(REGEXH): $(REGEXHSRC) mkh
|
||||
sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
|
||||
cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
|
||||
rm -f regex.tmp
|
||||
|
||||
# dependencies
|
||||
$(OBJPRODN) debug.o: utils.h regex.h regex2.h
|
||||
regcomp.o: cclass.h cname.h regcomp.ih
|
||||
regexec.o: engine.c engine.ih
|
||||
regerror.o: regerror.ih
|
||||
debug.o: debug.ih
|
||||
main.o: main.ih
|
||||
|
||||
# tester
|
||||
re: $(OBJS)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
|
||||
|
||||
# regression test
|
||||
r: re tests
|
||||
./re <tests
|
||||
./re -el <tests
|
||||
./re -er <tests
|
||||
|
||||
# 57 variants, and other stuff, for development use -- not useful to you
|
||||
ra: ./re tests
|
||||
-./re <tests
|
||||
-./re -el <tests
|
||||
-./re -er <tests
|
||||
|
||||
rx: ./re tests
|
||||
./re -x <tests
|
||||
./re -x -el <tests
|
||||
./re -x -er <tests
|
||||
|
||||
t: ./re tests
|
||||
-time ./re <tests
|
||||
-time ./re -cs <tests
|
||||
-time ./re -el <tests
|
||||
-time ./re -cs -el <tests
|
||||
|
||||
l: $(LINTC)
|
||||
lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
|
||||
|
||||
fullprint:
|
||||
ti README WHATSNEW notes todo | list
|
||||
ti *.h | list
|
||||
list *.c
|
||||
list regex.3 regex.7
|
||||
|
||||
print:
|
||||
ti README WHATSNEW notes todo | list
|
||||
ti *.h | list
|
||||
list reg*.c engine.c
|
||||
|
||||
|
||||
mf.tmp: Makefile
|
||||
sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
|
||||
|
||||
DTRH=cclass.h cname.h regex2.h utils.h
|
||||
PRE=COPYRIGHT README WHATSNEW
|
||||
POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
|
||||
FILES=$(PRE) Makefile $(POST)
|
||||
DTR=$(PRE) Makefile=mf.tmp $(POST)
|
||||
dtr: $(FILES) mf.tmp
|
||||
makedtr $(DTR) >$@
|
||||
rm mf.tmp
|
||||
|
||||
cio: $(FILES)
|
||||
cio $(FILES)
|
||||
|
||||
rdf: $(FILES)
|
||||
rcsdiff -c $(FILES) 2>&1 | p
|
||||
|
||||
# various forms of cleanup
|
||||
tidy:
|
||||
rm -f junk* core core.* *.core dtr *.tmp lint
|
||||
|
||||
clean: tidy
|
||||
rm -f *.o *.s *.ih re libregex.a
|
||||
|
||||
# don't do this one unless you know what you're doing
|
||||
spotless: clean
|
||||
rm -f mkh regex.h
|
|
@ -0,0 +1,138 @@
|
|||
VPATH = @srcdir@
|
||||
CC = @CC@
|
||||
CFLAGS1 = @CFLAGS1@
|
||||
|
||||
RANLIB = @RANLIB@
|
||||
|
||||
# You probably want to take -DREDEBUG out of CFLAGS, and put something like
|
||||
# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
|
||||
# internal assertion checking and some debugging facilities).
|
||||
# Put -Dconst= in for a pre-ANSI compiler.
|
||||
# Do not take -DPOSIX_MISTAKE out.
|
||||
# REGCFLAGS isn't important to you (it's for my use in some special contexts).
|
||||
#CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
|
||||
CFLAGS=-I. -DPOSIX_MISTAKE $(REGCFLAGS) $(CFLAGS1)
|
||||
|
||||
# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
|
||||
# the Berkeley __P macro, put -b in.
|
||||
MKHFLAGS=
|
||||
|
||||
# Flags for linking but not compiling, if any.
|
||||
LDFLAGS=
|
||||
|
||||
# Extra libraries for linking, if any.
|
||||
LIBS=
|
||||
|
||||
# Internal stuff, should not need changing.
|
||||
OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
|
||||
OBJS=$(OBJPRODN) split.o debug.o main.o
|
||||
H=cclass.h cname.h regex2.h utils.h
|
||||
REGSRC=regcomp.c regerror.c regexec.c regfree.c
|
||||
ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
|
||||
|
||||
# Stuff that matters only if you're trying to lint the package.
|
||||
LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
|
||||
LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
|
||||
JUNKLINT=possible pointer alignment|null effect
|
||||
|
||||
# arrangements to build forward-reference header files
|
||||
.SUFFIXES: .ih .h
|
||||
.c.ih:
|
||||
sh ./mkh $(MKHFLAGS) -p $< >$@
|
||||
|
||||
default: r
|
||||
|
||||
lib: purge $(OBJPRODN)
|
||||
rm -f libregex.a
|
||||
ar crv libregex.a $(OBJPRODN)
|
||||
$(RANLIB) libregex.a
|
||||
|
||||
purge:
|
||||
rm -f *.o
|
||||
|
||||
# stuff to build regex.h
|
||||
REGEXH=regex.h
|
||||
REGEXHSRC=regex2.h $(REGSRC)
|
||||
$(REGEXH): $(REGEXHSRC) mkh
|
||||
sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
|
||||
cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
|
||||
rm -f regex.tmp
|
||||
|
||||
# dependencies
|
||||
$(OBJPRODN) debug.o: utils.h regex.h regex2.h
|
||||
regcomp.o: cclass.h cname.h regcomp.ih
|
||||
regexec.o: engine.c engine.ih
|
||||
regerror.o: regerror.ih
|
||||
debug.o: debug.ih
|
||||
main.o: main.ih
|
||||
|
||||
# tester
|
||||
re: $(OBJS)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
|
||||
|
||||
# regression test
|
||||
r: re tests
|
||||
./re <tests
|
||||
./re -el <tests
|
||||
./re -er <tests
|
||||
|
||||
# 57 variants, and other stuff, for development use -- not useful to you
|
||||
ra: ./re tests
|
||||
-./re <tests
|
||||
-./re -el <tests
|
||||
-./re -er <tests
|
||||
|
||||
rx: ./re tests
|
||||
./re -x <tests
|
||||
./re -x -el <tests
|
||||
./re -x -er <tests
|
||||
|
||||
t: ./re tests
|
||||
-time ./re <tests
|
||||
-time ./re -cs <tests
|
||||
-time ./re -el <tests
|
||||
-time ./re -cs -el <tests
|
||||
|
||||
l: $(LINTC)
|
||||
lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
|
||||
|
||||
fullprint:
|
||||
ti README WHATSNEW notes todo | list
|
||||
ti *.h | list
|
||||
list *.c
|
||||
list regex.3 regex.7
|
||||
|
||||
print:
|
||||
ti README WHATSNEW notes todo | list
|
||||
ti *.h | list
|
||||
list reg*.c engine.c
|
||||
|
||||
|
||||
mf.tmp: Makefile
|
||||
sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
|
||||
|
||||
DTRH=cclass.h cname.h regex2.h utils.h
|
||||
PRE=COPYRIGHT README WHATSNEW
|
||||
POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
|
||||
FILES=$(PRE) Makefile $(POST)
|
||||
DTR=$(PRE) Makefile=mf.tmp $(POST)
|
||||
dtr: $(FILES) mf.tmp
|
||||
makedtr $(DTR) >$@
|
||||
rm mf.tmp
|
||||
|
||||
cio: $(FILES)
|
||||
cio $(FILES)
|
||||
|
||||
rdf: $(FILES)
|
||||
rcsdiff -c $(FILES) 2>&1 | p
|
||||
|
||||
# various forms of cleanup
|
||||
# These are commands, not files: declare them phony so that a stray file
# named tidy, clean or spotless cannot make them appear up to date.
.PHONY: tidy clean spotless

# Remove scratch files, core dumps, and the dtr and lint outputs.
tidy:
	rm -f junk* core core.* *.core dtr *.tmp lint

# tidy, plus objects, generated .ih headers, the test driver and the library.
clean: tidy
	rm -f *.o *.s *.ih re libregex.a

# don't do this one unless you know what you're doing
# (clean, plus mkh and regex.h)
spotless: clean
	rm -f mkh regex.h
|
|
@ -0,0 +1,32 @@
|
|||
alpha3.7 release.
|
||||
Fri Nov 21 13:25:21 EST 1997
|
||||
henry@zoo.toronto.edu
|
||||
|
||||
See WHATSNEW for change listing.
|
||||
|
||||
installation notes:
|
||||
--------
|
||||
Read the comments at the beginning of Makefile before running.
|
||||
|
||||
Utils.h contains some things that just might have to be modified on
|
||||
some systems, as well as a nested include (ugh) of <assert.h>.
|
||||
|
||||
The "fake" directory contains quick-and-dirty fakes for some header
|
||||
files and routines that old systems may not have. Note also that
|
||||
-DUSEBCOPY will make utils.h substitute bcopy() for memmove().
|
||||
|
||||
After that, "make r" will build regcomp.o, regexec.o, regfree.o,
|
||||
and regerror.o (the actual routines), bundle them together into a test
|
||||
program, and run regression tests on them. No output is good output.
|
||||
|
||||
"make lib" builds just the .o files for the actual routines (when
|
||||
you're happy with testing and have adjusted CFLAGS for production),
|
||||
and puts them together into libregex.a. You can pick up either the
|
||||
library or *.o ("make lib" makes sure there are no other .o files left
|
||||
around to confuse things).
|
||||
|
||||
Main.c, debug.c, split.c are used for regression testing but are not part
|
||||
of the RE routines themselves.
|
||||
|
||||
Regex.h goes in /usr/include. All other .h files are internal only.
|
||||
--------
|
|
@ -0,0 +1,105 @@
|
|||
New in alpha3.7: A bit of cleanup aimed at maximizing portability,
|
||||
possibly at slight cost in efficiency. "ul" suffixes and "unsigned long"
|
||||
no longer appear, in particular.
|
||||
|
||||
New in alpha3.6: A couple more portability glitches fixed.
|
||||
|
||||
New in alpha3.5: Active development of this code has been stopped --
|
||||
I'm working on a complete reimplementation -- but folks have found some
|
||||
minor portability glitches and the like, hence this release to fix them.
|
||||
One penalty: slightly reduced compatibility with old compilers, because
|
||||
the ANSI C `unsigned long' type and `ul' constant suffix are used in a
|
||||
few places (I could avoid this but it would be considerably more work).
|
||||
|
||||
New in alpha3.4: The complex bug alluded to below has been fixed (in a
|
||||
slightly kludgey temporary way that may hurt efficiency a bit; this is
|
||||
another "get it out the door for 4.4" release). The tests at the end of
|
||||
the tests file have accordingly been uncommented. The primary sign of
|
||||
the bug was that something like a?b matching ab matched b rather than ab.
|
||||
(The bug was essentially specific to this exact situation, else it would
|
||||
have shown up earlier.)
|
||||
|
||||
New in alpha3.3: The definition of word boundaries has been altered
|
||||
slightly, to more closely match the usual programming notion that "_"
|
||||
is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir,
|
||||
and the makefile no longer alludes to it in mysterious ways. The
|
||||
makefile has generally been cleaned up some. Fixes have been made
|
||||
(again!) so that the regression test will run without -DREDEBUG, at
|
||||
the cost of weaker checking. A workaround for a bug in some folks'
|
||||
<assert.h> has been added. And some more things have been added to
|
||||
tests, including a couple right at the end which are commented out
|
||||
because the code currently flunks them (complex bug; fix coming).
|
||||
Plus the usual minor cleanup.
|
||||
|
||||
New in alpha3.2: Assorted bits of cleanup and portability improvement
|
||||
(the development base is now a BSDI system using GCC instead of an ancient
|
||||
Sun system, and the newer compiler exposed some glitches). Fix for a
|
||||
serious bug that affected REs using many [] (including REG_ICASE REs
|
||||
because of the way they are implemented), *sometimes*, depending on
|
||||
memory-allocation patterns. The header-file prototypes no longer name
|
||||
the parameters, avoiding possible name conflicts. The possibility that
|
||||
some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
|
||||
now handled gracefully. "uchar" is no longer used as an internal type
|
||||
name (too many people have the same idea). Still the same old lousy
|
||||
performance, alas.
|
||||
|
||||
New in alpha3.1: Basically nothing, this release is just a bookkeeping
|
||||
convenience. Stay tuned.
|
||||
|
||||
New in alpha3.0: Performance is no better, alas, but some fixes have been
|
||||
made and some functionality has been added. (This is basically the "get
|
||||
it out the door in time for 4.4" release.) One bug fix: regfree() didn't
|
||||
free the main internal structure (how embarrassing). It is now possible
|
||||
to put NULs in either the RE or the target string, using (resp.) a new
|
||||
REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to
|
||||
regcomp() makes all characters ordinary, so you can match a literal
|
||||
string easily (this will become more useful when performance improves!).
|
||||
There are now primitives to match beginnings and ends of words, although
|
||||
the syntax is disgusting and so is the implementation. The REG_ATOI
|
||||
debugging interface has changed a bit. And there has been considerable
|
||||
internal cleanup of various kinds.
|
||||
|
||||
New in alpha2.3: Split change list out of README, and moved flags notes
|
||||
into Makefile. Macro-ized the name of regex(7) in regex(3), since it has
|
||||
to change for 4.4BSD. Cleanup work in engine.c, and some new regression
|
||||
tests to catch tricky cases thereof.
|
||||
|
||||
New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two
|
||||
small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
|
||||
in my own test program and might be useful to others for similar purposes.
|
||||
The regression test will now compile (and run) without REDEBUG. The
|
||||
BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now.
|
||||
Char/uchar parameters are now written int/unsigned, to avoid possible
|
||||
portability problems with unpromoted parameters. Some unsigned casts have
|
||||
been introduced to minimize portability problems with shifting into sign
|
||||
bits.
|
||||
|
||||
New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big
|
||||
thing is that regex.h is now generated, using mkh, rather than being
|
||||
supplied in the distribution; due to circularities in dependencies,
|
||||
you have to build regex.h explicitly by "make h". The two known bugs
|
||||
have been fixed (and the regression test now checks for them), as has a
|
||||
problem with assertions not being suppressed in the absence of REDEBUG.
|
||||
No performance work yet.
|
||||
|
||||
New in alpha2: Backslash-anything is an ordinary character, not an
|
||||
error (except, of course, for the handful of backslashed metacharacters
|
||||
in BREs), which should reduce script breakage. The regression test
|
||||
checks *where* null strings are supposed to match, and has generally
|
||||
been tightened up somewhat. Small bug fixes in parameter passing (not
|
||||
harmful, but technically errors) and some other areas. Debugging
|
||||
invoked by defining REDEBUG rather than not defining NDEBUG.
|
||||
|
||||
New in alpha+3: full prototyping for internal routines, using a little
|
||||
helper program, mkh, which extracts prototypes given in stylized comments.
|
||||
More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple
|
||||
pre-screening of input when a literal string is known to be part of the
|
||||
RE; this does wonders for performance.
|
||||
|
||||
New in alpha+2: minor bits of cleanup. Notably, the number "32" for the
|
||||
word width isn't hardwired into regexec.c any more, the public header
|
||||
file prototypes the functions if __STDC__ is defined, and some small typos
|
||||
in the manpages have been fixed.
|
||||
|
||||
New in alpha+1: improvements to the manual pages, and an important
|
||||
extension, the REG_STARTEND option to regexec().
|
|
@ -0,0 +1,31 @@
|
|||
/* character-class table */
|
||||
static struct cclass {
|
||||
char *name;
|
||||
char *chars;
|
||||
char *multis;
|
||||
} cclasses[] = {
|
||||
"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789", "",
|
||||
"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
||||
"",
|
||||
"blank", " \t", "",
|
||||
"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
|
||||
\25\26\27\30\31\32\33\34\35\36\37\177", "",
|
||||
"digit", "0123456789", "",
|
||||
"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
"",
|
||||
"lower", "abcdefghijklmnopqrstuvwxyz",
|
||||
"",
|
||||
"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
|
||||
"",
|
||||
"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
"",
|
||||
"space", "\t\n\v\f\r ", "",
|
||||
"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
"",
|
||||
"xdigit", "0123456789ABCDEFabcdef",
|
||||
"",
|
||||
NULL, 0, ""
|
||||
};
|
|
@ -0,0 +1,102 @@
|
|||
/* character-name table */
|
||||
static struct cname {
|
||||
char *name;
|
||||
char code;
|
||||
} cnames[] = {
|
||||
"NUL", '\0',
|
||||
"SOH", '\001',
|
||||
"STX", '\002',
|
||||
"ETX", '\003',
|
||||
"EOT", '\004',
|
||||
"ENQ", '\005',
|
||||
"ACK", '\006',
|
||||
"BEL", '\007',
|
||||
"alert", '\007',
|
||||
"BS", '\010',
|
||||
"backspace", '\b',
|
||||
"HT", '\011',
|
||||
"tab", '\t',
|
||||
"LF", '\012',
|
||||
"newline", '\n',
|
||||
"VT", '\013',
|
||||
"vertical-tab", '\v',
|
||||
"FF", '\014',
|
||||
"form-feed", '\f',
|
||||
"CR", '\015',
|
||||
"carriage-return", '\r',
|
||||
"SO", '\016',
|
||||
"SI", '\017',
|
||||
"DLE", '\020',
|
||||
"DC1", '\021',
|
||||
"DC2", '\022',
|
||||
"DC3", '\023',
|
||||
"DC4", '\024',
|
||||
"NAK", '\025',
|
||||
"SYN", '\026',
|
||||
"ETB", '\027',
|
||||
"CAN", '\030',
|
||||
"EM", '\031',
|
||||
"SUB", '\032',
|
||||
"ESC", '\033',
|
||||
"IS4", '\034',
|
||||
"FS", '\034',
|
||||
"IS3", '\035',
|
||||
"GS", '\035',
|
||||
"IS2", '\036',
|
||||
"RS", '\036',
|
||||
"IS1", '\037',
|
||||
"US", '\037',
|
||||
"space", ' ',
|
||||
"exclamation-mark", '!',
|
||||
"quotation-mark", '"',
|
||||
"number-sign", '#',
|
||||
"dollar-sign", '$',
|
||||
"percent-sign", '%',
|
||||
"ampersand", '&',
|
||||
"apostrophe", '\'',
|
||||
"left-parenthesis", '(',
|
||||
"right-parenthesis", ')',
|
||||
"asterisk", '*',
|
||||
"plus-sign", '+',
|
||||
"comma", ',',
|
||||
"hyphen", '-',
|
||||
"hyphen-minus", '-',
|
||||
"period", '.',
|
||||
"full-stop", '.',
|
||||
"slash", '/',
|
||||
"solidus", '/',
|
||||
"zero", '0',
|
||||
"one", '1',
|
||||
"two", '2',
|
||||
"three", '3',
|
||||
"four", '4',
|
||||
"five", '5',
|
||||
"six", '6',
|
||||
"seven", '7',
|
||||
"eight", '8',
|
||||
"nine", '9',
|
||||
"colon", ':',
|
||||
"semicolon", ';',
|
||||
"less-than-sign", '<',
|
||||
"equals-sign", '=',
|
||||
"greater-than-sign", '>',
|
||||
"question-mark", '?',
|
||||
"commercial-at", '@',
|
||||
"left-square-bracket", '[',
|
||||
"backslash", '\\',
|
||||
"reverse-solidus", '\\',
|
||||
"right-square-bracket", ']',
|
||||
"circumflex", '^',
|
||||
"circumflex-accent", '^',
|
||||
"underscore", '_',
|
||||
"low-line", '_',
|
||||
"grave-accent", '`',
|
||||
"left-brace", '{',
|
||||
"left-curly-bracket", '{',
|
||||
"vertical-line", '|',
|
||||
"right-brace", '}',
|
||||
"right-curly-bracket", '}',
|
||||
"tilde", '~',
|
||||
"DEL", '\177',
|
||||
NULL, 0,
|
||||
};
|
|
@ -0,0 +1,242 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
#include "debug.ih"
|
||||
|
||||
/*
|
||||
- regprint - print a regexp for debugging
|
||||
== void regprint(regex_t *r, FILE *d);
|
||||
*/
|
||||
void
|
||||
regprint(r, d)
|
||||
regex_t *r;
|
||||
FILE *d;
|
||||
{
|
||||
register struct re_guts *g = r->re_g;
|
||||
register int i;
|
||||
register int c;
|
||||
register int last;
|
||||
int nincat[NC];
|
||||
|
||||
fprintf(d, "%ld states, %d categories", (long)g->nstates,
|
||||
g->ncategories);
|
||||
fprintf(d, ", first %ld last %ld", (long)g->firststate,
|
||||
(long)g->laststate);
|
||||
if (g->iflags&USEBOL)
|
||||
fprintf(d, ", USEBOL");
|
||||
if (g->iflags&USEEOL)
|
||||
fprintf(d, ", USEEOL");
|
||||
if (g->iflags&BAD)
|
||||
fprintf(d, ", BAD");
|
||||
if (g->nsub > 0)
|
||||
fprintf(d, ", nsub=%ld", (long)g->nsub);
|
||||
if (g->must != NULL)
|
||||
fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
|
||||
g->must);
|
||||
if (g->backrefs)
|
||||
fprintf(d, ", backrefs");
|
||||
if (g->nplus > 0)
|
||||
fprintf(d, ", nplus %ld", (long)g->nplus);
|
||||
fprintf(d, "\n");
|
||||
s_print(g, d);
|
||||
for (i = 0; i < g->ncategories; i++) {
|
||||
nincat[i] = 0;
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
||||
if (g->categories[c] == i)
|
||||
nincat[i]++;
|
||||
}
|
||||
fprintf(d, "cc0#%d", nincat[0]);
|
||||
for (i = 1; i < g->ncategories; i++)
|
||||
if (nincat[i] == 1) {
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
||||
if (g->categories[c] == i)
|
||||
break;
|
||||
fprintf(d, ", %d=%s", i, regchar(c));
|
||||
}
|
||||
fprintf(d, "\n");
|
||||
for (i = 1; i < g->ncategories; i++)
|
||||
if (nincat[i] != 1) {
|
||||
fprintf(d, "cc%d\t", i);
|
||||
last = -1;
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
|
||||
if (c <= CHAR_MAX && g->categories[c] == i) {
|
||||
if (last < 0) {
|
||||
fprintf(d, "%s", regchar(c));
|
||||
last = c;
|
||||
}
|
||||
} else {
|
||||
if (last >= 0) {
|
||||
if (last != c-1)
|
||||
fprintf(d, "-%s",
|
||||
regchar(c-1));
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
fprintf(d, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
- s_print - print the strip for debugging
|
||||
== static void s_print(register struct re_guts *g, FILE *d);
|
||||
*/
|
||||
static void
|
||||
s_print(g, d)
|
||||
register struct re_guts *g;
|
||||
FILE *d;
|
||||
{
|
||||
register sop *s;
|
||||
register cset *cs;
|
||||
register int i;
|
||||
register int done = 0;
|
||||
register sop opnd;
|
||||
register int col = 0;
|
||||
register int last;
|
||||
register sopno offset = 2;
|
||||
# define GAP() { if (offset % 5 == 0) { \
|
||||
if (col > 40) { \
|
||||
fprintf(d, "\n\t"); \
|
||||
col = 0; \
|
||||
} else { \
|
||||
fprintf(d, " "); \
|
||||
col++; \
|
||||
} \
|
||||
} else \
|
||||
col++; \
|
||||
offset++; \
|
||||
}
|
||||
|
||||
if (OP(g->strip[0]) != OEND)
|
||||
fprintf(d, "missing initial OEND!\n");
|
||||
for (s = &g->strip[1]; !done; s++) {
|
||||
opnd = OPND(*s);
|
||||
switch (OP(*s)) {
|
||||
case OEND:
|
||||
fprintf(d, "\n");
|
||||
done = 1;
|
||||
break;
|
||||
case OCHAR:
|
||||
if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
|
||||
fprintf(d, "\\%c", (char)opnd);
|
||||
else
|
||||
fprintf(d, "%s", regchar((char)opnd));
|
||||
break;
|
||||
case OBOL:
|
||||
fprintf(d, "^");
|
||||
break;
|
||||
case OEOL:
|
||||
fprintf(d, "$");
|
||||
break;
|
||||
case OBOW:
|
||||
fprintf(d, "\\{");
|
||||
break;
|
||||
case OEOW:
|
||||
fprintf(d, "\\}");
|
||||
break;
|
||||
case OANY:
|
||||
fprintf(d, ".");
|
||||
break;
|
||||
case OANYOF:
|
||||
fprintf(d, "[(%ld)", (long)opnd);
|
||||
cs = &g->sets[opnd];
|
||||
last = -1;
|
||||
for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
|
||||
if (CHIN(cs, i) && i < g->csetsize) {
|
||||
if (last < 0) {
|
||||
fprintf(d, "%s", regchar(i));
|
||||
last = i;
|
||||
}
|
||||
} else {
|
||||
if (last >= 0) {
|
||||
if (last != i-1)
|
||||
fprintf(d, "-%s",
|
||||
regchar(i-1));
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
fprintf(d, "]");
|
||||
break;
|
||||
case OBACK_:
|
||||
fprintf(d, "(\\<%ld>", (long)opnd);
|
||||
break;
|
||||
case O_BACK:
|
||||
fprintf(d, "<%ld>\\)", (long)opnd);
|
||||
break;
|
||||
case OPLUS_:
|
||||
fprintf(d, "(+");
|
||||
if (OP(*(s+opnd)) != O_PLUS)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
break;
|
||||
case O_PLUS:
|
||||
if (OP(*(s-opnd)) != OPLUS_)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
fprintf(d, "+)");
|
||||
break;
|
||||
case OQUEST_:
|
||||
fprintf(d, "(?");
|
||||
if (OP(*(s+opnd)) != O_QUEST)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
break;
|
||||
case O_QUEST:
|
||||
if (OP(*(s-opnd)) != OQUEST_)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
fprintf(d, "?)");
|
||||
break;
|
||||
case OLPAREN:
|
||||
fprintf(d, "((<%ld>", (long)opnd);
|
||||
break;
|
||||
case ORPAREN:
|
||||
fprintf(d, "<%ld>))", (long)opnd);
|
||||
break;
|
||||
case OCH_:
|
||||
fprintf(d, "<");
|
||||
if (OP(*(s+opnd)) != OOR2)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
break;
|
||||
case OOR1:
|
||||
if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
fprintf(d, "|");
|
||||
break;
|
||||
case OOR2:
|
||||
fprintf(d, "|");
|
||||
if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
break;
|
||||
case O_CH:
|
||||
if (OP(*(s-opnd)) != OOR1)
|
||||
fprintf(d, "<%ld>", (long)opnd);
|
||||
fprintf(d, ">");
|
||||
break;
|
||||
default:
|
||||
fprintf(d, "!%d(%d)!", OP(*s), opnd);
|
||||
break;
|
||||
}
|
||||
if (!done)
|
||||
GAP();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 - regchar - make a character printable
 == static char *regchar(int ch);
 *
 * ch is a character code; callers in this file pass plain-char values,
 * which can be negative where char is signed.  Only the low-order
 * unsigned-char value is used.
 *
 * Returns a pointer to a static buffer holding either the character
 * itself (if printable, or a space) or a backslash followed by its octal
 * code.  The buffer is overwritten by the next call.
 */
static char *			/* -> representation */
regchar(ch)
int ch;
{
	static char buf[10];
	/*
	 * Reduce to 0..UCHAR_MAX first: isprint() is only defined for
	 * unsigned-char values (and EOF), and "\\%o" of a negative int
	 * expands to more digits than buf can hold.
	 */
	register int uc = (unsigned char)ch;

	if (isprint(uc) || uc == ' ')
		sprintf(buf, "%c", uc);
	else
		sprintf(buf, "\\%o", uc);
	return(buf);
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
/* ========= begin header generated by ./mkh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === engine.c === */
|
||||
static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
|
||||
#define BOL (OUT+1)
|
||||
#define EOL (BOL+1)
|
||||
#define BOLEOL (BOL+2)
|
||||
#define NOTHING (BOL+3)
|
||||
#define BOW (BOL+4)
|
||||
#define EOW (BOL+5)
|
||||
#define CODEMAX (BOL+5) /* highest code used */
|
||||
#define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
#define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
#ifdef REDEBUG
|
||||
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static char *pchar(int ch);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
|
@ -0,0 +1,510 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "main.ih"
|
||||
|
||||
char *progname;
|
||||
int debug = 0;
|
||||
int line = 0;
|
||||
int status = 0;
|
||||
|
||||
int copts = REG_EXTENDED;
|
||||
int eopts = 0;
|
||||
regoff_t startoff = 0;
|
||||
regoff_t endoff = 0;
|
||||
|
||||
|
||||
extern int split();
|
||||
extern void regprint();
|
||||
|
||||
/*
|
||||
- main - do the simple case, hand off to regress() for regression
|
||||
*/
|
||||
main(argc, argv)
|
||||
int argc;
|
||||
char *argv[];
|
||||
{
|
||||
regex_t re;
|
||||
# define NS 10
|
||||
regmatch_t subs[NS];
|
||||
char erbuf[100];
|
||||
int err;
|
||||
size_t len;
|
||||
int c;
|
||||
int errflg = 0;
|
||||
register int i;
|
||||
extern int optind;
|
||||
extern char *optarg;
|
||||
|
||||
progname = argv[0];
|
||||
|
||||
while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
|
||||
switch (c) {
|
||||
case 'c': /* compile options */
|
||||
copts = options('c', optarg);
|
||||
break;
|
||||
case 'e': /* execute options */
|
||||
eopts = options('e', optarg);
|
||||
break;
|
||||
case 'S': /* start offset */
|
||||
startoff = (regoff_t)atoi(optarg);
|
||||
break;
|
||||
case 'E': /* end offset */
|
||||
endoff = (regoff_t)atoi(optarg);
|
||||
break;
|
||||
case 'x': /* Debugging. */
|
||||
debug++;
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
errflg++;
|
||||
break;
|
||||
}
|
||||
if (errflg) {
|
||||
fprintf(stderr, "usage: %s ", progname);
|
||||
fprintf(stderr, "[-c copt][-C][-d] [re]\n");
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (optind >= argc) {
|
||||
regress(stdin);
|
||||
exit(status);
|
||||
}
|
||||
|
||||
err = regcomp(&re, argv[optind++], copts);
|
||||
if (err) {
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
||||
eprint(err), len, sizeof(erbuf), erbuf);
|
||||
exit(status);
|
||||
}
|
||||
regprint(&re, stdout);
|
||||
|
||||
if (optind >= argc) {
|
||||
regfree(&re);
|
||||
exit(status);
|
||||
}
|
||||
|
||||
if (eopts®_STARTEND) {
|
||||
subs[0].rm_so = startoff;
|
||||
subs[0].rm_eo = strlen(argv[optind]) - endoff;
|
||||
}
|
||||
err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
|
||||
if (err) {
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
||||
eprint(err), len, sizeof(erbuf), erbuf);
|
||||
exit(status);
|
||||
}
|
||||
if (!(copts®_NOSUB)) {
|
||||
len = (int)(subs[0].rm_eo - subs[0].rm_so);
|
||||
if (subs[0].rm_so != -1) {
|
||||
if (len != 0)
|
||||
printf("match `%.*s'\n", len,
|
||||
argv[optind] + subs[0].rm_so);
|
||||
else
|
||||
printf("match `'@%.1s\n",
|
||||
argv[optind] + subs[0].rm_so);
|
||||
}
|
||||
for (i = 1; i < NS; i++)
|
||||
if (subs[i].rm_so != -1)
|
||||
printf("(%d) `%.*s'\n", i,
|
||||
(int)(subs[i].rm_eo - subs[i].rm_so),
|
||||
argv[optind] + subs[i].rm_so);
|
||||
}
|
||||
exit(status);
|
||||
}
|
||||
|
||||
/*
|
||||
- regress - main loop of regression test
|
||||
== void regress(FILE *in);
|
||||
*/
|
||||
void
|
||||
regress(in)
|
||||
FILE *in;
|
||||
{
|
||||
char inbuf[1000];
|
||||
# define MAXF 10
|
||||
char *f[MAXF];
|
||||
int nf;
|
||||
int i;
|
||||
char erbuf[100];
|
||||
size_t ne;
|
||||
char *badpat = "invalid regular expression";
|
||||
# define SHORT 10
|
||||
char *bpname = "REG_BADPAT";
|
||||
regex_t re;
|
||||
|
||||
while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
|
||||
line++;
|
||||
if (inbuf[0] == '#' || inbuf[0] == '\n')
|
||||
continue; /* NOTE CONTINUE */
|
||||
inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
|
||||
if (debug)
|
||||
fprintf(stdout, "%d:\n", line);
|
||||
nf = split(inbuf, f, MAXF, "\t\t");
|
||||
if (nf < 3) {
|
||||
fprintf(stderr, "bad input, line %d\n", line);
|
||||
exit(1);
|
||||
}
|
||||
for (i = 0; i < nf; i++)
|
||||
if (strcmp(f[i], "\"\"") == 0)
|
||||
f[i] = "";
|
||||
if (nf <= 3)
|
||||
f[3] = NULL;
|
||||
if (nf <= 4)
|
||||
f[4] = NULL;
|
||||
try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
|
||||
if (opt('&', f[1])) /* try with either type of RE */
|
||||
try(f[0], f[1], f[2], f[3], f[4],
|
||||
options('c', f[1]) &~ REG_EXTENDED);
|
||||
}
|
||||
|
||||
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
||||
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
|
||||
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
|
||||
erbuf, badpat);
|
||||
status = 1;
|
||||
}
|
||||
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
|
||||
if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
|
||||
ne != strlen(badpat)+1) {
|
||||
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
|
||||
erbuf, SHORT-1, badpat);
|
||||
status = 1;
|
||||
}
|
||||
ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
||||
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
|
||||
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
|
||||
erbuf, bpname);
|
||||
status = 1;
|
||||
}
|
||||
re.re_endp = bpname;
|
||||
ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
|
||||
if (atoi(erbuf) != (int)REG_BADPAT) {
|
||||
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
|
||||
erbuf, (long)REG_BADPAT);
|
||||
status = 1;
|
||||
} else if (ne != strlen(erbuf)+1) {
|
||||
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
|
||||
erbuf, (long)REG_BADPAT);
|
||||
status = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
- try - try it, and report on problems
|
||||
== void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
|
||||
*/
|
||||
void
|
||||
try(f0, f1, f2, f3, f4, opts)
|
||||
char *f0;
|
||||
char *f1;
|
||||
char *f2;
|
||||
char *f3;
|
||||
char *f4;
|
||||
int opts; /* may not match f1 */
|
||||
{
|
||||
regex_t re;
|
||||
# define NSUBS 10
|
||||
regmatch_t subs[NSUBS];
|
||||
# define NSHOULD 15
|
||||
char *should[NSHOULD];
|
||||
int nshould;
|
||||
char erbuf[100];
|
||||
int err;
|
||||
int len;
|
||||
char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
|
||||
register int i;
|
||||
char *grump;
|
||||
char f0copy[1000];
|
||||
char f2copy[1000];
|
||||
|
||||
strcpy(f0copy, f0);
|
||||
re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
|
||||
fixstr(f0copy);
|
||||
err = regcomp(&re, f0copy, opts);
|
||||
if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
|
||||
/* unexpected error or wrong error */
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
|
||||
line, type, eprint(err), len,
|
||||
sizeof(erbuf), erbuf);
|
||||
status = 1;
|
||||
} else if (err == 0 && opt('C', f1)) {
|
||||
/* unexpected success */
|
||||
fprintf(stderr, "%d: %s should have given REG_%s\n",
|
||||
line, type, f2);
|
||||
status = 1;
|
||||
err = 1; /* so we won't try regexec */
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
regfree(&re);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(f2copy, f2);
|
||||
fixstr(f2copy);
|
||||
|
||||
if (options('e', f1)®_STARTEND) {
|
||||
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
|
||||
fprintf(stderr, "%d: bad STARTEND syntax\n", line);
|
||||
subs[0].rm_so = strchr(f2, '(') - f2 + 1;
|
||||
subs[0].rm_eo = strchr(f2, ')') - f2;
|
||||
}
|
||||
err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
|
||||
|
||||
if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
|
||||
/* unexpected error or wrong error */
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
|
||||
line, type, eprint(err), len,
|
||||
sizeof(erbuf), erbuf);
|
||||
status = 1;
|
||||
} else if (err != 0) {
|
||||
/* nothing more to check */
|
||||
} else if (f3 == NULL) {
|
||||
/* unexpected success */
|
||||
fprintf(stderr, "%d: %s exec should have failed\n",
|
||||
line, type);
|
||||
status = 1;
|
||||
err = 1; /* just on principle */
|
||||
} else if (opts®_NOSUB) {
|
||||
/* nothing more to check */
|
||||
} else if ((grump = check(f2, subs[0], f3)) != NULL) {
|
||||
fprintf(stderr, "%d: %s %s\n", line, type, grump);
|
||||
status = 1;
|
||||
err = 1;
|
||||
}
|
||||
|
||||
if (err != 0 || f4 == NULL) {
|
||||
regfree(&re);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 1; i < NSHOULD; i++)
|
||||
should[i] = NULL;
|
||||
nshould = split(f4, should+1, NSHOULD-1, ",");
|
||||
if (nshould == 0) {
|
||||
nshould = 1;
|
||||
should[1] = "";
|
||||
}
|
||||
for (i = 1; i < NSUBS; i++) {
|
||||
grump = check(f2, subs[i], should[i]);
|
||||
if (grump != NULL) {
|
||||
fprintf(stderr, "%d: %s $%d %s\n", line,
|
||||
type, i, grump);
|
||||
status = 1;
|
||||
err = 1;
|
||||
}
|
||||
}
|
||||
|
||||
regfree(&re);
|
||||
}
|
||||
|
||||
/*
|
||||
- options - pick options out of a regression-test string
|
||||
== int options(int type, char *s);
|
||||
*/
|
||||
int
|
||||
options(type, s)
|
||||
int type; /* 'c' compile, 'e' exec */
|
||||
char *s;
|
||||
{
|
||||
register char *p;
|
||||
register int o = (type == 'c') ? copts : eopts;
|
||||
register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
|
||||
|
||||
for (p = s; *p != '\0'; p++)
|
||||
if (strchr(legal, *p) != NULL)
|
||||
switch (*p) {
|
||||
case 'b':
|
||||
o &= ~REG_EXTENDED;
|
||||
break;
|
||||
case 'i':
|
||||
o |= REG_ICASE;
|
||||
break;
|
||||
case 's':
|
||||
o |= REG_NOSUB;
|
||||
break;
|
||||
case 'n':
|
||||
o |= REG_NEWLINE;
|
||||
break;
|
||||
case 'm':
|
||||
o &= ~REG_EXTENDED;
|
||||
o |= REG_NOSPEC;
|
||||
break;
|
||||
case 'p':
|
||||
o |= REG_PEND;
|
||||
break;
|
||||
case '^':
|
||||
o |= REG_NOTBOL;
|
||||
break;
|
||||
case '$':
|
||||
o |= REG_NOTEOL;
|
||||
break;
|
||||
case '#':
|
||||
o |= REG_STARTEND;
|
||||
break;
|
||||
case 't': /* trace */
|
||||
o |= REG_TRACE;
|
||||
break;
|
||||
case 'l': /* force long representation */
|
||||
o |= REG_LARGE;
|
||||
break;
|
||||
case 'r': /* force backref use */
|
||||
o |= REG_BACKR;
|
||||
break;
|
||||
}
|
||||
return(o);
|
||||
}
|
||||
|
||||
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in the string s, 0 if not.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	register char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
|
||||
|
||||
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: N becomes newline, T becomes tab,
 * S becomes space and Z becomes NUL.  The scan runs to the original
 * terminator, so characters after a Z are still translated.  A NULL
 * pointer is accepted and ignored.
 */
void
fixstr(p)
register char *p;
{
	register char *cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		}
}
|
||||
|
||||
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str		the string that was searched
 * sub		the offsets reported for one (sub)match
 * should	what ought to have matched: NULL or "-" for "nothing",
 *		"@text" for a null match located just before text (a bare
 *		"@" means at the end of the string), anything else is the
 *		expected matched text
 *
 * Returns NULL if all is well, else a complaint.  The complaint may live
 * in a static buffer that the next call overwrites.
 * NOTE(review): the complaints are built with unbounded sprintf() into a
 * 500-byte buffer; a very long mismatching match would overflow it.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register char *p;
	static char grump[500];
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range (rm_eo is known to be non-negative here) */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", len, p);
		return(grump);
	}

	/* only now is it safe to measure should: it is known to be non-NULL */
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
|
||||
|
||||
/*
|
||||
- eprint - convert error number to name
|
||||
== static char *eprint(int err);
|
||||
*/
|
||||
static char *
|
||||
eprint(err)
|
||||
int err;
|
||||
{
|
||||
static char epbuf[100];
|
||||
size_t len;
|
||||
|
||||
len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
||||
assert(len <= sizeof(epbuf));
|
||||
return(epbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
- efind - convert error name to number
|
||||
== static int efind(char *name);
|
||||
*/
|
||||
static int
|
||||
efind(name)
|
||||
char *name;
|
||||
{
|
||||
static char efbuf[100];
|
||||
size_t n;
|
||||
regex_t re;
|
||||
|
||||
sprintf(efbuf, "REG_%s", name);
|
||||
assert(strlen(efbuf) < sizeof(efbuf));
|
||||
re.re_endp = efbuf;
|
||||
(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
|
||||
return(atoi(efbuf));
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
#! /bin/sh
# mkh - pull headers out of C source
#
# usage (as implemented below): mkh [-o] [-b] [-s] [-p] [-i name] file ...
#	-o	comment out argument lists (pre-function-prototype compilers)
#	-b	wrap argument lists in the Berkeley __P(( )) macro
#	-s	strip a leading `static' from function declarations
#	-p	extract the private (==) marked lines instead of the public (=) ones
#	-i name	wrap the output in  #ifndef name / #define name ... #endif
# The generated header is written to standard output.
# PATH=/bin:/usr/bin ; export PATH

# NOTE(review): the bracket expressions `[ ]' in the patterns below show a
# single blank in this copy; they presumably held a space and a tab upstream
# -- confirm against the original before relying on tab-indented markers.

# egrep pattern to pick out marked lines
egrep='^ =([ ]|$)'

# Sed program to process marked lines into lines for the header file.
# The markers have already been removed.  Two things are done here:  removal
# of backslashed newlines, and some fudging of comments.  The first is done
# because -o needs to have prototypes on one line to strip them down.
# Getting comments into the output is tricky; we turn C++-style // comments
# into /* */ comments, after altering any existing */'s to avoid trouble.
peel=' /\\$/N
/\\\n[ ]*/s///g
/\/\//s;\*/;* /;g
/\/\//s;//\(.*\);/*\1 */;'

# Start from a known state instead of inheriting $ifndef from the environment.
ifndef=

# Options are consumed by looking at "$1" each time round.  A `for a' loop
# walks a snapshot of the original argument list, so after `-i name' it would
# see `name' next, fall into the default case and stop, leaving any later
# options to be mistaken for file names.
while test $# -gt 0
do
	case "$1" in
	-o)	# old (pre-function-prototype) compiler
		# add code to comment out argument lists
		peel="$peel
"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
		shift
		;;
	-b)	# funny Berkeley __P macro
		peel="$peel
"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
		shift
		;;
	-s)	# compiler doesn't like `static foo();'
		# add code to get rid of the `static'
		peel="$peel
"'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
		shift
		;;
	-p)	# private declarations
		egrep='^ ==([ ]|$)'
		shift
		;;
	-i)	# wrap in #ifndef, argument is name
		ifndef="$2"
		shift
		# guard the second shift so a trailing `-i' with no name
		# does not make the shell complain or exit
		test $# -gt 0 && shift
		;;
	*)	break
		;;
	esac
done

if test " $ifndef" != " "
then
	echo "#ifndef $ifndef"
	echo "#define $ifndef /* never again */"
fi
echo "/* ========= begin header generated by $0 ========= */"
echo '#ifdef __cplusplus'
echo 'extern "C" {'
echo '#endif'
for f
do
	echo
	echo "/* === $f === */"
	# select the marked lines, strip the marker, then apply the peel program;
	# "$f" is quoted so file names containing blanks survive
	grep -E "$egrep" "$f" | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel"
	echo
done
echo '#ifdef __cplusplus'
echo '}'
echo '#endif'
echo "/* ========= end header generated by $0 ========= */"
if test " $ifndef" != " "
then
	echo "#endif"
fi
exit 0
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,51 @@
|
|||
/* ========= begin header generated by ./mkh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === regcomp.c === */
|
||||
static void p_ere(register struct parse *p, int stop);
|
||||
static void p_ere_exp(register struct parse *p);
|
||||
static void p_str(register struct parse *p);
|
||||
static void p_bre(register struct parse *p, register int end1, register int end2);
|
||||
static int p_simp_re(register struct parse *p, int starordinary);
|
||||
static int p_count(register struct parse *p);
|
||||
static void p_bracket(register struct parse *p);
|
||||
static void p_b_term(register struct parse *p, register cset *cs);
|
||||
static void p_b_cclass(register struct parse *p, register cset *cs);
|
||||
static void p_b_eclass(register struct parse *p, register cset *cs);
|
||||
static char p_b_symbol(register struct parse *p);
|
||||
static char p_b_coll_elem(register struct parse *p, int endc);
|
||||
static char othercase(int ch);
|
||||
static void bothcases(register struct parse *p, int ch);
|
||||
static void ordinary(register struct parse *p, register int ch);
|
||||
static void nonnewline(register struct parse *p);
|
||||
static void repeat(register struct parse *p, sopno start, int from, int to);
|
||||
static int seterr(register struct parse *p, int e);
|
||||
static cset *allocset(register struct parse *p);
|
||||
static void freeset(register struct parse *p, register cset *cs);
|
||||
static int freezeset(register struct parse *p, register cset *cs);
|
||||
static int firstch(register struct parse *p, register cset *cs);
|
||||
static int nch(register struct parse *p, register cset *cs);
|
||||
static void mcadd(register struct parse *p, register cset *cs, register char *cp);
|
||||
static void mcsub(register cset *cs, register char *cp);
|
||||
static int mcin(register cset *cs, register char *cp);
|
||||
static char *mcfind(register cset *cs, register char *cp);
|
||||
static void mcinvert(register struct parse *p, register cset *cs);
|
||||
static void mccase(register struct parse *p, register cset *cs);
|
||||
static int isinsets(register struct re_guts *g, int c);
|
||||
static int samesets(register struct re_guts *g, int c1, int c2);
|
||||
static void categorize(struct parse *p, register struct re_guts *g);
|
||||
static sopno dupl(register struct parse *p, sopno start, sopno finish);
|
||||
static void doemit(register struct parse *p, sop op, size_t opnd);
|
||||
static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
|
||||
static void dofwd(register struct parse *p, sopno pos, sop value);
|
||||
static void enlarge(register struct parse *p, sopno size);
|
||||
static void stripsnug(register struct parse *p, register struct re_guts *g);
|
||||
static void findmust(register struct parse *p, register struct re_guts *g);
|
||||
static sopno pluscount(register struct parse *p, register struct re_guts *g);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
Binary file not shown.
|
@ -0,0 +1,126 @@
|
|||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regerror.ih"
|
||||
|
||||
/*
|
||||
= #define REG_OKAY 0
|
||||
= #define REG_NOMATCH 1
|
||||
= #define REG_BADPAT 2
|
||||
= #define REG_ECOLLATE 3
|
||||
= #define REG_ECTYPE 4
|
||||
= #define REG_EESCAPE 5
|
||||
= #define REG_ESUBREG 6
|
||||
= #define REG_EBRACK 7
|
||||
= #define REG_EPAREN 8
|
||||
= #define REG_EBRACE 9
|
||||
= #define REG_BADBR 10
|
||||
= #define REG_ERANGE 11
|
||||
= #define REG_ESPACE 12
|
||||
= #define REG_BADRPT 13
|
||||
= #define REG_EMPTY 14
|
||||
= #define REG_ASSERT 15
|
||||
= #define REG_INVARG 16
|
||||
= #define REG_ATOI 255 // convert name to number (!)
|
||||
= #define REG_ITOA 0400 // convert number to name (!)
|
||||
*/
|
||||
static struct rerr {
|
||||
int code;
|
||||
char *name;
|
||||
char *explain;
|
||||
} rerrs[] = {
|
||||
REG_OKAY, "REG_OKAY", "no errors detected",
|
||||
REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match",
|
||||
REG_BADPAT, "REG_BADPAT", "invalid regular expression",
|
||||
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
|
||||
REG_ECTYPE, "REG_ECTYPE", "invalid character class",
|
||||
REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)",
|
||||
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
|
||||
REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced",
|
||||
REG_EPAREN, "REG_EPAREN", "parentheses not balanced",
|
||||
REG_EBRACE, "REG_EBRACE", "braces not balanced",
|
||||
REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
|
||||
REG_ERANGE, "REG_ERANGE", "invalid character range",
|
||||
REG_ESPACE, "REG_ESPACE", "out of memory",
|
||||
REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid",
|
||||
REG_EMPTY, "REG_EMPTY", "empty (sub)expression",
|
||||
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
|
||||
REG_INVARG, "REG_INVARG", "invalid argument to regex routine",
|
||||
-1, "", "*** unknown regexp error code ***",
|
||||
};
|
||||
|
||||
/*
|
||||
- regerror - the interface to error numbers
|
||||
= extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
size_t
|
||||
regerror(errcode, preg, errbuf, errbuf_size)
|
||||
int errcode;
|
||||
const regex_t *preg;
|
||||
char *errbuf;
|
||||
size_t errbuf_size;
|
||||
{
|
||||
register struct rerr *r;
|
||||
register size_t len;
|
||||
register int target = errcode &~ REG_ITOA;
|
||||
register char *s;
|
||||
char convbuf[50];
|
||||
|
||||
if (errcode == REG_ATOI)
|
||||
s = regatoi(preg, convbuf);
|
||||
else {
|
||||
for (r = rerrs; r->code >= 0; r++)
|
||||
if (r->code == target)
|
||||
break;
|
||||
|
||||
if (errcode®_ITOA) {
|
||||
if (r->code >= 0)
|
||||
(void) strcpy(convbuf, r->name);
|
||||
else
|
||||
sprintf(convbuf, "REG_0x%x", target);
|
||||
assert(strlen(convbuf) < sizeof(convbuf));
|
||||
s = convbuf;
|
||||
} else
|
||||
s = r->explain;
|
||||
}
|
||||
|
||||
len = strlen(s) + 1;
|
||||
if (errbuf_size > 0) {
|
||||
if (errbuf_size > len)
|
||||
(void) strcpy(errbuf, s);
|
||||
else {
|
||||
(void) strncpy(errbuf, s, errbuf_size-1);
|
||||
errbuf[errbuf_size-1] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
return(len);
|
||||
}
|
||||
|
||||
/*
|
||||
- regatoi - internal routine to implement REG_ATOI
|
||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
*/
|
||||
static char *
|
||||
regatoi(preg, localbuf)
|
||||
const regex_t *preg;
|
||||
char *localbuf;
|
||||
{
|
||||
register struct rerr *r;
|
||||
|
||||
for (r = rerrs; r->code >= 0; r++)
|
||||
if (strcmp(r->name, preg->re_endp) == 0)
|
||||
break;
|
||||
if (r->code < 0)
|
||||
return("0");
|
||||
|
||||
sprintf(localbuf, "%d", r->code);
|
||||
return(localbuf);
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
/* ========= begin header generated by ./mkh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === regerror.c === */
|
||||
static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
|
@ -0,0 +1,509 @@
|
|||
.TH REGEX 3 "25 Sept 1997"
|
||||
.BY "Henry Spencer"
|
||||
.de ZR
|
||||
.\" one other place knows this name: the SEE ALSO section
|
||||
.IR regex (7) \\$1
|
||||
..
|
||||
.SH NAME
|
||||
regcomp, regexec, regerror, regfree \- regular-expression library
|
||||
.SH SYNOPSIS
|
||||
.ft B
|
||||
.\".na
|
||||
#include <sys/types.h>
|
||||
.br
|
||||
#include <regex.h>
|
||||
.HP 10
|
||||
int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags);
|
||||
.HP
|
||||
int\ regexec(const\ regex_t\ *preg, const\ char\ *string,
|
||||
size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags);
|
||||
.HP
|
||||
size_t\ regerror(int\ errcode, const\ regex_t\ *preg,
|
||||
char\ *errbuf, size_t\ errbuf_size);
|
||||
.HP
|
||||
void\ regfree(regex_t\ *preg);
|
||||
.\".ad
|
||||
.ft
|
||||
.SH DESCRIPTION
|
||||
These routines implement POSIX 1003.2 regular expressions (``RE''s);
|
||||
see
|
||||
.ZR .
|
||||
.I Regcomp
|
||||
compiles an RE written as a string into an internal form,
|
||||
.I regexec
|
||||
matches that internal form against a string and reports results,
|
||||
.I regerror
|
||||
transforms error codes from either into human-readable messages,
|
||||
and
|
||||
.I regfree
|
||||
frees any dynamically-allocated storage used by the internal form
|
||||
of an RE.
|
||||
.PP
|
||||
The header
|
||||
.I <regex.h>
|
||||
declares two structure types,
|
||||
.I regex_t
|
||||
and
|
||||
.IR regmatch_t ,
|
||||
the former for compiled internal forms and the latter for match reporting.
|
||||
It also declares the four functions,
|
||||
a type
|
||||
.IR regoff_t ,
|
||||
and a number of constants with names starting with ``REG_''.
|
||||
.PP
|
||||
.I Regcomp
|
||||
compiles the regular expression contained in the
|
||||
.I pattern
|
||||
string,
|
||||
subject to the flags in
|
||||
.IR cflags ,
|
||||
and places the results in the
|
||||
.I regex_t
|
||||
structure pointed to by
|
||||
.IR preg .
|
||||
.I Cflags
|
||||
is the bitwise OR of zero or more of the following flags:
|
||||
.IP REG_EXTENDED \w'REG_EXTENDED'u+2n
|
||||
Compile modern (``extended'') REs,
|
||||
rather than the obsolete (``basic'') REs that
|
||||
are the default.
|
||||
.IP REG_BASIC
|
||||
This is a synonym for 0,
|
||||
provided as a counterpart to REG_EXTENDED to improve readability.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.IP REG_NOSPEC
|
||||
Compile with recognition of all special characters turned off.
|
||||
All characters are thus considered ordinary,
|
||||
so the ``RE'' is a literal string.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
REG_EXTENDED and REG_NOSPEC may not be used
|
||||
in the same call to
|
||||
.IR regcomp .
|
||||
.IP REG_ICASE
|
||||
Compile for matching that ignores upper/lower case distinctions.
|
||||
See
|
||||
.ZR .
|
||||
.IP REG_NOSUB
|
||||
Compile for matching that need only report success or failure,
|
||||
not what was matched.
|
||||
.IP REG_NEWLINE
|
||||
Compile for newline-sensitive matching.
|
||||
By default, newline is a completely ordinary character with no special
|
||||
meaning in either REs or strings.
|
||||
With this flag,
|
||||
`[^' bracket expressions and `.' never match newline,
|
||||
a `^' anchor matches the null string after any newline in the string
|
||||
in addition to its normal function,
|
||||
and the `$' anchor matches the null string before any newline in the
|
||||
string in addition to its normal function.
|
||||
.IP REG_PEND
|
||||
The regular expression ends,
|
||||
not at the first NUL,
|
||||
but just before the character pointed to by the
|
||||
.I re_endp
|
||||
member of the structure pointed to by
|
||||
.IR preg .
|
||||
The
|
||||
.I re_endp
|
||||
member is of type
|
||||
.IR const\ char\ * .
|
||||
This flag permits inclusion of NULs in the RE;
|
||||
they are considered ordinary characters.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.PP
|
||||
When successful,
|
||||
.I regcomp
|
||||
returns 0 and fills in the structure pointed to by
|
||||
.IR preg .
|
||||
One member of that structure
|
||||
(other than
|
||||
.IR re_endp )
|
||||
is publicized:
|
||||
.IR re_nsub ,
|
||||
of type
|
||||
.IR size_t ,
|
||||
contains the number of parenthesized subexpressions within the RE
|
||||
(except that the value of this member is undefined if the
|
||||
REG_NOSUB flag was used).
|
||||
If
|
||||
.I regcomp
|
||||
fails, it returns a non-zero error code;
|
||||
see DIAGNOSTICS.
|
||||
.PP
|
||||
.I Regexec
|
||||
matches the compiled RE pointed to by
|
||||
.I preg
|
||||
against the
|
||||
.IR string ,
|
||||
subject to the flags in
|
||||
.IR eflags ,
|
||||
and reports results using
|
||||
.IR nmatch ,
|
||||
.IR pmatch ,
|
||||
and the returned value.
|
||||
The RE must have been compiled by a previous invocation of
|
||||
.IR regcomp .
|
||||
The compiled form is not altered during execution of
|
||||
.IR regexec ,
|
||||
so a single compiled RE can be used simultaneously by multiple threads.
|
||||
.PP
|
||||
By default,
|
||||
the NUL-terminated string pointed to by
|
||||
.I string
|
||||
is considered to be the text of an entire line,
|
||||
with the NUL indicating the end of the line.
|
||||
(That is,
|
||||
any other end-of-line marker is considered to have been removed
|
||||
and replaced by the NUL.)
|
||||
The
|
||||
.I eflags
|
||||
argument is the bitwise OR of zero or more of the following flags:
|
||||
.IP REG_NOTBOL \w'REG_STARTEND'u+2n
|
||||
The first character of
|
||||
the string
|
||||
is not the beginning of a line, so the `^' anchor should not match before it.
|
||||
This does not affect the behavior of newlines under REG_NEWLINE.
|
||||
.IP REG_NOTEOL
|
||||
The NUL terminating
|
||||
the string
|
||||
does not end a line, so the `$' anchor should not match before it.
|
||||
This does not affect the behavior of newlines under REG_NEWLINE.
|
||||
.IP REG_STARTEND
|
||||
The string is considered to start at
|
||||
\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR
|
||||
and to have a terminating NUL located at
|
||||
\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR
|
||||
(there need not actually be a NUL at that location),
|
||||
regardless of the value of
|
||||
.IR nmatch .
|
||||
See below for the definition of
|
||||
.IR pmatch
|
||||
and
|
||||
.IR nmatch .
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL;
|
||||
REG_STARTEND affects only the location of the string,
|
||||
not how it is matched.
|
||||
.PP
|
||||
See
|
||||
.ZR
|
||||
for a discussion of what is matched in situations where an RE or a
|
||||
portion thereof could match any of several substrings of
|
||||
.IR string .
|
||||
.PP
|
||||
Normally,
|
||||
.I regexec
|
||||
returns 0 for success and the non-zero code REG_NOMATCH for failure.
|
||||
Other non-zero error codes may be returned in exceptional situations;
|
||||
see DIAGNOSTICS.
|
||||
.PP
|
||||
If REG_NOSUB was specified in the compilation of the RE,
|
||||
or if
|
||||
.I nmatch
|
||||
is 0,
|
||||
.I regexec
|
||||
ignores the
|
||||
.I pmatch
|
||||
argument (but see below for the case where REG_STARTEND is specified).
|
||||
Otherwise,
|
||||
.I pmatch
|
||||
points to an array of
|
||||
.I nmatch
|
||||
structures of type
|
||||
.IR regmatch_t .
|
||||
Such a structure has at least the members
|
||||
.I rm_so
|
||||
and
|
||||
.IR rm_eo ,
|
||||
both of type
|
||||
.I regoff_t
|
||||
(a signed arithmetic type at least as large as an
|
||||
.I off_t
|
||||
and a
|
||||
.IR ssize_t ),
|
||||
containing respectively the offset of the first character of a substring
|
||||
and the offset of the first character after the end of the substring.
|
||||
Offsets are measured from the beginning of the
|
||||
.I string
|
||||
argument given to
|
||||
.IR regexec .
|
||||
An empty substring is denoted by equal offsets,
|
||||
both indicating the character following the empty substring.
|
||||
.PP
|
||||
The 0th member of the
|
||||
.I pmatch
|
||||
array is filled in to indicate what substring of
|
||||
.I string
|
||||
was matched by the entire RE.
|
||||
Remaining members report what substring was matched by parenthesized
|
||||
subexpressions within the RE;
|
||||
member
|
||||
.I i
|
||||
reports subexpression
|
||||
.IR i ,
|
||||
with subexpressions counted (starting at 1) by the order of their opening
|
||||
parentheses in the RE, left to right.
|
||||
Unused entries in the array\(emcorresponding either to subexpressions that
|
||||
did not participate in the match at all, or to subexpressions that do not
|
||||
exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both
|
||||
.I rm_so
|
||||
and
|
||||
.I rm_eo
|
||||
set to \-1.
|
||||
If a subexpression participated in the match several times,
|
||||
the reported substring is the last one it matched.
|
||||
(Note, as an example in particular, that when the RE `(b*)+' matches `bbb',
|
||||
the parenthesized subexpression matches the three `b's and then
|
||||
an infinite number of empty strings following the last `b',
|
||||
so the reported substring is one of the empties.)
|
||||
.PP
|
||||
If REG_STARTEND is specified,
|
||||
.I pmatch
|
||||
must point to at least one
|
||||
.I regmatch_t
|
||||
(even if
|
||||
.I nmatch
|
||||
is 0 or REG_NOSUB was specified),
|
||||
to hold the input offsets for REG_STARTEND.
|
||||
Use for output is still entirely controlled by
|
||||
.IR nmatch ;
|
||||
if
|
||||
.I nmatch
|
||||
is 0 or REG_NOSUB was specified,
|
||||
the value of
|
||||
.IR pmatch [0]
|
||||
will not be changed by a successful
|
||||
.IR regexec .
|
||||
.PP
|
||||
.I Regerror
|
||||
maps a non-zero
|
||||
.I errcode
|
||||
from either
|
||||
.I regcomp
|
||||
or
|
||||
.I regexec
|
||||
to a human-readable, printable message.
|
||||
If
|
||||
.I preg
|
||||
is non-NULL,
|
||||
the error code should have arisen from use of
|
||||
the
|
||||
.I regex_t
|
||||
pointed to by
|
||||
.IR preg ,
|
||||
and if the error code came from
|
||||
.IR regcomp ,
|
||||
it should have been the result from the most recent
|
||||
.I regcomp
|
||||
using that
|
||||
.IR regex_t .
|
||||
.RI ( Regerror
|
||||
may be able to supply a more detailed message using information
|
||||
from the
|
||||
.IR regex_t .)
|
||||
.I Regerror
|
||||
places the NUL-terminated message into the buffer pointed to by
|
||||
.IR errbuf ,
|
||||
limiting the length (including the NUL) to at most
|
||||
.I errbuf_size
|
||||
bytes.
|
||||
If the whole message won't fit,
|
||||
as much of it as will fit before the terminating NUL is supplied.
|
||||
In any case,
|
||||
the returned value is the size of buffer needed to hold the whole
|
||||
message (including terminating NUL).
|
||||
If
|
||||
.I errbuf_size
|
||||
is 0,
|
||||
.I errbuf
|
||||
is ignored but the return value is still correct.
|
||||
.PP
|
||||
If the
|
||||
.I errcode
|
||||
given to
|
||||
.I regerror
|
||||
is first ORed with REG_ITOA,
|
||||
the ``message'' that results is the printable name of the error code,
|
||||
e.g. ``REG_NOMATCH'',
|
||||
rather than an explanation thereof.
|
||||
If
|
||||
.I errcode
|
||||
is REG_ATOI,
|
||||
then
|
||||
.I preg
|
||||
shall be non-NULL and the
|
||||
.I re_endp
|
||||
member of the structure it points to
|
||||
must point to the printable name of an error code;
|
||||
in this case, the result in
|
||||
.I errbuf
|
||||
is the decimal digits of
|
||||
the numeric value of the error code
|
||||
(0 if the name is not recognized).
|
||||
REG_ITOA and REG_ATOI are intended primarily as debugging facilities;
|
||||
they are extensions,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
Be warned also that they are considered experimental and changes are possible.
|
||||
.PP
|
||||
.I Regfree
|
||||
frees any dynamically-allocated storage associated with the compiled RE
|
||||
pointed to by
|
||||
.IR preg .
|
||||
The remaining
|
||||
.I regex_t
|
||||
is no longer a valid compiled RE
|
||||
and the effect of supplying it to
|
||||
.I regexec
|
||||
or
|
||||
.I regerror
|
||||
is undefined.
|
||||
.PP
|
||||
None of these functions references global variables except for tables
|
||||
of constants;
|
||||
all are safe for use from multiple threads if the arguments are safe.
|
||||
.SH IMPLEMENTATION CHOICES
|
||||
There are a number of decisions that 1003.2 leaves up to the implementor,
|
||||
either by explicitly saying ``undefined'' or by virtue of them being
|
||||
forbidden by the RE grammar.
|
||||
This implementation treats them as follows.
|
||||
.PP
|
||||
See
|
||||
.ZR
|
||||
for a discussion of the definition of case-independent matching.
|
||||
.PP
|
||||
There is no particular limit on the length of REs,
|
||||
except insofar as memory is limited.
|
||||
Memory usage is approximately linear in RE size, and largely insensitive
|
||||
to RE complexity, except for bounded repetitions.
|
||||
See BUGS for one short RE using them
|
||||
that will run almost any system out of memory.
|
||||
.PP
|
||||
A backslashed character other than one specifically given a magic meaning
|
||||
by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs)
|
||||
is taken as an ordinary character.
|
||||
.PP
|
||||
Any unmatched [ is a REG_EBRACK error.
|
||||
.PP
|
||||
Equivalence classes cannot begin or end bracket-expression ranges.
|
||||
The endpoint of one range cannot begin another.
|
||||
.PP
|
||||
RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255.
|
||||
.PP
|
||||
A repetition operator (?, *, +, or bounds) cannot follow another
|
||||
repetition operator.
|
||||
A repetition operator cannot begin an expression or subexpression
|
||||
or follow `^' or `|'.
|
||||
.PP
|
||||
`|' cannot appear first or last in a (sub)expression or after another `|',
|
||||
i.e. an operand of `|' cannot be an empty subexpression.
|
||||
An empty parenthesized subexpression, `()', is legal and matches an
|
||||
empty (sub)string.
|
||||
An empty string is not a legal RE.
|
||||
.PP
|
||||
A `{' followed by a digit is considered the beginning of bounds for a
|
||||
bounded repetition, which must then follow the syntax for bounds.
|
||||
A `{' \fInot\fR followed by a digit is considered an ordinary character.
|
||||
.PP
|
||||
`^' and `$' beginning and ending subexpressions in obsolete (``basic'')
|
||||
REs are anchors, not ordinary characters.
|
||||
.SH SEE ALSO
|
||||
grep(1), regex(7)
|
||||
.PP
|
||||
POSIX 1003.2, sections 2.8 (Regular Expression Notation)
|
||||
and
|
||||
B.5 (C Binding for Regular Expression Matching).
|
||||
.SH DIAGNOSTICS
|
||||
Non-zero error codes from
|
||||
.I regcomp
|
||||
and
|
||||
.I regexec
|
||||
include the following:
|
||||
.PP
|
||||
.nf
|
||||
.ta \w'REG_ECOLLATE'u+3n
|
||||
REG_NOMATCH	regexec() failed to match
REG_BADPAT	invalid regular expression
REG_ECOLLATE	invalid collating element
REG_ECTYPE	invalid character class
REG_EESCAPE	\e applied to unescapable character
REG_ESUBREG	invalid backreference number
REG_EBRACK	brackets [ ] not balanced
REG_EPAREN	parentheses ( ) not balanced
REG_EBRACE	braces { } not balanced
REG_BADBR	invalid repetition count(s) in { }
REG_ERANGE	invalid character range in [ ]
REG_ESPACE	ran out of memory
REG_BADRPT	?, *, or + operand invalid
REG_EMPTY	empty (sub)expression
REG_ASSERT	``can't happen''\(emyou found a bug
REG_INVARG	invalid argument, e.g. negative-length string
|
||||
.fi
|
||||
.SH HISTORY
|
||||
Written by Henry Spencer,
|
||||
henry@zoo.toronto.edu.
|
||||
.SH BUGS
|
||||
This is an alpha release with known defects.
|
||||
Please report problems.
|
||||
.PP
|
||||
There is one known functionality bug.
|
||||
The implementation of internationalization is incomplete:
|
||||
the locale is always assumed to be the default one of 1003.2,
|
||||
and only the collating elements etc. of that locale are available.
|
||||
.PP
|
||||
The back-reference code is subtle and doubts linger about its correctness
|
||||
in complex cases.
|
||||
.PP
|
||||
.I Regexec
|
||||
performance is poor.
|
||||
This will improve with later releases.
|
||||
.I Nmatch
|
||||
exceeding 0 is expensive;
|
||||
.I nmatch
|
||||
exceeding 1 is worse.
|
||||
.I Regexec
|
||||
is largely insensitive to RE complexity \fIexcept\fR that back
|
||||
references are massively expensive.
|
||||
RE length does matter; in particular, there is a strong speed bonus
|
||||
for keeping RE length under about 30 characters,
|
||||
with most special characters counting roughly double.
|
||||
.PP
|
||||
.I Regcomp
|
||||
implements bounded repetitions by macro expansion,
|
||||
which is costly in time and space if counts are large
|
||||
or bounded repetitions are nested.
|
||||
An RE like, say,
|
||||
`((((a{1,100}){1,100}){1,100}){1,100}){1,100}'
|
||||
will (eventually) run almost any existing machine out of swap space.
|
||||
.PP
|
||||
There are suspected problems with response to obscure error conditions.
|
||||
Notably,
|
||||
certain kinds of internal overflow,
|
||||
produced only by truly enormous REs or by multiply nested bounded repetitions,
|
||||
are probably not handled well.
|
||||
.PP
|
||||
Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is
|
||||
a special character only in the presence of a previous unmatched `('.
|
||||
This can't be fixed until the spec is fixed.
|
||||
.PP
|
||||
The standard's definition of back references is vague.
|
||||
For example, does
|
||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?
|
||||
Until the standard is clarified,
|
||||
behavior in such cases should not be relied on.
|
||||
.PP
|
||||
The implementation of word-boundary matching is a bit of a kludge,
|
||||
and bugs may lurk in combinations of word-boundary matching and anchoring.
|
|
@ -0,0 +1,235 @@
|
|||
.TH REGEX 7 "25 Oct 1995"
|
||||
.BY "Henry Spencer"
|
||||
.SH NAME
|
||||
regex \- POSIX 1003.2 regular expressions
|
||||
.SH DESCRIPTION
|
||||
Regular expressions (``RE''s),
|
||||
as defined in POSIX 1003.2, come in two forms:
|
||||
modern REs (roughly those of
|
||||
.IR egrep ;
|
||||
1003.2 calls these ``extended'' REs)
|
||||
and obsolete REs (roughly those of
|
||||
.IR ed ;
|
||||
1003.2 ``basic'' REs).
|
||||
Obsolete REs mostly exist for backward compatibility in some old programs;
|
||||
they will be discussed at the end.
|
||||
1003.2 leaves some aspects of RE syntax and semantics open;
|
||||
`\(dg' marks decisions on these aspects that
|
||||
may not be fully portable to other 1003.2 implementations.
|
||||
.PP
|
||||
A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
|
||||
separated by `|'.
|
||||
It matches anything that matches one of the branches.
|
||||
.PP
|
||||
A branch is one\(dg or more \fIpieces\fR, concatenated.
|
||||
It matches a match for the first, followed by a match for the second, etc.
|
||||
.PP
|
||||
A piece is an \fIatom\fR possibly followed
|
||||
by a single\(dg `*', `+', `?', or \fIbound\fR.
|
||||
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
|
||||
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
|
||||
An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
|
||||
.PP
|
||||
A \fIbound\fR is `{' followed by an unsigned decimal integer,
|
||||
possibly followed by `,'
|
||||
possibly followed by another unsigned decimal integer,
|
||||
always followed by `}'.
|
||||
The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
|
||||
and if there are two of them, the first may not exceed the second.
|
||||
An atom followed by a bound containing one integer \fIi\fR
|
||||
and no comma matches
|
||||
a sequence of exactly \fIi\fR matches of the atom.
|
||||
An atom followed by a bound
|
||||
containing one integer \fIi\fR and a comma matches
|
||||
a sequence of \fIi\fR or more matches of the atom.
|
||||
An atom followed by a bound
|
||||
containing two integers \fIi\fR and \fIj\fR matches
|
||||
a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
|
||||
.PP
|
||||
An atom is a regular expression enclosed in `()' (matching a match for the
|
||||
regular expression),
|
||||
an empty set of `()' (matching the null string)\(dg,
|
||||
a \fIbracket expression\fR (see below), `.'
|
||||
(matching any single character), `^' (matching the null string at the
|
||||
beginning of a line), `$' (matching the null string at the
|
||||
end of a line), a `\e' followed by one of the characters
|
||||
`^.[$()|*+?{\e'
|
||||
(matching that character taken as an ordinary character),
|
||||
a `\e' followed by any other character\(dg
|
||||
(matching that character taken as an ordinary character,
|
||||
as if the `\e' had not been present\(dg),
|
||||
or a single character with no other significance (matching that character).
|
||||
A `{' followed by a character other than a digit is an ordinary
|
||||
character, not the beginning of a bound\(dg.
|
||||
It is illegal to end an RE with `\e'.
|
||||
.PP
|
||||
A \fIbracket expression\fR is a list of characters enclosed in `[]'.
|
||||
It normally matches any single character from the list (but see below).
|
||||
If the list begins with `^',
|
||||
it matches any single character
|
||||
(but see below) \fInot\fR from the rest of the list.
|
||||
If two characters in the list are separated by `\-', this is shorthand
|
||||
for the full \fIrange\fR of characters between those two (inclusive) in the
|
||||
collating sequence,
|
||||
e.g. `[0\-9]' in ASCII matches any decimal digit.
|
||||
It is illegal\(dg for two ranges to share an
|
||||
endpoint, e.g. `a\-c\-e'.
|
||||
Ranges are very collating-sequence-dependent,
|
||||
and portable programs should avoid relying on them.
|
||||
.PP
|
||||
To include a literal `]' in the list, make it the first character
|
||||
(following a possible `^').
|
||||
To include a literal `\-', make it the first or last character,
|
||||
or the second endpoint of a range.
|
||||
To use a literal `\-' as the first endpoint of a range,
|
||||
enclose it in `[.' and `.]' to make it a collating element (see below).
|
||||
With the exception of these and some combinations using `[' (see next
|
||||
paragraphs), all other special characters, including `\e', lose their
|
||||
special significance within a bracket expression.
|
||||
.PP
|
||||
Within a bracket expression, a collating element (a character,
|
||||
a multi-character sequence that collates as if it were a single character,
|
||||
or a collating-sequence name for either)
|
||||
enclosed in `[.' and `.]' stands for the
|
||||
sequence of characters of that collating element.
|
||||
The sequence is a single element of the bracket expression's list.
|
||||
A bracket expression containing a multi-character collating element
|
||||
can thus match more than one character,
|
||||
e.g. if the collating sequence includes a `ch' collating element,
|
||||
then the RE `[[.ch.]]*c' matches the first five characters
|
||||
of `chchcc'.
|
||||
.PP
|
||||
Within a bracket expression, a collating element enclosed in `[=' and
|
||||
`=]' is an equivalence class, standing for the sequences of characters
|
||||
of all collating elements equivalent to that one, including itself.
|
||||
(If there are no other equivalent collating elements,
|
||||
the treatment is as if the enclosing delimiters were `[.' and `.]'.)
|
||||
For example, if o and \o'o^' are the members of an equivalence class,
|
||||
then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
|
||||
An equivalence class may not\(dg be an endpoint
|
||||
of a range.
|
||||
.PP
|
||||
Within a bracket expression, the name of a \fIcharacter class\fR enclosed
|
||||
in `[:' and `:]' stands for the list of all characters belonging to that
|
||||
class.
|
||||
Standard character class names are:
|
||||
.PP
|
||||
.RS
|
||||
.nf
|
||||
.ta 3c 6c 9c
|
||||
alnum	digit	punct
|
||||
alpha	graph	space
|
||||
blank	lower	upper
|
||||
cntrl	print	xdigit
|
||||
.fi
|
||||
.RE
|
||||
.PP
|
||||
These stand for the character classes defined in
|
||||
.IR ctype (3).
|
||||
A locale may provide others.
|
||||
A character class may not be used as an endpoint of a range.
|
||||
.PP
|
||||
There are two special cases\(dg of bracket expressions:
|
||||
the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
|
||||
the beginning and end of a word respectively.
|
||||
A word is defined as a sequence of
|
||||
word characters
|
||||
which is neither preceded nor followed by
|
||||
word characters.
|
||||
A word character is an
|
||||
.I alnum
|
||||
character (as defined by
|
||||
.IR ctype (3))
|
||||
or an underscore.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.PP
|
||||
In the event that an RE could match more than one substring of a given
|
||||
string,
|
||||
the RE matches the one starting earliest in the string.
|
||||
If the RE could match more than one substring starting at that point,
|
||||
it matches the longest.
|
||||
Subexpressions also match the longest possible substrings, subject to
|
||||
the constraint that the whole match be as long as possible,
|
||||
with subexpressions starting earlier in the RE taking priority over
|
||||
ones starting later.
|
||||
Note that higher-level subexpressions thus take priority over
|
||||
their lower-level component subexpressions.
|
||||
.PP
|
||||
Match lengths are measured in characters, not collating elements.
|
||||
A null string is considered longer than no match at all.
|
||||
For example,
|
||||
`bb*' matches the three middle characters of `abbbc',
|
||||
`(wee|week)(knights|nights)' matches all ten characters of `weeknights',
|
||||
when `(.*).*' is matched against `abc' the parenthesized subexpression
|
||||
matches all three characters, and
|
||||
when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
|
||||
subexpression match the null string.
|
||||
.PP
|
||||
If case-independent matching is specified,
|
||||
the effect is much as if all case distinctions had vanished from the
|
||||
alphabet.
|
||||
When an alphabetic that exists in multiple cases appears as an
|
||||
ordinary character outside a bracket expression, it is effectively
|
||||
transformed into a bracket expression containing both cases,
|
||||
e.g. `x' becomes `[xX]'.
|
||||
When it appears inside a bracket expression, all case counterparts
|
||||
of it are added to the bracket expression, so that (e.g.) `[x]'
|
||||
becomes `[xX]' and `[^x]' becomes `[^xX]'.
|
||||
.PP
|
||||
No particular limit is imposed on the length of REs\(dg.
|
||||
Programs intended to be portable should not employ REs longer
|
||||
than 256 bytes,
|
||||
as an implementation can refuse to accept such REs and remain
|
||||
POSIX-compliant.
|
||||
.PP
|
||||
Obsolete (``basic'') regular expressions differ in several respects.
|
||||
`|', `+', and `?' are ordinary characters and there is no equivalent
|
||||
for their functionality.
|
||||
The delimiters for bounds are `\e{' and `\e}',
|
||||
with `{' and `}' by themselves ordinary characters.
|
||||
The parentheses for nested subexpressions are `\e(' and `\e)',
|
||||
with `(' and `)' by themselves ordinary characters.
|
||||
`^' is an ordinary character except at the beginning of the
|
||||
RE or\(dg the beginning of a parenthesized subexpression,
|
||||
`$' is an ordinary character except at the end of the
|
||||
RE or\(dg the end of a parenthesized subexpression,
|
||||
and `*' is an ordinary character if it appears at the beginning of the
|
||||
RE or the beginning of a parenthesized subexpression
|
||||
(after a possible leading `^').
|
||||
Finally, there is one new type of atom, a \fIback reference\fR:
|
||||
`\e' followed by a non-zero decimal digit \fId\fR
|
||||
matches the same sequence of characters
|
||||
matched by the \fId\fRth parenthesized subexpression
|
||||
(numbering subexpressions by the positions of their opening parentheses,
|
||||
left to right),
|
||||
so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
|
||||
.SH SEE ALSO
|
||||
regex(3)
|
||||
.PP
|
||||
POSIX 1003.2, section 2.8 (Regular Expression Notation).
|
||||
.SH HISTORY
|
||||
Written by Henry Spencer, based on the 1003.2 spec.
|
||||
.SH BUGS
|
||||
Having two kinds of REs is a botch.
|
||||
.PP
|
||||
The current 1003.2 spec says that `)' is an ordinary character in
|
||||
the absence of an unmatched `(';
|
||||
this was an unintentional result of a wording error,
|
||||
and change is likely.
|
||||
Avoid relying on it.
|
||||
.PP
|
||||
Back references are a dreadful botch,
|
||||
posing major problems for efficient implementations.
|
||||
They are also somewhat vaguely defined
|
||||
(does
|
||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
|
||||
Avoid using them.
|
||||
.PP
|
||||
1003.2's specification of case-independent matching is vague.
|
||||
The ``one case implies all cases'' definition given above
|
||||
is current consensus among implementors as to the right interpretation.
|
||||
.PP
|
||||
The syntax for word boundaries is incredibly ugly.
|
|
@ -0,0 +1,74 @@
|
|||
#ifndef _REGEX_H_
#define	_REGEX_H_	/* never again */

/*
 * Public interface of the regex library: the regex_t / regmatch_t types,
 * the regcomp(), regerror(), regexec() and regfree() entry points, and the
 * flag and error-code constants they use.
 *
 * The declarations below use off_t and size_t, so pull in <sys/types.h>
 * here instead of relying on every caller to have included it first.
 * NOTE(review): the section below is marked as generated by ./mkh; make
 * sure whatever regenerates this header keeps the include.
 */
#include <sys/types.h>

/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif

/* === regex2.h === */
typedef off_t regoff_t;
typedef struct {
	int re_magic;
	size_t re_nsub;		/* number of parenthesized subexpressions */
	const char *re_endp;	/* end pointer for REG_PEND */
	struct re_guts *re_g;	/* none of your business :-) */
} regex_t;
typedef struct {
	regoff_t rm_so;		/* start of match */
	regoff_t rm_eo;		/* end of match */
} regmatch_t;


/* === regcomp.c === */
extern int regcomp(regex_t *, const char *, int);
/* cflags bits for regcomp() (octal) */
#define	REG_BASIC	0000
#define	REG_EXTENDED	0001
#define	REG_ICASE	0002
#define	REG_NOSUB	0004
#define	REG_NEWLINE	0010
#define	REG_NOSPEC	0020
#define	REG_PEND	0040
#define	REG_DUMP	0200


/* === regerror.c === */
/* error codes */
#define	REG_OKAY	 0
#define	REG_NOMATCH	 1
#define	REG_BADPAT	 2
#define	REG_ECOLLATE	 3
#define	REG_ECTYPE	 4
#define	REG_EESCAPE	 5
#define	REG_ESUBREG	 6
#define	REG_EBRACK	 7
#define	REG_EPAREN	 8
#define	REG_EBRACE	 9
#define	REG_BADBR	10
#define	REG_ERANGE	11
#define	REG_ESPACE	12
#define	REG_BADRPT	13
#define	REG_EMPTY	14
#define	REG_ASSERT	15
#define	REG_INVARG	16
#define	REG_ATOI	255	/* convert name to number (!) */
#define	REG_ITOA	0400	/* convert number to name (!) */
extern size_t regerror(int, const regex_t *, char *, size_t);


/* === regexec.c === */
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
/* eflags bits for regexec() (octal) */
#define	REG_NOTBOL	00001
#define	REG_NOTEOL	00002
#define	REG_STARTEND	00004
#define	REG_TRACE	00400	/* tracing of execution */
#define	REG_LARGE	01000	/* force large representation */
#define	REG_BACKR	02000	/* force use of backref code */


/* === regfree.c === */
extern void regfree(regex_t *);

#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
#endif
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* First, the stuff that ends up in the outside-world include file
|
||||
= typedef off_t regoff_t;
|
||||
= typedef struct {
|
||||
= int re_magic;
|
||||
= size_t re_nsub; // number of parenthesized subexpressions
|
||||
= const char *re_endp; // end pointer for REG_PEND
|
||||
= struct re_guts *re_g; // none of your business :-)
|
||||
= } regex_t;
|
||||
= typedef struct {
|
||||
= regoff_t rm_so; // start of match
|
||||
= regoff_t rm_eo; // end of match
|
||||
= } regmatch_t;
|
||||
*/
|
||||
/*
 * Internals of regex_t.
 *
 * MAGIC1 is the value regexec() and regfree() require in regex_t.re_magic
 * before they will touch the structure.
 */
#define	MAGIC1	((('r'^0200)<<8) | 'e')

/*
 * A compiled RE is stored as a *strip*: a sequence of operators closed by
 * an endmarker.  (Some of the terminology is left over from earlier
 * versions that used several strips.)  So that the machinery can also be
 * run backwards, every departure from straight-line flow is marked at both
 * its source and its destination.  Points worth remembering:
 *
 * - OPLUS_ and O_PLUS sit *inside* the loop they create.
 * - OQUEST_ and O_QUEST sit *outside* the bypass they create.
 * - OCH_ and O_CH sit *outside* the multi-way branch they create; OOR1 and
 *   OOR2 are respectively the end and the beginning of one branch.  An
 *   OOR2 is implied right after OCH_ and an OOR1 right before O_CH.
 *
 * In a state set, an operator's bit being on means "about to execute that
 * operator", i.e. the state immediately *preceding* it.
 */
typedef long sop;		/* strip operator */
typedef long sopno;

/* a sop carries the operator in bits 26-30 and the operand in bits 0-25 */
#define	OPRMASK	0x7c000000
#define	OPDMASK	0x03ffffff
#define	OPSHIFT	(26)
#define	OP(n)	((n)&OPRMASK)
#define	OPND(n)	((n)&OPDMASK)
#define	SOP(op, opnd)	((op)|(opnd))

/*
 * operator			   meaning	   operand
 *				   ("back" and "fwd" operands are offsets)
 */
#define	OEND	(1<<OPSHIFT)	/* endmarker	   -			*/
#define	OCHAR	(2<<OPSHIFT)	/* character	   unsigned char	*/
#define	OBOL	(3<<OPSHIFT)	/* left anchor	   -			*/
#define	OEOL	(4<<OPSHIFT)	/* right anchor	   -			*/
#define	OANY	(5<<OPSHIFT)	/* .		   -			*/
#define	OANYOF	(6<<OPSHIFT)	/* [...]	   set number		*/
#define	OBACK_	(7<<OPSHIFT)	/* begin \d	   paren number		*/
#define	O_BACK	(8<<OPSHIFT)	/* end \d	   paren number		*/
#define	OPLUS_	(9<<OPSHIFT)	/* + prefix	   fwd to suffix	*/
#define	O_PLUS	(10<<OPSHIFT)	/* + suffix	   back to prefix	*/
#define	OQUEST_	(11<<OPSHIFT)	/* ? prefix	   fwd to suffix	*/
#define	O_QUEST	(12<<OPSHIFT)	/* ? suffix	   back to prefix	*/
#define	OLPAREN	(13<<OPSHIFT)	/* (		   fwd to )		*/
#define	ORPAREN	(14<<OPSHIFT)	/* )		   back to (		*/
#define	OCH_	(15<<OPSHIFT)	/* begin choice	   fwd to OOR2		*/
#define	OOR1	(16<<OPSHIFT)	/* | pt. 1	   back to OOR1 or OCH_	*/
#define	OOR2	(17<<OPSHIFT)	/* | pt. 2	   fwd to OOR2 or O_CH	*/
#define	O_CH	(18<<OPSHIFT)	/* end choice	   back to OOR1		*/
#define	OBOW	(19<<OPSHIFT)	/* begin word	   -			*/
#define	OEOW	(20<<OPSHIFT)	/* end word	   -			*/
|
||||
|
||||
/*
|
||||
* Structure for [] character-set representation. Character sets are
|
||||
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
||||
* The individual set therefore has both a pointer to the byte vector
|
||||
* and a mask to pick out the relevant bit of each byte. A hash code
|
||||
* simplifies testing whether two sets could be identical.
|
||||
*
|
||||
* This will get trickier for multicharacter collating elements. As
|
||||
* preliminary hooks for dealing with such things, we also carry along
|
||||
* a string of multi-character elements, and decide the size of the
|
||||
* vectors at run time.
|
||||
*/
|
||||
typedef struct {
|
||||
uch *ptr; /* -> uch [csetsize] */
|
||||
uch mask; /* bit within array */
|
||||
uch hash; /* hash code */
|
||||
size_t smultis;
|
||||
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
||||
} cset;
|
||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
||||
|
||||
/* stuff for character categories */
|
||||
typedef unsigned char cat_t;
|
||||
|
||||
/*
|
||||
* main compiled-expression structure
|
||||
*/
|
||||
struct re_guts {
|
||||
int magic;
|
||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
sopno firststate; /* the initial OEND (normally 0) */
|
||||
sopno laststate; /* the final OEND */
|
||||
int iflags; /* internal flags */
|
||||
# define USEBOL 01 /* used ^ */
|
||||
# define USEEOL 02 /* used $ */
|
||||
# define BAD 04 /* something wrong */
|
||||
int nbol; /* number of ^ used */
|
||||
int neol; /* number of $ used */
|
||||
int ncategories; /* how many character categories */
|
||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
||||
char *must; /* match must contain this string */
|
||||
int mlen; /* length of must */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
int backrefs; /* does it use back references? */
|
||||
sopno nplus; /* how deep does it nest +s? */
|
||||
/* catspace must be last */
|
||||
cat_t catspace[1]; /* actually [NC] */
|
||||
};
|
||||
|
||||
/* misc utilities */

/* OUT: a value that no character can have */
#define	OUT	(CHAR_MAX+1)

/*
 * ISWORD(c): non-zero when c is a word character (alphanumeric or '_').
 * isalnum() is only defined for arguments representable as unsigned char
 * (or EOF), so c is converted first; a plain char holding an 8-bit
 * character would otherwise be passed as a negative value.
 * Note that c is evaluated twice.
 */
#define	ISWORD(c)	(isalnum((unsigned char)(c)) || (c) == '_')
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* the outer shell of regexec()
|
||||
*
|
||||
* This file includes engine.c *twice*, after muchos fiddling with the
|
||||
* macros that code uses. This lets the same code operate on two different
|
||||
* representations for state sets.
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
static int nope = 0;		/* for use in asserts; shuts lint up */

/*
 * Pass 1 -- "small" state sets: a whole set lives in one unsigned word,
 * one bit per state.
 */
#define	states	unsigned
#define	states1	unsigned	/* for later use in regexec() decision */
#define	onestate	unsigned

/* whole-set operations */
#define	CLEAR(v)	((v) = 0)
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
#define	SETUP(v)	((v) = 0)

/* single-state operations, state given by number */
#define	SET0(v, n)	((v) &= ~((unsigned)1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned)1 << (n))
#define	ISSET(v, n)	((v) & ((unsigned)1 << (n)))

/* single-state operations, state given as a onestate */
#define	INIT(o, n)	((o) = (unsigned)1 << (n))
#define	INC(o)	((o) <<= 1)
#define	ISSTATEIN(v, o)	((v) & (o))

/* no per-match storage is needed for the small representation */
#define	STATEVARS	int dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */

/*
 * Abbreviations; note that these refer to a variable named `here' at the
 * point of use.  They do "if I'm here, I can also be there" etc. without
 * branches.
 */
#define	FWD(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) << (n))
#define	BACK(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) >> (n))
#define	ISSETBACK(v, n)	((v) & ((unsigned)here >> (n)))

/* function names */
#define SNAMES			/* engine.c looks after details */

#include "engine.c"

/* now undo things; states1 is deliberately kept for regexec() */
#undef	states
#undef	onestate
#undef	CLEAR
#undef	ASSIGN
#undef	EQ
#undef	SETUP
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	INIT
#undef	INC
#undef	ISSTATEIN
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	FWD
#undef	BACK
#undef	ISSETBACK
#undef	SNAMES

/*
 * Pass 2 -- "large" state sets: a set is an array of m->g->nstates chars,
 * one char per state, carved out of m->space.
 */
#define	states	char *
#define	onestate	int

/* whole-set operations */
#define	CLEAR(v)	memset(v, 0, m->g->nstates)
#define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])

/* single-state operations, state given by number */
#define	SET0(v, n)	((v)[n] = 0)
#define	SET1(v, n)	((v)[n] = 1)
#define	ISSET(v, n)	((v)[n])

/* single-state operations, state given as a onestate */
#define	INIT(o, n)	((o) = (n))
#define	INC(o)	((o)++)
#define	ISSTATEIN(v, o)	((v)[o])

/* storage for nv sets is malloced up front and handed out by SETUP() */
#define	STATEVARS	int vn; char *space
#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
				if ((m)->space == NULL) return(REG_ESPACE); \
				(m)->vn = 0; }
#define	STATETEARDOWN(m)	{ free((m)->space); }

/*
 * Abbreviations; note that these refer to a variable named `here' at the
 * point of use.  They do "if I'm here, I can also be there" etc. without
 * branches.
 */
#define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
#define	ISSETBACK(v, n)	((v)[here - (n)])

/* function names */
#define	LNAMES			/* flag */

#include "engine.c"
|
||||
|
||||
/*
|
||||
- regexec - interface for matching
|
||||
= extern int regexec(const regex_t *, const char *, size_t, \
|
||||
= regmatch_t [], int);
|
||||
= #define REG_NOTBOL 00001
|
||||
= #define REG_NOTEOL 00002
|
||||
= #define REG_STARTEND 00004
|
||||
= #define REG_TRACE 00400 // tracing of execution
|
||||
= #define REG_LARGE 01000 // force large representation
|
||||
= #define REG_BACKR 02000 // force use of backref code
|
||||
*
|
||||
* We put this here so we can exploit knowledge of the state representation
|
||||
* when choosing which matcher to call. Also, by this point the matchers
|
||||
* have been prototyped.
|
||||
*/
|
||||
int /* 0 success, REG_NOMATCH failure */
|
||||
regexec(preg, string, nmatch, pmatch, eflags)
|
||||
const regex_t *preg;
|
||||
const char *string;
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
register struct re_guts *g = preg->re_g;
|
||||
#ifdef REDEBUG
|
||||
# define GOODFLAGS(f) (f)
|
||||
#else
|
||||
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
|
||||
#endif
|
||||
|
||||
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||
return(REG_BADPAT);
|
||||
assert(!(g->iflags&BAD));
|
||||
if (g->iflags&BAD) /* backstop for no-debug case */
|
||||
return(REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
|
||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,37 @@
|
|||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
/*
|
||||
- regfree - free everything
|
||||
= extern void regfree(regex_t *);
|
||||
*/
|
||||
void
|
||||
regfree(preg)
|
||||
regex_t *preg;
|
||||
{
|
||||
register struct re_guts *g;
|
||||
|
||||
if (preg->re_magic != MAGIC1) /* oops */
|
||||
return; /* nice to complain, but hard */
|
||||
|
||||
g = preg->re_g;
|
||||
if (g == NULL || g->magic != MAGIC2) /* oops again */
|
||||
return;
|
||||
preg->re_magic = 0; /* mark it invalid */
|
||||
g->magic = 0; /* mark it invalid */
|
||||
|
||||
if (g->strip != NULL)
|
||||
free((char *)g->strip);
|
||||
if (g->sets != NULL)
|
||||
free((char *)g->sets);
|
||||
if (g->setbits != NULL)
|
||||
free((char *)g->setbits);
|
||||
if (g->must != NULL)
|
||||
free(g->must);
|
||||
free((char *)g);
|
||||
}
|
|
@ -0,0 +1,316 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
- split - divide a string into fields, like awk split()
|
||||
= int split(char *string, char *fields[], int nfields, char *sep);
|
||||
*/
|
||||
/*
 * Split `string' in place into fields, like awk's split().
 *
 * string   text to split; separators that end a stored field are
 *          overwritten with NULs
 * fields   receives a pointer to the start of each field; the list is
 *          not NULL-terminated
 * nfields  number of entries available in fields[]; when it is zero or
 *          negative nothing is stored and the fields are only counted
 * sep      "" means white space (blanks and tabs, with leading and
 *          trailing white space ignored), "c" a single separator
 *          character, "ab" (two or more characters) a run of any of them
 *
 * Returns the number of fields found, which may exceed nfields; fields
 * beyond the last slot are counted but not split off.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;				/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp;
	char *sepp;
	int trimtrail;
	char *scratch[1];		/* stand-in vector for counting only */
	int countonly = 0;

	/*
	 * The one- and two-separator loops below store a field pointer
	 * before looking at the remaining room, so they need at least one
	 * slot.  With no usable slots, run them against a private one-entry
	 * vector: that path never writes into the string (the one
	 * exception, the trailing trim, is guarded below) and the count
	 * returned does not depend on the vector size.
	 */
	if (nfields <= 0) {
		fields = scratch;
		nfields = 1;
		countonly = 1;
	}
	fp = fields;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* an empty last field is only trailing white space */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/* only cut the string when a real field is stored */
				if (fn == nfields+1 && !countonly)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
|
||||
|
||||
#ifdef TEST_SPLIT
|
||||
|
||||
|
||||
/*
|
||||
* test program
|
||||
* pgm runs regression
|
||||
* pgm sep splits stdin lines by sep
|
||||
* pgm str sep splits str by sep
|
||||
* pgm str sep n splits str by sep n times
|
||||
*/
|
||||
/*
 * Test driver for split(); only built when TEST_SPLIT is defined.
 *
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *
 * With more than three arguments the string is only copied n times and
 * never split.  NOTE(review): presumably a baseline for timing the loop
 * above it -- confirm.
 *
 * str is copied into a 512-byte buffer with strcpy(), so it must be
 * shorter than that.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	register int n;
#	define	MNF	10
	char *fields[MNF];

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Cut the line at its newline, if it has one.  A last
			 * line without a newline, or a line too long for buf,
			 * keeps all of its characters.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
|
||||
|
||||
/*
 * Split `string' by `seps' into at most NF fields and print the result.
 * As in the original, no value is returned to the caller.
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];

	/* split() fills fields[] before print() gets to read it */
	print(split(string, fields, NF, seps), NF, fields);
}
|
||||
|
||||
/*
 * Print the result of a split() call on one line of stdout.
 *
 * nf      field count returned by split(); may exceed nfp
 * nfp     number of entries actually present in fields[]
 * fields  the field vector
 *
 * Output is the count and a tab, then the stored fields in double quotes
 * separated by ", ".  When more fields were found than stored, the list
 * ends with a trailing ", ".  The line is always terminated by a newline,
 * including when there are no fields to show or the list was cut short.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");

	/*
	 * The loop emits the newline only when it prints field number nf;
	 * finish the line in the remaining cases.
	 */
	if (bound <= 0 || bound < nf)
		printf("\n");
	return(0);
}
|
||||
|
||||
#define RNF 5 /* some table entries know this */
|
||||
struct {
|
||||
char *str;
|
||||
char *seps;
|
||||
int nf;
|
||||
char *fi[RNF];
|
||||
} tests[] = {
|
||||
"", " ", 0, { "" },
|
||||
" ", " ", 2, { "", "" },
|
||||
"x", " ", 1, { "x" },
|
||||
"xy", " ", 1, { "xy" },
|
||||
"x y", " ", 2, { "x", "y" },
|
||||
"abc def g ", " ", 5, { "abc", "def", "", "g", "" },
|
||||
" a bcd", " ", 4, { "", "", "a", "bcd" },
|
||||
"a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" },
|
||||
" a b c d ", " ", 6, { "", "a", "b", "c", "d " },
|
||||
|
||||
"", " _", 0, { "" },
|
||||
" ", " _", 2, { "", "" },
|
||||
"x", " _", 1, { "x" },
|
||||
"x y", " _", 2, { "x", "y" },
|
||||
"ab _ cd", " _", 2, { "ab", "cd" },
|
||||
" a_b c ", " _", 5, { "", "a", "b", "c", "" },
|
||||
"a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" },
|
||||
" a b c d ", " _", 6, { "", "a", "b", "c", "d " },
|
||||
|
||||
"", " _~", 0, { "" },
|
||||
" ", " _~", 2, { "", "" },
|
||||
"x", " _~", 1, { "x" },
|
||||
"x y", " _~", 2, { "x", "y" },
|
||||
"ab _~ cd", " _~", 2, { "ab", "cd" },
|
||||
" a_b c~", " _~", 5, { "", "a", "b", "c", "" },
|
||||
"a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" },
|
||||
"~a b c d ", " _~", 6, { "", "a", "b", "c", "d " },
|
||||
|
||||
"", " _~-", 0, { "" },
|
||||
" ", " _~-", 2, { "", "" },
|
||||
"x", " _~-", 1, { "x" },
|
||||
"x y", " _~-", 2, { "x", "y" },
|
||||
"ab _~- cd", " _~-", 2, { "ab", "cd" },
|
||||
" a_b c~", " _~-", 5, { "", "a", "b", "c", "" },
|
||||
"a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" },
|
||||
"~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " },
|
||||
|
||||
"", " ", 0, { "" },
|
||||
" ", " ", 2, { "", "" },
|
||||
"x", " ", 1, { "x" },
|
||||
"xy", " ", 1, { "xy" },
|
||||
"x y", " ", 2, { "x", "y" },
|
||||
"abc def g ", " ", 4, { "abc", "def", "g", "" },
|
||||
" a bcd", " ", 3, { "", "a", "bcd" },
|
||||
"a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" },
|
||||
" a b c d ", " ", 6, { "", "a", "b", "c", "d " },
|
||||
|
||||
"", "", 0, { "" },
|
||||
" ", "", 0, { "" },
|
||||
"x", "", 1, { "x" },
|
||||
"xy", "", 1, { "xy" },
|
||||
"x y", "", 2, { "x", "y" },
|
||||
"abc def g ", "", 3, { "abc", "def", "g" },
|
||||
"\t a bcd", "", 2, { "a", "bcd" },
|
||||
" a \tb\t c ", "", 3, { "a", "b", "c" },
|
||||
"a b c d e ", "", 5, { "a", "b", "c", "d", "e" },
|
||||
"a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" },
|
||||
" a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " },
|
||||
|
||||
NULL, NULL, 0, { NULL },
|
||||
};
|
||||
|
||||
regress()
|
||||
{
|
||||
char buf[512];
|
||||
register int n;
|
||||
char *fields[RNF+1];
|
||||
register int nf;
|
||||
register int i;
|
||||
register int printit;
|
||||
register char *f;
|
||||
|
||||
for (n = 0; tests[n].str != NULL; n++) {
|
||||
(void) strcpy(buf, tests[n].str);
|
||||
fields[RNF] = NULL;
|
||||
nf = split(buf, fields, RNF, tests[n].seps);
|
||||
printit = 0;
|
||||
if (nf != tests[n].nf) {
|
||||
printf("split `%s' by `%s' gave %d fields, not %d\n",
|
||||
tests[n].str, tests[n].seps, nf, tests[n].nf);
|
||||
printit = 1;
|
||||
} else if (fields[RNF] != NULL) {
|
||||
printf("split() went beyond array end\n");
|
||||
printit = 1;
|
||||
} else {
|
||||
for (i = 0; i < nf && i < RNF; i++) {
|
||||
f = fields[i];
|
||||
if (f == NULL)
|
||||
f = "(NULL)";
|
||||
if (strcmp(f, tests[n].fi[i]) != 0) {
|
||||
printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
|
||||
tests[n].str, tests[n].seps,
|
||||
i, fields[i], tests[n].fi[i]);
|
||||
printit = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (printit)
|
||||
print(nf, RNF, fields);
|
||||
}
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,477 @@
|
|||
# regular expression test set
|
||||
# Lines are at least three fields, separated by one or more tabs. "" stands
|
||||
# for an empty field. First field is an RE. Second field is flags. If
|
||||
# C flag given, regcomp() is expected to fail, and the third field is the
|
||||
# error name (minus the leading REG_).
|
||||
#
|
||||
# Otherwise it is expected to succeed, and the third field is the string to
|
||||
# try matching it against. If there is no fourth field, the match is
|
||||
# expected to fail. If there is a fourth field, it is the substring that
|
||||
# the RE is expected to match. If there is a fifth field, it is a comma-
|
||||
# separated list of what the subexpressions should match, with - indicating
|
||||
# no match for that one. In both the fourth and fifth fields, a (sub)field
|
||||
# starting with @ indicates that the (sub)expression is expected to match
|
||||
# a null string followed by the stuff after the @; this provides a way to
|
||||
# test where null strings match. The character `N' in REs and strings
|
||||
# is newline, `S' is space, `T' is tab, `Z' is NUL.
|
||||
#
|
||||
# The full list of flags:
|
||||
# - placeholder, does nothing
|
||||
# b RE is a BRE, not an ERE
|
||||
# & try it as both an ERE and a BRE
|
||||
# C regcomp() error expected, third field is error name
|
||||
# i REG_ICASE
|
||||
# m ("mundane") REG_NOSPEC
|
||||
# s REG_NOSUB (not really testable)
|
||||
# n REG_NEWLINE
|
||||
# ^ REG_NOTBOL
|
||||
# $ REG_NOTEOL
|
||||
# # REG_STARTEND (see below)
|
||||
# p REG_PEND
|
||||
#
|
||||
# For REG_STARTEND, the start/end offsets are those of the substring
|
||||
# enclosed in ().
|
||||
|
||||
# basics
|
||||
a & a a
|
||||
abc & abc abc
|
||||
abc|de - abc abc
|
||||
a|b|c - abc a
|
||||
|
||||
# parentheses and perversions thereof
|
||||
a(b)c - abc abc
|
||||
a\(b\)c b abc abc
|
||||
a( C EPAREN
|
||||
a( b a( a(
|
||||
a\( - a( a(
|
||||
a\( bC EPAREN
|
||||
a\(b bC EPAREN
|
||||
a(b C EPAREN
|
||||
a(b b a(b a(b
|
||||
# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
|
||||
a) - a) a)
|
||||
) - ) )
|
||||
# end gagging (in a just world, those *should* give EPAREN)
|
||||
a) b a) a)
|
||||
a\) bC EPAREN
|
||||
\) bC EPAREN
|
||||
a()b - ab ab
|
||||
a\(\)b b ab ab
|
||||
|
||||
# anchoring and REG_NEWLINE
|
||||
^abc$ & abc abc
|
||||
a^b - a^b
|
||||
a^b b a^b a^b
|
||||
a$b - a$b
|
||||
a$b b a$b a$b
|
||||
^ & abc @abc
|
||||
$ & abc @
|
||||
^$ & "" @
|
||||
$^ - "" @
|
||||
\($\)\(^\) b "" @
|
||||
# stop retching, those are legitimate (although disgusting)
|
||||
^^ - "" @
|
||||
$$ - "" @
|
||||
b$ & abNc
|
||||
b$ &n abNc b
|
||||
^b$ & aNbNc
|
||||
^b$ &n aNbNc b
|
||||
^$ &n aNNb @Nb
|
||||
^$ n abc
|
||||
^$ n abcN @
|
||||
$^ n aNNb @Nb
|
||||
\($\)\(^\) bn aNNb @Nb
|
||||
^^ n^ aNNb @Nb
|
||||
$$ n aNNb @NN
|
||||
^a ^ a
|
||||
a$ $ a
|
||||
^a ^n aNb
|
||||
^b ^n aNb b
|
||||
a$ $n bNa
|
||||
b$ $n bNa b
|
||||
a*(^b$)c* - b b
|
||||
a*\(^b$\)c* b b b
|
||||
|
||||
# certain syntax errors and non-errors
|
||||
| C EMPTY
|
||||
| b | |
|
||||
* C BADRPT
|
||||
* b * *
|
||||
+ C BADRPT
|
||||
? C BADRPT
|
||||
"" &C EMPTY
|
||||
() - abc @abc
|
||||
\(\) b abc @abc
|
||||
a||b C EMPTY
|
||||
|ab C EMPTY
|
||||
ab| C EMPTY
|
||||
(|a)b C EMPTY
|
||||
(a|)b C EMPTY
|
||||
(*a) C BADRPT
|
||||
(+a) C BADRPT
|
||||
(?a) C BADRPT
|
||||
({1}a) C BADRPT
|
||||
\(\{1\}a\) bC BADRPT
|
||||
(a|*b) C BADRPT
|
||||
(a|+b) C BADRPT
|
||||
(a|?b) C BADRPT
|
||||
(a|{1}b) C BADRPT
|
||||
^* C BADRPT
|
||||
^* b * *
|
||||
^+ C BADRPT
|
||||
^? C BADRPT
|
||||
^{1} C BADRPT
|
||||
^\{1\} bC BADRPT
|
||||
|
||||
# metacharacters, backslashes
|
||||
a.c & abc abc
|
||||
a[bc]d & abd abd
|
||||
a\*c & a*c a*c
|
||||
a\\b & a\b a\b
|
||||
a\\\*b & a\*b a\*b
|
||||
a\bc & abc abc
|
||||
a\ &C EESCAPE
|
||||
a\\bc & a\bc a\bc
|
||||
\{ bC BADRPT
|
||||
a\[b & a[b a[b
|
||||
a[b &C EBRACK
|
||||
# trailing $ is a peculiar special case for the BRE code
|
||||
a$ & a a
|
||||
a$ & a$
|
||||
a\$ & a
|
||||
a\$ & a$ a$
|
||||
a\\$ & a
|
||||
a\\$ & a$
|
||||
a\\$ & a\$
|
||||
a\\$ & a\ a\
|
||||
|
||||
# back references, ugh
|
||||
a\(b\)\2c bC ESUBREG
|
||||
a\(b\1\)c bC ESUBREG
|
||||
a\(b*\)c\1d b abbcbbd abbcbbd bb
|
||||
a\(b*\)c\1d b abbcbd
|
||||
a\(b*\)c\1d b abbcbbbd
|
||||
^\(.\)\1 b abc
|
||||
a\([bc]\)\1d b abcdabbd abbd b
|
||||
a\(\([bc]\)\2\)*d b abbccd abbccd
|
||||
a\(\([bc]\)\2\)*d b abbcbd
|
||||
# actually, this next one probably ought to fail, but the spec is unclear
|
||||
a\(\(b\)*\2\)*d b abbbd abbbd
|
||||
# here is a case that no NFA implementation does right
|
||||
\(ab*\)[ab]*\1 b ababaaa ababaaa a
|
||||
# check out normal matching in the presence of back refs
|
||||
\(a\)\1bcd b aabcd aabcd
|
||||
\(a\)\1bc*d b aabcd aabcd
|
||||
\(a\)\1bc*d b aabd aabd
|
||||
\(a\)\1bc*d b aabcccd aabcccd
|
||||
\(a\)\1bc*[ce]d b aabcccd aabcccd
|
||||
^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
|
||||
|
||||
# ordinary repetitions
|
||||
ab*c & abc abc
|
||||
ab+c - abc abc
|
||||
ab?c - abc abc
|
||||
a\(*\)b b a*b a*b
|
||||
a\(**\)b b ab ab
|
||||
a\(***\)b bC BADRPT
|
||||
*a b *a *a
|
||||
**a b a a
|
||||
***a bC BADRPT
|
||||
|
||||
# the dreaded bounded repetitions
|
||||
{ & { {
|
||||
{abc & {abc {abc
|
||||
{1 C BADRPT
|
||||
{1} C BADRPT
|
||||
a{b & a{b a{b
|
||||
a{1}b - ab ab
|
||||
a\{1\}b b ab ab
|
||||
a{1,}b - ab ab
|
||||
a\{1,\}b b ab ab
|
||||
a{1,2}b - aab aab
|
||||
a\{1,2\}b b aab aab
|
||||
a{1 C EBRACE
|
||||
a\{1 bC EBRACE
|
||||
a{1a C EBRACE
|
||||
a\{1a bC EBRACE
|
||||
a{1a} C BADBR
|
||||
a\{1a\} bC BADBR
|
||||
a{,2} - a{,2} a{,2}
|
||||
a\{,2\} bC BADBR
|
||||
a{,} - a{,} a{,}
|
||||
a\{,\} bC BADBR
|
||||
a{1,x} C BADBR
|
||||
a\{1,x\} bC BADBR
|
||||
a{1,x C EBRACE
|
||||
a\{1,x bC EBRACE
|
||||
a{300} C BADBR
|
||||
a\{300\} bC BADBR
|
||||
a{1,0} C BADBR
|
||||
a\{1,0\} bC BADBR
|
||||
ab{0,0}c - abcac ac
|
||||
ab\{0,0\}c b abcac ac
|
||||
ab{0,1}c - abcac abc
|
||||
ab\{0,1\}c b abcac abc
|
||||
ab{0,3}c - abbcac abbc
|
||||
ab\{0,3\}c b abbcac abbc
|
||||
ab{1,1}c - acabc abc
|
||||
ab\{1,1\}c b acabc abc
|
||||
ab{1,3}c - acabc abc
|
||||
ab\{1,3\}c b acabc abc
|
||||
ab{2,2}c - abcabbc abbc
|
||||
ab\{2,2\}c b abcabbc abbc
|
||||
ab{2,4}c - abcabbc abbc
|
||||
ab\{2,4\}c b abcabbc abbc
|
||||
((a{1,10}){1,10}){1,10} - a a a,a
|
||||
|
||||
# multiple repetitions
|
||||
a** &C BADRPT
|
||||
a++ C BADRPT
|
||||
a?? C BADRPT
|
||||
a*+ C BADRPT
|
||||
a*? C BADRPT
|
||||
a+* C BADRPT
|
||||
a+? C BADRPT
|
||||
a?* C BADRPT
|
||||
a?+ C BADRPT
|
||||
a{1}{1} C BADRPT
|
||||
a*{1} C BADRPT
|
||||
a+{1} C BADRPT
|
||||
a?{1} C BADRPT
|
||||
a{1}* C BADRPT
|
||||
a{1}+ C BADRPT
|
||||
a{1}? C BADRPT
|
||||
a*{b} - a{b} a{b}
|
||||
a\{1\}\{1\} bC BADRPT
|
||||
a*\{1\} bC BADRPT
|
||||
a\{1\}* bC BADRPT
|
||||
|
||||
# brackets, and numerous perversions thereof
|
||||
a[b]c & abc abc
|
||||
a[ab]c & abc abc
|
||||
a[^ab]c & adc adc
|
||||
a[]b]c & a]c a]c
|
||||
a[[b]c & a[c a[c
|
||||
a[-b]c & a-c a-c
|
||||
a[^]b]c & adc adc
|
||||
a[^-b]c & adc adc
|
||||
a[b-]c & a-c a-c
|
||||
a[b &C EBRACK
|
||||
a[] &C EBRACK
|
||||
a[1-3]c & a2c a2c
|
||||
a[3-1]c &C ERANGE
|
||||
a[1-3-5]c &C ERANGE
|
||||
a[[.-.]--]c & a-c a-c
|
||||
a[1- &C ERANGE
|
||||
a[[. &C EBRACK
|
||||
a[[.x &C EBRACK
|
||||
a[[.x. &C EBRACK
|
||||
a[[.x.] &C EBRACK
|
||||
a[[.x.]] & ax ax
|
||||
a[[.x,.]] &C ECOLLATE
|
||||
a[[.one.]]b & a1b a1b
|
||||
a[[.notdef.]]b &C ECOLLATE
|
||||
a[[.].]]b & a]b a]b
|
||||
a[[:alpha:]]c & abc abc
|
||||
a[[:notdef:]]c &C ECTYPE
|
||||
a[[: &C EBRACK
|
||||
a[[:alpha &C EBRACK
|
||||
a[[:alpha:] &C EBRACK
|
||||
a[[:alpha,:] &C ECTYPE
|
||||
a[[:]:]]b &C ECTYPE
|
||||
a[[:-:]]b &C ECTYPE
|
||||
a[[:alph:]] &C ECTYPE
|
||||
a[[:alphabet:]] &C ECTYPE
|
||||
[[:alnum:]]+ - -%@a0X- a0X
|
||||
[[:alpha:]]+ - -%@aX0- aX
|
||||
[[:blank:]]+ - aSSTb SST
|
||||
[[:cntrl:]]+ - aNTb NT
|
||||
[[:digit:]]+ - a019b 019
|
||||
[[:graph:]]+ - Sa%bS a%b
|
||||
[[:lower:]]+ - AabC ab
|
||||
[[:print:]]+ - NaSbN aSb
|
||||
[[:punct:]]+ - S%-&T %-&
|
||||
[[:space:]]+ - aSNTb SNT
|
||||
[[:upper:]]+ - aBCd BC
|
||||
[[:xdigit:]]+ - p0f3Cq 0f3C
|
||||
a[[=b=]]c & abc abc
|
||||
a[[= &C EBRACK
|
||||
a[[=b &C EBRACK
|
||||
a[[=b= &C EBRACK
|
||||
a[[=b=] &C EBRACK
|
||||
a[[=b,=]] &C ECOLLATE
|
||||
a[[=one=]]b & a1b a1b
|
||||
|
||||
# complexities
|
||||
a(((b)))c - abc abc
|
||||
a(b|(c))d - abd abd
|
||||
a(b*|c)d - abbd abbd
|
||||
# just gotta have one DFA-buster, of course
|
||||
a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
|
||||
# and an inline expansion in case somebody gets tricky
|
||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
|
||||
# and in case somebody just slips in an NFA...
|
||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
|
||||
# fish for anomalies as the number of states passes 32
|
||||
12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
|
||||
123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
|
||||
1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
|
||||
12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
|
||||
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
|
||||
# and one really big one, beyond any plausible word width
|
||||
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
|
||||
# fish for problems as brackets go past 8
|
||||
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
|
||||
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
|
||||
|
||||
# subtleties of matching
|
||||
abc & xabcy abc
|
||||
a\(b\)?c\1d b acd
|
||||
aBc i Abc Abc
|
||||
a[Bc]*d i abBCcd abBCcd
|
||||
0[[:upper:]]1 &i 0a1 0a1
|
||||
0[[:lower:]]1 &i 0A1 0A1
|
||||
a[^b]c &i abc
|
||||
a[^b]c &i aBc
|
||||
a[^b]c &i adc adc
|
||||
[a]b[c] - abc abc
|
||||
[a]b[a] - aba aba
|
||||
[abc]b[abc] - abc abc
|
||||
[abc]b[abd] - abd abd
|
||||
a(b?c)+d - accd accd
|
||||
(wee|week)(knights|night) - weeknights weeknights
|
||||
(we|wee|week|frob)(knights|night|day) - weeknights weeknights
|
||||
a[bc]d - xyzaaabcaababdacd abd
|
||||
a[ab]c - aaabc abc
|
||||
abc s abc abc
|
||||
a* & b @b
|
||||
|
||||
# Let's have some fun -- try to match a C comment.
|
||||
# first the obvious, which looks okay at first glance...
|
||||
/\*.*\*/ - /*x*/ /*x*/
|
||||
# but...
|
||||
/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
|
||||
# okay, we must not match */ inside; try to do that...
|
||||
/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
|
||||
/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
|
||||
# but...
|
||||
/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
|
||||
# and a still fancier version, which does it right (I think)...
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
|
||||
/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
|
||||
|
||||
# subexpressions
|
||||
.* - abc abc -
|
||||
a(b)(c)d - abcd abcd b,c
|
||||
a(((b)))c - abc abc b,b,b
|
||||
a(b|(c))d - abd abd b,-
|
||||
a(b*|c|e)d - abbd abbd bb
|
||||
a(b*|c|e)d - acd acd c
|
||||
a(b*|c|e)d - ad ad @d
|
||||
a(b?)c - abc abc b
|
||||
a(b?)c - ac ac @c
|
||||
a(b+)c - abc abc b
|
||||
a(b+)c - abbbc abbbc bbb
|
||||
a(b*)c - ac ac @c
|
||||
(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
|
||||
# the regression tester only asks for 9 subexpressions
|
||||
a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
|
||||
a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
|
||||
a([bc]?)c - abc abc b
|
||||
a([bc]?)c - ac ac @c
|
||||
a([bc]+)c - abc abc b
|
||||
a([bc]+)c - abcc abcc bc
|
||||
a([bc]+)bc - abcbc abcbc bc
|
||||
a(bb+|b)b - abb abb b
|
||||
a(bbb+|bb+|b)b - abb abb b
|
||||
a(bbb+|bb+|b)b - abbb abbb bb
|
||||
a(bbb+|bb+|b)bb - abbb abbb b
|
||||
(.*).* - abcdef abcdef abcdef
|
||||
(a*)* - bc @b @b
|
||||
|
||||
# do we get the right subexpression when it is used more than once?
|
||||
a(b|c)*d - ad ad -
|
||||
a(b|c)*d - abcd abcd c
|
||||
a(b|c)+d - abd abd b
|
||||
a(b|c)+d - abcd abcd c
|
||||
a(b|c?)+d - ad ad @d
|
||||
a(b|c?)+d - abcd abcd @d
|
||||
a(b|c){0,0}d - ad ad -
|
||||
a(b|c){0,1}d - ad ad -
|
||||
a(b|c){0,1}d - abd abd b
|
||||
a(b|c){0,2}d - ad ad -
|
||||
a(b|c){0,2}d - abcd abcd c
|
||||
a(b|c){0,}d - ad ad -
|
||||
a(b|c){0,}d - abcd abcd c
|
||||
a(b|c){1,1}d - abd abd b
|
||||
a(b|c){1,1}d - acd acd c
|
||||
a(b|c){1,2}d - abd abd b
|
||||
a(b|c){1,2}d - abcd abcd c
|
||||
a(b|c){1,}d - abd abd b
|
||||
a(b|c){1,}d - abcd abcd c
|
||||
a(b|c){2,2}d - acbd acbd b
|
||||
a(b|c){2,2}d - abcd abcd c
|
||||
a(b|c){2,4}d - abcd abcd c
|
||||
a(b|c){2,4}d - abcbd abcbd b
|
||||
a(b|c){2,4}d - abcbcd abcbcd c
|
||||
a(b|c){2,}d - abcd abcd c
|
||||
a(b|c){2,}d - abcbd abcbd b
|
||||
a(b+|((c)*))+d - abd abd @d,@d,-
|
||||
a(b+|((c)*))+d - abcd abcd @d,@d,-
|
||||
|
||||
# check out the STARTEND option
|
||||
[abc] &# a(b)c b
|
||||
[abc] &# a(d)c
|
||||
[abc] &# a(bc)d b
|
||||
[abc] &# a(dc)d c
|
||||
. &# a()c
|
||||
b.*c &# b(bc)c bc
|
||||
b.* &# b(bc)c bc
|
||||
.*c &# b(bc)c bc
|
||||
|
||||
# plain strings, with the NOSPEC flag
|
||||
abc m abc abc
|
||||
abc m xabcy abc
|
||||
abc m xyz
|
||||
a*b m aba*b a*b
|
||||
a*b m ab
|
||||
"" mC EMPTY
|
||||
|
||||
# cases involving NULs
|
||||
aZb & a a
|
||||
aZb &p a
|
||||
aZb &p# (aZb) aZb
|
||||
aZ*b &p# (ab) ab
|
||||
a.b &# (aZb) aZb
|
||||
a.* &# (aZb)c aZb
|
||||
|
||||
# word boundaries (ick)
|
||||
[[:<:]]a & a a
|
||||
[[:<:]]a & ba
|
||||
[[:<:]]a & -a a
|
||||
a[[:>:]] & a a
|
||||
a[[:>:]] & ab
|
||||
a[[:>:]] & a- a
|
||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
|
||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
|
||||
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
|
||||
[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
|
||||
[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
|
||||
[[:<:]]a_b[[:>:]] & x_a_b
|
||||
|
||||
# past problems, and suspected problems
|
||||
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
|
||||
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
|
||||
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
|
||||
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
|
||||
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
|
||||
Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
|
||||
a?b - ab ab
|
||||
-\{0,1\}[0-9]*$ b -5 -5
|
||||
a*a*a*a*a*a*a* & aaaaaa aaaaaa
|
|
@ -0,0 +1,22 @@
|
|||
/* utility definitions */
|
||||
/* DUPMAX: _POSIX2_RE_DUP_MAX when that macro is defined, otherwise 255 */
#ifdef _POSIX2_RE_DUP_MAX
|
||||
#define DUPMAX _POSIX2_RE_DUP_MAX
|
||||
#else
|
||||
#define DUPMAX 255
|
||||
#endif
|
||||
/* INFINITY: one more than DUPMAX (presumably the "no upper bound" marker -- confirm against its users) */
/* NOTE(review): C99 <math.h> also defines INFINITY; confirm no file includes both */
#define INFINITY (DUPMAX + 1)
|
||||
/* NC: how many distinct values a char can take; CHAR_MAX and CHAR_MIN are not provided by this header */
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
/* uch: shorthand for unsigned char */
typedef unsigned char uch;
|
||||
|
||||
/* switch off assertions (if not already off) if no REDEBUG */
|
||||
#ifndef REDEBUG
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG /* no assertions please */
|
||||
#endif
|
||||
#endif
|
||||
/* included only after the NDEBUG decision above, so that decision governs assert() */
#include <assert.h>
|
||||
|
||||
/* for old systems with bcopy() but no memmove() */
|
||||
#ifdef USEBCOPY
|
||||
/* the first two arguments are swapped on purpose: the macro takes (d, s, c) and calls bcopy(s, d, c) */
#define memmove(d, s, c) bcopy(s, d, c)
|
||||
#endif
|
|
@ -504,7 +504,7 @@
|
|||
|
||||
|
||||
(define-interface scsh-high-level-process-interface
|
||||
(export (run :syntax)
|
||||
(export (runn :syntax)
|
||||
(exec-epf :syntax)
|
||||
(& :syntax)
|
||||
(|| :syntax)
|
||||
|
@ -990,6 +990,10 @@
|
|||
(export signal->interrupt
|
||||
interrupt-set
|
||||
|
||||
interrupt-handlers-vector ; JMG: replaces vm vector
|
||||
init-scsh-signal
|
||||
procobj-handler
|
||||
|
||||
(with-enabled-interrupts :syntax)
|
||||
with-enabled-interrupts*
|
||||
enabled-interrupts
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
(really-fork/pipe+ fork conns maybe-thunk))
|
||||
|
||||
;;; Common code.
|
||||
;; JMG: this should spawn a thread to prevent deadlocking the vm
|
||||
(define (really-fork/pipe+ forker conns maybe-thunk)
|
||||
(let* ((pipes (map (lambda (conn) (call-with-values pipe cons))
|
||||
conns))
|
||||
|
@ -382,7 +383,7 @@
|
|||
|
||||
;; In a subprocess, close the read ports, redirect input from
|
||||
;; the write ports, and run THUNK.
|
||||
(status (run (begin (for-each close-input-port read-ports)
|
||||
(status (runn (begin (for-each close-input-port read-ports)
|
||||
(for-each move->fdes write-ports fds)
|
||||
(thunk)))))
|
||||
|
||||
|
@ -458,7 +459,7 @@
|
|||
|
||||
(define (run/file* thunk)
|
||||
(let ((fname (create-temp-file)))
|
||||
(run (begin (thunk)) (> ,fname))
|
||||
(runn (begin (thunk)) (> ,fname))
|
||||
fname))
|
||||
|
||||
(define (run/string* thunk)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -113,6 +113,13 @@
|
|||
(define procobj-handler (lambda (enabled-interrupts) #f))
|
||||
|
||||
(define (init-scsh-signal)
|
||||
(do ((sig 32 (- sig 1)))
|
||||
((< sig 0))
|
||||
(set-scsh-os-signal-handler!
|
||||
sig
|
||||
(lambda (x) (display "default handler was called"))))
|
||||
|
||||
|
||||
(begin
|
||||
(set-interrupt-handler!
|
||||
(enum interrupt os-signal)
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include "cstuff.h"
|
||||
#include <assert.h>
|
||||
|
||||
/* Make sure our exports match up w/the implementation: */
|
||||
#include "sighandlers1.h"
|
||||
|
@ -68,8 +67,8 @@ int get_procmask(int *old_lo_p)
|
|||
static void scm_handle_sig(int sig)
|
||||
{
|
||||
/*fprintf(stderr, "scm_handle_sig(%d) = int %d\n", sig, sig2int[sig]);*/
|
||||
//JMG: Spending_interruptsS |= (1<<sig2int[sig]);
|
||||
assert(1 == 0);
|
||||
//Spending_interruptsS |= (1<<sig2int[sig]);
|
||||
fprintf(stderr, "scm_handle_sig was called with %d\n", sig);
|
||||
}
|
||||
|
||||
|
||||
|
@ -195,7 +194,7 @@ void install_scsh_handlers(void)
|
|||
/* Sneak me the S48 interrupt handlers vector. */
|
||||
s48_value get_int_handlers(void)
|
||||
{
|
||||
assert (1 == 0); //JMG
|
||||
fprintf(stderr,"get_int_handlers return 1 instead of Sinterrupt_handlersS ");
|
||||
//return Sinterrupt_handlersS;
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -4,10 +4,11 @@
|
|||
|
||||
#!
|
||||
For testing load this at a scsh prompt
|
||||
,config ,load ../vm/ps-interface.scm
|
||||
,config ,load ../vm/interfaces.scm
|
||||
,config ,load ../vm/package-defs.scm
|
||||
,config ,load ../vm/s48-package-defs.scm
|
||||
,config ,load ../scheme/prescheme/interface.scm
|
||||
,config ,load ../scheme/prescheme/package-defs.scm
|
||||
,config ,load ../scheme/vm/interfaces.scm
|
||||
,config ,load ../scheme/vm/s48-package-defs.scm
|
||||
,config ,load ../scheme/vm/package-defs.scm
|
||||
,config ,load static.scm
|
||||
,load-package static-heaps
|
||||
,in static-heaps
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
(define-simple-syntax (& . epf)
|
||||
(fork (lambda () (exec-epf . epf))))
|
||||
|
||||
(define-simple-syntax (run . epf)
|
||||
(define-simple-syntax (runn . epf)
|
||||
(wait (& . epf)))
|
||||
|
||||
;;; Sequencing operators:
|
||||
|
@ -32,9 +32,9 @@
|
|||
|
||||
;;; WARNING: || is not a readable symbol in R4RS.
|
||||
|
||||
(define-simple-syntax (|| pf ...) (or (zero? (run pf)) ...))
|
||||
(define-simple-syntax (:or: pf ...) (or (zero? (run pf)) ...))
|
||||
(define-simple-syntax (&& pf ...) (and (zero? (run pf)) ...))
|
||||
(define-simple-syntax (|| pf ...) (or (zero? (runn pf)) ...))
|
||||
(define-simple-syntax (:or: pf ...) (or (zero? (runn pf)) ...))
|
||||
(define-simple-syntax (&& pf ...) (and (zero? (runn pf)) ...))
|
||||
|
||||
(define-simple-syntax (run/collecting fds . epf)
|
||||
(run/collecting* `fds (lambda () (exec-epf . epf))))
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -109,7 +110,7 @@ s48_value df_scheme_cwd(s48_value mv_vec)
|
|||
|
||||
r1 = scheme_cwd(&r2);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -220,7 +221,7 @@ s48_value df_my_username(s48_value mv_vec)
|
|||
|
||||
r1 = my_username();
|
||||
ret1 = S48_VECTOR_REF(mv_vec,0);
|
||||
S48_SET_CAR(ret1,(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(ret1),(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -434,7 +435,7 @@ s48_value df_scm_readlink(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = scm_readlink(s48_extract_string(g1));
|
||||
ret1 = errno_on_zero_or_false(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -767,10 +768,10 @@ s48_value df_user_info_uid(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = user_info_uid(s48_extract_fixnum(g1), &r2, &r3, &r4, &r5);
|
||||
ret1 = ENTER_BOOLEAN(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
S48_VECTOR_SET(mv_vec,1,s48_enter_fixnum(r3));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,2),(long) r4); S48_SET_CDR(S48_VECTOR_REF(mv_vec,2),strlen_or_false(r4));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,3),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,2)),(long) r4); S48_SET_CDR(S48_VECTOR_REF(mv_vec,2),strlen_or_false(r4));//str-and-len
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,3)),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -789,8 +790,8 @@ s48_value df_user_info_name(s48_value g1, s48_value mv_vec)
|
|||
ret1 = ENTER_BOOLEAN(r1);
|
||||
S48_VECTOR_SET(mv_vec,0,s48_enter_fixnum(r2));
|
||||
S48_VECTOR_SET(mv_vec,1,s48_enter_fixnum(r3));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,2),(long) r4); S48_SET_CDR(S48_VECTOR_REF(mv_vec,2),strlen_or_false(r4));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,3),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,2)),(long) r4); S48_SET_CDR(S48_VECTOR_REF(mv_vec,2),strlen_or_false(r4));//str-and-len
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,3)),(long) r5); S48_SET_CDR(S48_VECTOR_REF(mv_vec,3),strlen_or_false(r5));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -806,8 +807,8 @@ s48_value df_group_info_gid(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = group_info_gid(s48_extract_fixnum(g1), &r2, &r3, &r4);
|
||||
ret1 = ENTER_BOOLEAN(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
return ret1;
|
||||
}
|
||||
|
@ -825,7 +826,7 @@ s48_value df_group_info_name(s48_value g1, s48_value mv_vec)
|
|||
r1 = group_info_name(s48_extract_string(g1), &r2, &r3, &r4);
|
||||
ret1 = ENTER_BOOLEAN(r1);
|
||||
S48_VECTOR_SET(mv_vec,0,s48_enter_fixnum(r2));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,1),(long) r3);
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,1),(long) r3);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,2,s48_enter_fixnum(r4));
|
||||
return ret1;
|
||||
}
|
||||
|
@ -841,7 +842,7 @@ s48_value df_open_dir(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = open_dir(s48_extract_string(g1), &r2, &r3);
|
||||
ret1 = False_on_zero(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2);
|
||||
SetAlienVal(S48_VECTOR_REF(mv_vec,0),(long) r2);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,1,s48_enter_fixnum(r3));
|
||||
return ret1;
|
||||
}
|
||||
|
@ -865,7 +866,7 @@ s48_value df_scm_envvec(s48_value mv_vec)
|
|||
|
||||
r1 = scm_envvec(&r2);
|
||||
ret1 = S48_VECTOR_REF(mv_vec,0);
|
||||
S48_SET_CAR(ret1,(long) r1);
|
||||
SetAlienVal(ret1,(long) r1);//simple-assign
|
||||
S48_VECTOR_SET(mv_vec,1,s48_enter_fixnum(r2));
|
||||
return ret1;
|
||||
}
|
||||
|
@ -891,7 +892,7 @@ s48_value df_getenv(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = getenv(s48_extract_string(g1));
|
||||
ret1 = S48_VECTOR_REF(mv_vec,0);
|
||||
S48_SET_CAR(ret1,(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(ret1),(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -976,7 +977,7 @@ s48_value df_scm_gethostname(s48_value mv_vec)
|
|||
|
||||
r1 = scm_gethostname();
|
||||
ret1 = S48_VECTOR_REF(mv_vec,0);
|
||||
S48_SET_CAR(ret1,(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(ret1),(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -989,7 +990,7 @@ s48_value df_errno_msg(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = errno_msg(s48_extract_fixnum(g1));
|
||||
ret1 = S48_VECTOR_REF(mv_vec,0);
|
||||
S48_SET_CAR(ret1,(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(ret1),(long) r1); S48_SET_CDR(ret1,strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
;;; Need to rationalise names here. getgid. get-gid. "effective" as morpheme?
|
||||
|
||||
(foreign-init-name "syscalls")
|
||||
|
||||
(foreign-source
|
||||
"#include <sys/signal.h>"
|
||||
"#include <sys/types.h>"
|
||||
|
@ -54,8 +56,11 @@
|
|||
(define (syscall arg ...)
|
||||
(receive (err ret-val ...) (syscall/errno arg ...)
|
||||
(cond ((not err) (values ret-val ...)) ; Win
|
||||
((= err errno/intr) (syscall arg ...)) ; Retry
|
||||
(else (errno-error err syscall arg ...)))))) ; Lose
|
||||
((= err errno/intr)
|
||||
(syscall arg ...)) ; Retry
|
||||
(else (error "syscallerror (this is a JMGhack)" err syscall)))))) ; Lose
|
||||
|
||||
; (else (errno-error err syscall arg ...)))))) ; Lose
|
||||
|
||||
;;; This case handles rest args
|
||||
((define-errno-syscall (syscall . args) syscall/errno
|
||||
|
@ -63,8 +68,11 @@
|
|||
(define (syscall . args)
|
||||
(receive (err ret-val ...) (apply syscall/errno args)
|
||||
(cond ((not err) (values ret-val ...)) ; Win
|
||||
((= err errno/intr) (apply syscall args)) ; Retry
|
||||
(else (apply errno-error err syscall args)))))))); Lose
|
||||
((= err errno/intr)
|
||||
(apply syscall args)) ; Retry
|
||||
(else (error "syscallerror (this is a JMGhack)" err syscall )))))))) ; Lose
|
||||
|
||||
; (else (apply errno-error err syscall args)))))))); Lose
|
||||
|
||||
;;; By the way, it would be better to insert a (LET LP () ...) for the
|
||||
;;; errno/intr retries, instead of calling the top-level definition
|
||||
|
@ -118,6 +126,7 @@
|
|||
;;; This ugly little hack will have to stay in until I do early
|
||||
;;; zombie reaping with SIGCHLD interrupts.
|
||||
|
||||
;; JMG: this should spawn a thread to prevent deadlocking the vm
|
||||
(define (%%fork-with-retry/errno)
|
||||
(receive (err pid) (%%fork/errno)
|
||||
(cond ((and err (eq? 'early (autoreap-policy)))
|
||||
|
@ -146,6 +155,8 @@
|
|||
|
||||
(define-errno-syscall (%chdir dir) %chdir/errno)
|
||||
|
||||
;;; JMG: this may block
|
||||
|
||||
(define (chdir . maybe-dir)
|
||||
(let ((dir (:optional maybe-dir (home-dir))))
|
||||
(%chdir (ensure-file-name-is-nondirectory dir))))
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -85,7 +86,7 @@ s48_value df_time2date(s48_value g1, s48_value g2, s48_value g3, s48_value mv_ve
|
|||
S48_VECTOR_SET(mv_vec,3,s48_enter_fixnum(r5));
|
||||
S48_VECTOR_SET(mv_vec,4,s48_enter_fixnum(r6));
|
||||
S48_VECTOR_SET(mv_vec,5,s48_enter_fixnum(r7));
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,6),(long) r8); S48_SET_CDR(S48_VECTOR_REF(mv_vec,6),strlen_or_false(r8));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,6)),(long) r8); S48_SET_CDR(S48_VECTOR_REF(mv_vec,6),strlen_or_false(r8));//str-and-len
|
||||
S48_VECTOR_SET(mv_vec,7,s48_enter_fixnum(r9));
|
||||
S48_VECTOR_SET(mv_vec,8,ENTER_BOOLEAN(r10));
|
||||
S48_VECTOR_SET(mv_vec,9,s48_enter_fixnum(r11));
|
||||
|
@ -103,7 +104,7 @@ s48_value df_format_date(s48_value g1, s48_value g2, s48_value g3, s48_value g4,
|
|||
|
||||
r1 = format_date(s48_extract_string(g1), s48_extract_fixnum(g2), s48_extract_fixnum(g3), s48_extract_fixnum(g4), s48_extract_fixnum(g5), s48_extract_fixnum(g6), s48_extract_fixnum(g7), g8, EXTRACT_BOOLEAN(g9), s48_extract_fixnum(g10), s48_extract_fixnum(g11), &r2);
|
||||
ret1 = r1;
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r2); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r2));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by a hacked version of cig 3.0.
|
||||
step 3
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -169,7 +170,7 @@ s48_value df_ttyname(s48_value g1, s48_value mv_vec)
|
|||
|
||||
r1 = ttyname(s48_extract_fixnum(g1));
|
||||
ret1 = errno_on_zero_or_false(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
@ -182,7 +183,7 @@ s48_value df_scm_ctermid(s48_value mv_vec)
|
|||
|
||||
r1 = scm_ctermid();
|
||||
ret1 = errno_on_zero_or_false(r1);
|
||||
S48_SET_CAR(S48_VECTOR_REF(mv_vec,0),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));
|
||||
SetAlienVal(S48_CAR(S48_VECTOR_REF(mv_vec,0)),(long) r1); S48_SET_CDR(S48_VECTOR_REF(mv_vec,0),strlen_or_false(r1));//str-and-len
|
||||
return ret1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue