Rest of Olins merge for SRFI-13 and 14.

This commit is contained in:
mainzelm 2001-03-23 10:52:09 +00:00
parent e18289f61c
commit 0ffb123bee
23 changed files with 5018 additions and 116 deletions

View File

@ -81,9 +81,9 @@ BIG_HEAP = -h 5000000
# LINKER_VM = ./$(VM) $(BIG_HEAP)
# LINKER_RUNNABLE = $(LINKER_VM) -i $(IMAGE)
# therefor according to 2.
LINKER_VM = $(RUNNABLE) $(BIG_HEAP)
LINKER_RUNNABLE = $(RUNNABLE)
# therefor according to 2 but canot use scsh since -i is not understood
LINKER_VM = scheme48 $(BIG_HEAP)
LINKER_RUNNABLE = scheme48
LINKER_IMAGE = build/linker.image
LINKER = $(LINKER_VM) -i $(LINKER_IMAGE)
@ -382,7 +382,7 @@ go:
echo '#!/bin/sh' >$@ && \
echo >>$@ && \
echo "lib=`pwd`" >>$@ && \
echo 'exec $$lib/$(VM) -o $$lib/$(VM) -i $$lib/scsh/$(IMAGE) "$$@"' \
echo 'exec $$lib/$(VM) -o $$lib/$(VM) -i $$lib/scsh/scsh.image "$$@"' \
>>$@ && \
chmod +x $@
@ -766,6 +766,7 @@ SCHEME =scsh/awk.scm \
scsh/scsh-version.scm \
scsh/scsh.scm \
scsh/select.scm \
scsh/sighandlers.scm \
scsh/startup.scm \
scsh/stringcoll.scm \
scsh/syntax-helpers.scm \
@ -798,16 +799,6 @@ scsh/scsh: scsh/scsh-tramp.c
-DIMAGE=\"$(LIB)/scsh.image\" \
scsh/scsh-tramp.c
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
$(srcdir)/scsh/machine/packages.scm \
$(srcdir)/scsh/rx/packages.scm \
$(srcdir)/scsh/rx/cond-package.scm \
$(srcdir)/scsh/scsh-package.scm \
$(srcdir)/scsh/lib/string-pack.scm \
$(srcdir)/scsh/lib/list-pack.scm \
$(srcdir)/scsh/lib/ccp-pack.scm
bs: build/build-scsh-image
sh $(srcdir)/build/build-scsh-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
"$(VM)" cig/cig.image
@ -817,9 +808,13 @@ loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
$(srcdir)/scsh/rx/packages.scm \
$(srcdir)/scsh/rx/cond-package.scm \
$(srcdir)/scsh/scsh-package.scm \
$(srcdir)/scsh/lib/string-pack.scm \
$(srcdir)/scsh/lib/cset-package.scm \
$(srcdir)/scsh/lib/string-package.scm \
$(srcdir)/scsh/lib/list-pack.scm \
$(srcdir)/scsh/lib/ccp-pack.scm
$(srcdir)/scsh/lib/ccp-pack.scm \
$(srcdir)/scsh/lib/char-package.scm \
$(srcdir)/scsh/lib/cset-obsolete.scm
scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
(echo ",translate =scheme48/ $(srcdir)/scheme/"; \
@ -828,7 +823,6 @@ scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
echo ",config"; \
echo ",load $(loads)"; \
echo ",load-package scsh"; \
echo ",load-package events"; \
echo ",load-package scsh-here-string-hax"; \
echo ",translate =scheme48/ $(LIB)/"; \
echo ",load-package list-lib"; \

View File

@ -318,7 +318,7 @@
s)))
((concat) ; CONCAT-delimiter reader.
(let ((not-delims (char-set-invert delims)))
(let ((not-delims (char-set-complement delims)))
(lambda maybe-port
(let* ((p (:optional maybe-port (current-input-port)))
(s (read-delimited delims p 'concat)))
@ -328,7 +328,7 @@
(string-append s extra-delims))))))))
((split) ; SPLIT-delimiter reader.
(let ((not-delims (char-set-invert delims)))
(let ((not-delims (char-set-complement delims)))
(lambda maybe-port
(let ((p (:optional maybe-port (current-input-port))))
(receive (s delim) (read-delimited delims p 'split)

View File

@ -150,16 +150,15 @@
(case c
((#\])
(let ((cset (fold (lambda (elt cset)
(char-set-union
cset
(if (char? elt)
(char-set elt)
(ascii-range->char-set (char->ascii (car elt))
(+ 1 (char->ascii (cdr elt)))))))
char-set:empty
(if (char? elt)
(char-set-adjoin! cset elt)
(ucs-range->char-set! (char->ascii (car elt))
(+ 1 (char->ascii (cdr elt)))
#f cset)))
(char-set-copy char-set:empty)
elts)))
(values (re-char-set (if negate?
(char-set-invert cset)
(char-set-compelment! cset)
cset))
i)))

View File

@ -93,7 +93,7 @@
))
(define-structure ccp-lib ccp-lib-interface
(open char-set-package
(open char-set-lib
ascii
defrec-package
string-lib

View File

@ -95,11 +95,11 @@
(every (lambda (ccp2)
(and (char-set= domain (ccp:domain ccp2))
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap i)
(string-ref cmap2 i))))
domain))))
(char-set-every (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap i)
(string-ref cmap2 i))))
domain))))
rest)))
@ -116,11 +116,11 @@
(rest (cdr rest)))
(and (char-set<= domain1 domain2)
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap1 i)
(string-ref cmap2 i))))
domain1))
(char-set-every (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap1 i)
(string-ref cmap2 i))))
domain1))
(lp domain2 cmap2 rest))))))

59
scsh/lib/char-package.scm Normal file
View File

@ -0,0 +1,59 @@
;;; These defs are things for characters *not* in SRFIs 13 & 14.
;;; It includes some R5RS defs that are not correct in S48 in a Latin-1 world.
(define-interface char-set-predicates-interface
(export
((char-lower-case? ; R5RS
char-upper-case? ; R5RS
char-alphabetic? ; R5RS
char-numeric? ; R5RS
char-whitespace? ; R5RS
char-alphanumeric? ; For compatibility w/old code
char-letter? ; Scsh
char-digit?
char-letter+digit?
char-graphic?
char-printing?
char-blank?
char-iso-control?
char-punctuation?
char-symbol?
char-hex-digit?
char-ascii?) (proc (:char) :boolean))))
(define-structure char-set-predicates-lib char-set-predicates-interface
(open error-package ; ERROR
scsh-utilities ; DEPRECATED-PROC
char-set-lib
scheme)
(begin
;; These are R5RS. We can't use the native S48 ones, because they
;; don't handle full Latin-1.
(define (char-lower-case? c) (char-set-contains? char-set:lower-case c))
(define (char-upper-case? c) (char-set-contains? char-set:upper-case c))
(define (char-alphabetic? c) (char-set-contains? char-set:letter c))
(define (char-numeric? c) (char-set-contains? char-set:digit c))
(define (char-whitespace? c) (char-set-contains? char-set:whitespace c))
;; These are scsh extensions to R5RS.
(define (char-letter? c) (char-set-contains? char-set:letter c))
(define (char-digit? c) (char-set-contains? char-set:digit c))
(define (char-letter+digit? c) (char-set-contains? char-set:letter+digit c))
(define (char-graphic? c) (char-set-contains? char-set:graphic c))
(define (char-printing? c) (char-set-contains? char-set:printing c))
(define (char-blank? c) (char-set-contains? char-set:blank c))
(define (char-iso-control? c) (char-set-contains? char-set:iso-control c))
(define (char-punctuation? c) (char-set-contains? char-set:punctuation c))
(define (char-symbol? c) (char-set-contains? char-set:symbol c))
(define (char-hex-digit? c) (char-set-contains? char-set:hex-digit c))
(define (char-ascii? c) (char-set-contains? char-set:ascii c))
;; Obsolete scsh.
(define char-alphanumeric?
(deprecated-proc char-letter+digit? 'char-alphanumeric?
"Use CHAR-LETTER+DIGIT? instead.")))
(optimize auto-integrate))

2016
scsh/lib/cset-lib.html Normal file

File diff suppressed because it is too large Load Diff

804
scsh/lib/cset-lib.scm Normal file
View File

@ -0,0 +1,804 @@
;;; SRFI-14 character-sets library -*- Scheme -*-
;;;
;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom.
;;; - Massively rehacked & extended by Olin Shivers 6/98.
;;; - Massively redesigned and rehacked 5/2000 during SRFI process.
;;; At this point, the code bears the following relationship to the
;;; MIT Scheme code: "This is my grandfather's axe. My father replaced
;;; the head, and I have replaced the handle." Nonetheless, we preserve
;;; the MIT Scheme copyright:
;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
;;; The MIT Scheme license is a "free software" license. See the end of
;;; this file for the tedious details.
;;; Exports:
;;; char-set? char-set= char-set<=
;;; char-set-hash
;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set?
;;; char-set-fold char-set-unfold char-set-unfold!
;;; char-set-for-each char-set-map
;;; char-set-copy char-set
;;;
;;; list->char-set string->char-set
;;; list->char-set! string->char-set!
;;;
;;; filterchar-set ucs-range->char-set ->char-set
;;; filterchar-set! ucs-range->char-set!
;;;
;;; char-set->list char-set->string
;;;
;;; char-set-size char-set-count char-set-contains?
;;; char-set-every char-set-any
;;;
;;; char-set-adjoin char-set-delete
;;; char-set-adjoin! char-set-delete!
;;;
;;; char-set-complement char-set-union char-set-intersection
;;; char-set-complement! char-set-union! char-set-intersection!
;;;
;;; char-set-difference char-set-xor char-set-diff+intersection
;;; char-set-difference! char-set-xor! char-set-diff+intersection!
;;;
;;; char-set:lower-case char-set:upper-case char-set:title-case
;;; char-set:letter char-set:digit char-set:letter+digit
;;; char-set:graphic char-set:printing char-set:whitespace
;;; char-set:iso-control char-set:punctuation char-set:symbol
;;; char-set:hex-digit char-set:blank char-set:ascii
;;; char-set:empty char-set:full
;;; Imports
;;; This code has the following non-R5RS dependencies:
;;; - ERROR
;;; - %LATIN1->CHAR %CHAR->LATIN1
;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting
;;; optional arguments from rest lists.
;;; - BITWISE-AND for CHAR-SET-HASH
;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro
;;; - A simple CHECK-ARG procedure:
;;; (lambda (pred val caller) (if (not (pred val)) (error val caller)))
;;; This is simple code, not great code. Char sets are represented as 256-char
;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I
;;; is ASCII/Latin-1 1, then it is in the set.
;;; - Should be rewritten to use bit strings or byte vecs.
;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode.
;;; See the end of the file for porting and performance-tuning notes.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record-type :char-set
(make-char-set s)
char-set?
(s char-set:s))
(define (%string-copy s) (substring s 0 (string-length s)))
;;; Parse, type-check & default a final optional BASE-CS parameter from
;;; a rest argument. Return a *fresh copy* of the underlying string.
;;; The default is the empty set. The PROC argument is to help us
;;; generate informative error exceptions.
(define (%default-base maybe-base proc)
(if (pair? maybe-base)
(let ((bcs (car maybe-base))
(tail (cdr maybe-base)))
(if (null? tail)
(if (char-set? bcs) (%string-copy (char-set:s bcs))
(error "BASE-CS parameter not a char-set" proc bcs))
(error "Expected final base char set -- too many parameters"
proc maybe-base)))
(make-string 256 (%latin1->char 0))))
;;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on
;;; behalf of our caller, PROC. This procedure exists basically to provide
;;; explicit error-checking & reporting.
(define (%char-set:s/check cs proc)
(let lp ((cs cs))
(if (char-set? cs) (char-set:s cs)
(lp (error "Not a char-set" cs proc)))))
;;; These internal functions hide a lot of the dependency on the
;;; underlying string representation of char sets. They should be
;;; inlined if possible.
(define (si=0? s i) (zero? (%char->latin1 (string-ref s i))))
(define (si=1? s i) (not (si=0? s i)))
(define c0 (%latin1->char 0))
(define c1 (%latin1->char 1))
(define (si s i) (%char->latin1 (string-ref s i)))
(define (%set0! s i) (string-set! s i c0))
(define (%set1! s i) (string-set! s i c1))
;;; These do various "s[i] := s[i] op val" operations -- see
;;; %CHAR-SET-ALGEBRA. They are used to implement the various
;;; set-algebra procedures.
(define (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value.
(define (%not! s i v) (setv! s i (- 1 v)))
(define (%and! s i v) (if (zero? v) (%set0! s i)))
(define (%or! s i v) (if (not (zero? v)) (%set1! s i)))
(define (%minus! s i v) (if (not (zero? v)) (%set0! s i)))
(define (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i)))))
(define (char-set-copy cs)
(make-char-set (%string-copy (%char-set:s/check cs char-set-copy))))
(define (char-set= . rest)
(or (null? rest)
(let* ((cs1 (car rest))
(rest (cdr rest))
(s1 (%char-set:s/check cs1 char-set=)))
(let lp ((rest rest))
(or (not (pair? rest))
(and (string=? s1 (%char-set:s/check (car rest) char-set=))
(lp (cdr rest))))))))
(define (char-set<= . rest)
(or (null? rest)
(let ((cs1 (car rest))
(rest (cdr rest)))
(let lp ((s1 (%char-set:s/check cs1 char-set<=)) (rest rest))
(or (not (pair? rest))
(let ((s2 (%char-set:s/check (car rest) char-set<=))
(rest (cdr rest)))
(if (eq? s1 s2) (lp s2 rest) ; Fast path
(let lp2 ((i 255)) ; Real test
(if (< i 0) (lp s2 rest)
(and (<= (si s1 i) (si s2 i))
(lp2 (- i 1))))))))))))
;;; Hash
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
;;; to keep the intermediate values small. (We do the calculation with just
;;; enough bits to represent BOUND, masking off high bits at each step in
;;; calculation. If this screws up any important properties of the hash
;;; function I'd like to hear about it. -Olin)
;;;
;;; If you keep BOUND small enough, the intermediate calculations will
;;; always be fixnums. How small is dependent on the underlying Scheme system;
;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
;;; Schemes that give you at least 29 signed bits for fixnums. The core
;;; calculation that you don't want to overflow is, worst case,
;;; (+ 65535 (* 37 (- bound 1)))
;;; where 65535 is the max character code. Choose the default BOUND to be the
;;; biggest power of two that won't cause this expression to fixnum overflow,
;;; and everything will be copacetic.
(define (char-set-hash cs . maybe-bound)
(let* ((bound (:optional maybe-bound 4194304 (lambda (n) (and (integer? n)
(exact? n)
(<= 0 n)))))
(bound (if (zero? bound) 4194304 bound)) ; 0 means default.
(s (%char-set:s/check cs char-set-hash))
;; Compute a 111...1 mask that will cover BOUND-1:
(mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
(if (>= i bound) (- i 1) (lp (+ i i))))))
(let lp ((i 255) (ans 0))
(if (< i 0) (modulo ans bound)
(lp (- i 1)
(if (si=0? s i) ans
(bitwise-and mask (+ (* 37 ans) i))))))))
(define (char-set-contains? cs char)
(si=1? (%char-set:s/check cs char-set-contains?)
(%char->latin1 (check-arg char? char char-set-contains?))))
(define (char-set-size cs)
(let ((s (%char-set:s/check cs char-set-size)))
(let lp ((i 255) (size 0))
(if (< i 0) size
(lp (- i 1) (+ size (si s i)))))))
(define (char-set-count pred cset)
(check-arg procedure? pred char-set-count)
(let ((s (%char-set:s/check cset char-set-count)))
(let lp ((i 255) (count 0))
(if (< i 0) count
(lp (- i 1)
(if (and (si=1? s i) (pred (%latin1->char i)))
(+ count 1)
count))))))
;;; -- Adjoin & delete
(define (%set-char-set set proc cs chars)
(let ((s (%string-copy (%char-set:s/check cs proc))))
(for-each (lambda (c) (set s (%char->latin1 c)))
chars)
(make-char-set s)))
(define (%set-char-set! set proc cs chars)
(let ((s (%char-set:s/check cs proc)))
(for-each (lambda (c) (set s (%char->latin1 c)))
chars))
cs)
(define (char-set-adjoin cs . chars)
(%set-char-set %set1! char-set-adjoin cs chars))
(define (char-set-adjoin! cs . chars)
(%set-char-set! %set1! char-set-adjoin! cs chars))
(define (char-set-delete cs . chars)
(%set-char-set %set0! char-set-delete cs chars))
(define (char-set-delete! cs . chars)
(%set-char-set! %set0! char-set-delete! cs chars))
;;; Cursors
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Simple implementation. A cursors is an integer index into the
;;; mark vector, and -1 for the end-of-char-set cursor.
;;;
;;; If we represented char sets as a bit set, we could do the following
;;; trick to pick the lowest bit out of the set:
;;; (count-bits (xor (- cset 1) cset))
;;; (But first mask out the bits already scanned by the cursor first.)
(define (char-set-cursor cset)
(%char-set-cursor-next cset 256 char-set-cursor))
(define (end-of-char-set? cursor) (< cursor 0))
(define (char-set-ref cset cursor) (%latin1->char cursor))
(define (char-set-cursor-next cset cursor)
(check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i 255))) cursor
char-set-cursor-next)
(%char-set-cursor-next cset cursor char-set-cursor-next))
(define (%char-set-cursor-next cset cursor proc) ; Internal
(let ((s (%char-set:s/check cset proc)))
(let lp ((cur cursor))
(let ((cur (- cur 1)))
(if (or (< cur 0) (si=1? s cur)) cur
(lp cur))))))
;;; -- for-each map fold unfold every any
(define (char-set-for-each proc cs)
(check-arg procedure? proc char-set-for-each)
(let ((s (%char-set:s/check cs char-set-for-each)))
(let lp ((i 255))
(cond ((>= i 0)
(if (si=1? s i) (proc (%latin1->char i)))
(lp (- i 1)))))))
(define (char-set-map proc cs)
(check-arg procedure? proc char-set-map)
(let ((s (%char-set:s/check cs char-set-map))
(ans (make-string 256 c0)))
(let lp ((i 255))
(cond ((>= i 0)
(if (si=1? s i)
(%set1! ans (%char->latin1 (proc (%latin1->char i)))))
(lp (- i 1)))))
(make-char-set ans)))
(define (char-set-fold kons knil cs)
(check-arg procedure? kons char-set-fold)
(let ((s (%char-set:s/check cs char-set-fold)))
(let lp ((i 255) (ans knil))
(if (< i 0) ans
(lp (- i 1)
(if (si=0? s i) ans
(kons (%latin1->char i) ans)))))))
(define (char-set-every pred cs)
(check-arg procedure? pred char-set-every)
(let ((s (%char-set:s/check cs char-set-every)))
(let lp ((i 255))
(or (< i 0)
(and (or (si=0? s i) (pred (%latin1->char i)))
(lp (- i 1)))))))
(define (char-set-any pred cs)
(check-arg procedure? pred char-set-any)
(let ((s (%char-set:s/check cs char-set-any)))
(let lp ((i 255))
(and (>= i 0)
(or (and (si=1? s i) (pred (%latin1->char i)))
(lp (- i 1)))))))
(define (%char-set-unfold! proc p f g s seed)
(check-arg procedure? p proc)
(check-arg procedure? f proc)
(check-arg procedure? g proc)
(let lp ((seed seed))
(cond ((not (p seed)) ; P says we are done.
(%set1! s (%char->latin1 (f seed))) ; Add (F SEED) to set.
(lp (g seed)))))) ; Loop on (G SEED).
(define (char-set-unfold p f g seed . maybe-base)
(let ((bs (%default-base maybe-base char-set-unfold)))
(%char-set-unfold! char-set-unfold p f g bs seed)
(make-char-set bs)))
(define (char-set-unfold! p f g seed base-cset)
(%char-set-unfold! char-set-unfold! p f g
(%char-set:s/check base-cset char-set-unfold!)
seed)
base-cset)
;;; list <--> char-set
(define (%list->char-set! chars s)
(for-each (lambda (char) (%set1! s (%char->latin1 char)))
chars))
(define (char-set . chars)
(let ((s (make-string 256 c0)))
(%list->char-set! chars s)
(make-char-set s)))
(define (list->char-set chars . maybe-base)
(let ((bs (%default-base maybe-base list->char-set)))
(%list->char-set! chars bs)
(make-char-set bs)))
(define (list->char-set! chars base-cs)
(%list->char-set! chars (%char-set:s/check base-cs list->char-set!))
base-cs)
(define (char-set->list cs)
(let ((s (%char-set:s/check cs char-set->list)))
(let lp ((i 255) (ans '()))
(if (< i 0) ans
(lp (- i 1)
(if (si=0? s i) ans
(cons (%latin1->char i) ans)))))))
;;; string <--> char-set
(define (%string->char-set! str bs proc)
(check-arg string? str proc)
(do ((i (- (string-length str) 1) (- i 1)))
((< i 0))
(%set1! bs (%char->latin1 (string-ref str i)))))
(define (string->char-set str . maybe-base)
(let ((bs (%default-base maybe-base string->char-set)))
(%string->char-set! str bs string->char-set)
(make-char-set bs)))
(define (string->char-set! str base-cs)
(%string->char-set! str (%char-set:s/check base-cs string->char-set!)
string->char-set!)
base-cs)
(define (char-set->string cs)
(let* ((s (%char-set:s/check cs char-set->string))
(ans (make-string (char-set-size cs))))
(let lp ((i 255) (j 0))
(if (< i 0) ans
(let ((j (if (si=0? s i) j
(begin (string-set! ans j (%latin1->char i))
(+ j 1)))))
(lp (- i 1) j))))))
;;; -- UCS-range -> char-set
(define (%ucs-range->char-set! lower upper error? bs proc)
(check-arg (lambda (x) (and (integer? x) (exact? x) (<= 0 x))) lower proc)
(check-arg (lambda (x) (and (integer? x) (exact? x) (<= lower x))) upper proc)
(if (and (< lower upper) (< 256 upper) error?)
(error "Requested UCS range contains unavailable characters -- this implementation only supports Latin-1"
proc lower upper))
(let lp ((i (- (min upper 256) 1)))
(cond ((<= lower i) (%set1! bs i) (lp (- i 1))))))
(define (ucs-range->char-set lower upper . rest)
(let-optionals* rest ((error? #f) rest)
(let ((bs (%default-base rest ucs-range->char-set)))
(%ucs-range->char-set! lower upper error? bs ucs-range->char-set)
(make-char-set bs))))
(define (ucs-range->char-set! lower upper error? base-cs)
(%ucs-range->char-set! lower upper error?
(%char-set:s/check base-cs ucs-range->char-set!)
ucs-range->char-set)
base-cs)
;;; -- predicate -> char-set
(define (%char-set-filter! pred ds bs proc)
(check-arg procedure? pred proc)
(let lp ((i 255))
(cond ((>= i 0)
(if (and (si=1? ds i) (pred (%latin1->char i)))
(%set1! bs i))
(lp (- i 1))))))
(define (char-set-filter predicate domain . maybe-base)
(let ((bs (%default-base maybe-base char-set-filter)))
(%char-set-filter! predicate
(%char-set:s/check domain char-set-filter!)
bs
char-set-filter)
(make-char-set bs)))
(define (char-set-filter! predicate domain base-cs)
(%char-set-filter! predicate
(%char-set:s/check domain char-set-filter!)
(%char-set:s/check base-cs char-set-filter!)
char-set-filter!)
base-cs)
;;; {string, char, char-set, char predicate} -> char-set
(define (->char-set x)
(cond ((char-set? x) x)
((string? x) (string->char-set x))
((char? x) (char-set x))
(else (error "->char-set: Not a charset, string or char." x))))
;;; Set algebra
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The exported ! procs are "linear update" -- allowed, but not required, to
;;; side-effect their first argument when computing their result. In other
;;; words, you must use them as if they were completely functional, just like
;;; their non-! counterparts, and you must additionally ensure that their
;;; first arguments are "dead" at the point of call. In return, we promise a
;;; more efficient result, plus allowing you to always assume char-sets are
;;; unchangeable values.
;;; Apply P to each index and its char code in S: (P I VAL).
;;; Used by the set-algebra ops.
(define (%string-iter p s)
(let lp ((i (- (string-length s) 1)))
(cond ((>= i 0)
(p i (%char->latin1 (string-ref s i)))
(lp (- i 1))))))
;;; String S represents some initial char-set. (OP s i val) does some
;;; kind of s[i] := s[i] op val update. Do
;;; S := S OP CSETi
;;; for all the char-sets in the list CSETS. The n-ary set-algebra ops
;;; all use this internal proc.
(define (%char-set-algebra s csets op proc)
(for-each (lambda (cset)
(let ((s2 (%char-set:s/check cset proc)))
(let lp ((i 255))
(cond ((>= i 0)
(op s i (si s2 i))
(lp (- i 1)))))))
csets))
;;; -- Complement
(define (char-set-complement cs)
(let ((s (%char-set:s/check cs char-set-complement))
(ans (make-string 256)))
(%string-iter (lambda (i v) (%not! ans i v)) s)
(make-char-set ans)))
(define (char-set-complement! cset)
(let ((s (%char-set:s/check cset char-set-complement!)))
(%string-iter (lambda (i v) (%not! s i v)) s))
cset)
;;; -- Union
(define (char-set-union! cset1 . csets)
(%char-set-algebra (%char-set:s/check cset1 char-set-union!)
csets %or! char-set-union!)
cset1)
(define (char-set-union . csets)
(if (pair? csets)
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-union))))
(%char-set-algebra s (cdr csets) %or! char-set-union)
(make-char-set s))
(char-set-copy char-set:empty)))
;;; -- Intersection
(define (char-set-intersection! cset1 . csets)
(%char-set-algebra (%char-set:s/check cset1 char-set-intersection!)
csets %and! char-set-intersection!)
cset1)
(define (char-set-intersection . csets)
(if (pair? csets)
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-intersection))))
(%char-set-algebra s (cdr csets) %and! char-set-intersection)
(make-char-set s))
(char-set-copy char-set:full)))
;;; -- Difference
(define (char-set-difference! cset1 . csets)
(%char-set-algebra (%char-set:s/check cset1 char-set-difference!)
csets %minus! char-set-difference!)
cset1)
(define (char-set-difference cs1 . csets)
(if (pair? csets)
(let ((s (%string-copy (%char-set:s/check cs1 char-set-difference))))
(%char-set-algebra s csets %minus! char-set-difference)
(make-char-set s))
(char-set-copy cs1)))
;;; -- Xor
(define (char-set-xor! cset1 . csets)
(%char-set-algebra (%char-set:s/check cset1 char-set-xor!)
csets %xor! char-set-xor!)
cset1)
(define (char-set-xor . csets)
(if (pair? csets)
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-xor))))
(%char-set-algebra s (cdr csets) %xor! char-set-xor)
(make-char-set s))
(char-set-copy char-set:empty)))
;;; -- Difference & intersection
(define (%char-set-diff+intersection! diff int csets proc)
(for-each (lambda (cs)
(%string-iter (lambda (i v)
(if (not (zero? v))
(cond ((si=1? diff i)
(%set0! diff i)
(%set1! int i)))))
(%char-set:s/check cs proc)))
csets))
(define (char-set-diff+intersection! cs1 cs2 . csets)
(let ((s1 (%char-set:s/check cs1 char-set-diff+intersection!))
(s2 (%char-set:s/check cs2 char-set-diff+intersection!)))
(%string-iter (lambda (i v) (if (zero? v)
(%set0! s2 i)
(if (si=1? s2 i) (%set0! s1 i))))
s1)
(%char-set-diff+intersection! s1 s2 csets char-set-diff+intersection!))
(values cs1 cs2))
(define (char-set-diff+intersection cs1 . csets)
(let ((diff (string-copy (%char-set:s/check cs1 char-set-diff+intersection)))
(int (make-string 256 c0)))
(%char-set-diff+intersection! diff int csets char-set-diff+intersection)
(values (make-char-set diff) (make-char-set int))))
;;;; System character sets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; These definitions are for Latin-1.
;;;
;;; If your Scheme implementation allows you to mark the underlying strings
;;; as immutable, you should do so -- it would be very, very bad if a client's
;;; buggy code corrupted these constants.
(define char-set:empty (char-set))
(define char-set:full (char-set-complement char-set:empty))
(define char-set:lower-case
(let* ((a-z (ucs-range->char-set #x61 #x7B))
(latin1 (ucs-range->char-set! #xdf #xf7 #t a-z))
(latin2 (ucs-range->char-set! #xf8 #x100 #t latin1)))
(char-set-adjoin! latin2 (%latin1->char #xb5))))
(define char-set:upper-case
(let ((A-Z (ucs-range->char-set #x41 #x5B)))
;; Add in the Latin-1 upper-case chars.
(ucs-range->char-set! #xd8 #xdf #t
(ucs-range->char-set! #xc0 #xd7 #t A-Z))))
(define char-set:title-case char-set:empty)
(define char-set:letter
(let ((u/l (char-set-union char-set:upper-case char-set:lower-case)))
(char-set-adjoin! u/l
(%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR
(%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR
(define char-set:digit (string->char-set "0123456789"))
(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF"))
(define char-set:letter+digit
(char-set-union char-set:letter char-set:digit))
(define char-set:punctuation
(let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}"))
(latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK
#xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
#xAD ; SOFT HYPHEN
#xB7 ; MIDDLE DOT
#xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
#xBF)))) ; INVERTED QUESTION MARK
(list->char-set! latin-1-chars ascii)))
(define char-set:symbol
(let ((ascii (string->char-set "$+<=>^`|~"))
(latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN
#x00A3 ; POUND SIGN
#x00A4 ; CURRENCY SIGN
#x00A5 ; YEN SIGN
#x00A6 ; BROKEN BAR
#x00A7 ; SECTION SIGN
#x00A8 ; DIAERESIS
#x00A9 ; COPYRIGHT SIGN
#x00AC ; NOT SIGN
#x00AE ; REGISTERED SIGN
#x00AF ; MACRON
#x00B0 ; DEGREE SIGN
#x00B1 ; PLUS-MINUS SIGN
#x00B4 ; ACUTE ACCENT
#x00B6 ; PILCROW SIGN
#x00B8 ; CEDILLA
#x00D7 ; MULTIPLICATION SIGN
#x00F7)))) ; DIVISION SIGN
(list->char-set! latin-1-chars ascii)))
(define char-set:graphic
(char-set-union char-set:letter+digit char-set:punctuation char-set:symbol))
(define char-set:whitespace
(list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
#x0A ; LINE FEED
#x0B ; VERTICAL TABULATION
#x0C ; FORM FEED
#x0D ; CARRIAGE RETURN
#x20 ; SPACE
#xA0))))
(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; NO-BREAK SPACE
(define char-set:blank
(list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
#x20 ; SPACE
#xA0)))) ; NO-BREAK SPACE
(define char-set:iso-control
(ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32)))
(define char-set:ascii (ucs-range->char-set 0 128))
;;; Porting & performance-tuning notes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; See the section at the beginning of this file on external dependencies.
;;;
;;; First and foremost, rewrite this code to use bit vectors of some sort.
;;; This will give big speedup and memory savings.
;;;
;;; - LET-OPTIONALS* macro.
;;; This is only used once. You can rewrite the use, port the hairy macro
;;; definition (which is implemented using a Clinger-Rees low-level
;;; explicit-renaming macro system), or port the simple, high-level
;;; definition, which is less efficient.
;;;
;;; - :OPTIONAL macro
;;; Very simply defined using an R5RS high-level macro.
;;;
;;; Implementations that can arrange for the base char sets to be immutable
;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable,
;;; which can be used to protect the underlying strings.) It would be very,
;;; very bad if a client's buggy code corrupted these constants.
;;;
;;; There is a fair amount of argument checking. This is, strictly speaking,
;;; unnecessary -- the actual body of the procedures will blow up if an
;;; illegal value is passed in. However, the error message will not be as good
;;; as if the error were caught at the "higher level." Also, a very, very
;;; smart Scheme compiler may be able to exploit having the type checks done
;;; early, so that the actual body of the procedures can assume proper values.
;;; This isn't likely; this kind of compiler technology isn't common any
;;; longer.
;;;
;;; The overhead of optional-argument parsing is irritating. The optional
;;; arguments must be consed into a rest list on entry, and then parsed out.
;;; Function call should be a matter of a few register moves and a jump; it
;;; should not involve heap allocation! Your Scheme system may have a superior
;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
;;; then this is a prime candidate for optimising these procedures,
;;; *especially* the many optional BASE-CS parameters.
;;;
;;; Note that optional arguments are also a barrier to procedure integration.
;;; If your Scheme system permits you to specify alternate entry points
;;; for a call when the number of optional arguments is known in a manner
;;; that enables inlining/integration, this can provide performance
;;; improvements.
;;;
;;; There is enough *explicit* error checking that *all* internal operations
;;; should *never* produce a type or index-range error. Period. Feel like
;;; living dangerously? *Big* performance win to be had by replacing string
;;; and record-field accessors and setters with unsafe equivalents in the
;;; code. Similarly, fixnum-specific operators can speed up the arithmetic
;;; done on the index values in the inner loops. The only arguments that are
;;; not completely error checked are
;;; - string lists (complete checking requires time proportional to the
;;; length of the list)
;;; - procedure arguments, such as char->char maps & predicates.
;;; There is no way to check the range & domain of procedures in Scheme.
;;; Procedures that take these parameters cannot fully check their
;;; arguments. But all other types to all other procedures are fully
;;; checked.
;;;
;;; This does open up the alternate possibility of simply *removing* these
;;; checks, and letting the safe primitives raise the errors. On a dumb
;;; Scheme system, this would provide speed (by eliminating the redundant
;;; error checks) at the cost of error-message clarity.
;;;
;;; In an interpreted Scheme, some of these procedures, or the internal
;;; routines with % prefixes, are excellent candidates for being rewritten
;;; in C.
;;;
;;; It would also be nice to have the ability to mark some of these
;;; routines as candidates for inlining/integration.
;;;
;;; See the comments preceding the hash function code for notes on tuning
;;; the default bound so that the code never overflows your implementation's
;;; fixnum size into bignum calculation.
;;;
;;; All the %-prefixed routines in this source code are written
;;; to be called internally to this library. They do *not* perform
;;; friendly error checks on the inputs; they assume everything is
;;; proper. They also do not take optional arguments. These two properties
;;; save calling overhead and enable procedure integration -- but they
;;; are not appropriate for exported routines.
;;; Copyright notice
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
;;;
;;; This material was developed by the Scheme project at the Massachusetts
;;; Institute of Technology, Department of Electrical Engineering and
;;; Computer Science. Permission to copy and modify this software, to
;;; redistribute either the original software or a modified version, and
;;; to use this software for any purpose is granted, subject to the
;;; following restrictions and understandings.
;;;
;;; 1. Any copy made of this software must include this copyright notice
;;; in full.
;;;
;;; 2. Users of this software agree to make their best efforts (a) to
;;; return to the MIT Scheme project any improvements or extensions that
;;; they make, so that these may be included in future releases; and (b)
;;; to inform MIT of noteworthy uses of this software.
;;;
;;; 3. All materials developed as a consequence of the use of this
;;; software shall duly acknowledge such use, in accordance with the usual
;;; standards of acknowledging credit in academic research.
;;;
;;; 4. MIT has made no warrantee or representation that the operation of
;;; this software will be error-free, and MIT is under no obligation to
;;; provide any services, by way of maintenance, update, or otherwise.
;;;
;;; 5. In conjunction with products arising from the use of this material,
;;; there shall be no use of the name of the Massachusetts Institute of
;;; Technology nor of any adaptation thereof in any advertising,
;;; promotional, or sales literature without prior written consent from
;;; MIT in each case.

1271
scsh/lib/cset-lib.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,52 @@
;;; Support for obsolete, deprecated 0.5.2 char-set procedures.
;;; Will go away in a future release.
(define-interface obsolete-char-set-interface
(export char-set-members ; char-set->list
chars->char-set ; list->char-set
ascii-range->char-set ; ucs-range->char-set (not exact)
predicate->char-set ; char-set-filter (not exact)
->char-set ; no longer handles a predicate
char-set-every? ; char-set-every
char-set-invert ; char-set-complement
char-set-invert! ; char-set-complement!
char-set:alphabetic ; char-set:letter
char-set:numeric ; char-set:digit
char-set:alphanumeric ; char-set:letter+digit
char-set:control)) ; char-set:iso-control
(define-structure obsolete-char-set-lib obsolete-char-set-interface
(open scsh-utilities char-set-lib scheme)
(begin
(define char-set-members
(deprecated-proc char-set->list 'char-set-members
"Use CHAR-SET->LIST instead."))
(define chars->char-set
(deprecated-proc list->char-set 'chars->char-set
"Use LIST->CHAR-SET instead."))
(define ascii-range->char-set
(deprecated-proc (lambda (lower upper) (ucs-range->char-set lower upper #t))
'ascii-range->char-set
"Use UCS-RANGE->CHAR-SET instead."))
(define predicate->char-set
(deprecated-proc (lambda (pred) (char-set-filter pred char-set:full))
'predicate->char-set
"Change code to use CHAR-SET-FILTER."))
(define char-set-every?
(deprecated-proc char-set-every 'char-set-every?
"Use CHAR-SET-EVERYyn instead."))
(define char-set-invert
(deprecated-proc char-set-complement 'char-set-invert
"Use CHAR-SET-COMPLEMENTyn instead."))
(define char-set-invert!
(deprecated-proc char-set-complement! 'char-set-invert!
"Use CHAR-SET-COMPLEMENT!yn instead."))
(define char-set:alphabetic char-set:letter)
(define char-set:numeric char-set:digit)
(define char-set:alphanumeric char-set:letter+digit)
(define char-set:control char-set:iso-control)))

151
scsh/lib/cset-package.scm Normal file
View File

@ -0,0 +1,151 @@
;;; SRFI-14 interface for Scheme48 -*- Scheme -*-
;;;
;;; Complete interface spec for the SRFI-14 char-set-lib library in the
;;; Scheme48 interface and module language. The interface is fully typed, in
;;; the Scheme48 type notation. The structure definitions also provide a
;;; formal description of the external dependencies of the source code.
(define-interface char-set-interface
(export (char-set? (proc (:value) :boolean))
((char-set= char-set<=) (proc (&rest :value) :boolean))
(char-set-hash (proc (:value &opt :exact-integer) :exact-integer))
;; Cursors are exact integers in the reference implementation.
;; These typings would be different with a different cursor
;; implementation.
;; Too bad Scheme doesn't have abstract data types.
(char-set-cursor (proc (:value) :exact-integer))
(char-set-ref (proc (:value :exact-integer) :char))
(char-set-cursor-next (proc (:value :exact-integer) :exact-integer))
(end-of-char-set? (proc (:value) :boolean))
(char-set-fold (proc ((proc (:char :value) :value) :value :value)
:value))
(char-set-unfold (proc ((proc (:value) :boolean)
(proc (:value) :value)
(proc (:value) :value)
:value
&opt :value)
:value))
(char-set-unfold! (proc ((proc (:value) :boolean)
(proc (:value) :value)
(proc (:value) :value)
:value :value)
:value))
(char-set-for-each (proc ((proc (:char) :values) :value) :unspecific))
(char-set-map (proc ((proc (:char) :char) :value) :value))
(char-set-copy (proc (:value) :value))
(char-set (proc (&rest :char) :value))
(list->char-set (proc (:value &opt :value) :value))
(list->char-set! (proc (:value :value) :value))
(string->char-set (proc (:value &opt :value) :value))
(string->char-set! (proc (:value :value) :value))
(ucs-range->char-set (proc (:exact-integer :exact-integer &opt
:boolean :value)
:value))
(ucs-range->char-set! (proc (:exact-integer :exact-integer
:boolean :value)
:value))
(char-set-filter (proc ((proc (:char) :boolean) :value &opt :value) :value))
(char-set-filter! (proc ((proc (:char) :boolean) :value :value) :value))
(->char-set (proc (:value) :value))
(char-set-size (proc (:value) :exact-integer))
(char-set-count (proc ((proc (:char) :boolean) :value) :exact-integer))
(char-set-contains? (proc (:char :value) :boolean))
(char-set-every (proc ((proc (:char) :boolean) :value) :boolean))
(char-set-any (proc ((proc (:char) :boolean) :value) :value))
((char-set-adjoin char-set-delete
char-set-adjoin! char-set-delete!)
(proc (:value &rest :char) :value))
(char-set->list (proc (:value) :value))
(char-set->string (proc (:value) :string))
(char-set-complement (proc (:value) :value))
((char-set-union char-set-intersection char-set-xor)
(proc (&rest :value) :value))
(char-set-difference (proc (:value &opt :value) :value))
(char-set-diff+intersection (proc (:value &rest :value)
(some-values :value :value)))
(char-set-complement! (proc (:value) :value))
((char-set-union! char-set-intersection!
char-set-xor! char-set-difference!)
(proc (:value &opt :value) :value))
(char-set-diff+intersection! (proc (:value :value &rest :value)
(some-values :value :value)))
char-set:lower-case
char-set:upper-case
char-set:letter
char-set:digit
char-set:letter+digit
char-set:graphic
char-set:printing
char-set:whitespace
char-set:blank
char-set:iso-control
char-set:punctuation
char-set:symbol
char-set:hex-digit
char-set:ascii
char-set:empty
char-set:full
))
; rdelim.scm gets into the innards of char-sets.
(define-interface scsh-char-set-low-level-interface
(export (char-set:s (proc (:value) :string))))
(define-structures ((char-set-lib char-set-interface)
(scsh-char-set-low-level-lib scsh-char-set-low-level-interface))
(open error-package ; ERROR procedure
let-opt ; LET-OPTIONALS* and :OPTIONAL
ascii ; CHAR->ASCII ASCII->CHAR
bitwise ; BITWISE-AND
jar-d-r-t-package ; DEFINE-RECORD-TYPE/JAR macro.
scheme)
(begin (define (check-arg pred val caller)
(let lp ((val val))
(if (pred val) val (lp (error "Bad argument" val pred caller)))))
(define %latin1->char ascii->char) ; Works for S48
(define %char->latin1 char->ascii) ; Works for S48
;; Here's a SRFI-19 d-r-t defined in terms of jar's almost-identical
;; d-r-t.
(define-syntax define-record-type
(syntax-rules ()
((define-record-type ?name ?stuff ...)
(define-record-type/jar ?name ?name ?stuff ...)))))
(files cset-lib)
(optimize auto-integrate))
;;; Import jar's DEFINE-RECORD-TYPE macro, and export it under the
;;; name DEFINE-RECORD-TYPE/JAR.
(define-structure jar-d-r-t-package (export (define-record-type/jar :syntax))
(open define-record-types ; JAR's record macro
scheme)
(begin (define-syntax define-record-type/jar
(syntax-rules ()
((define-record-type/jar ?stuff ...)
(define-record-type ?stuff ...))))))

200
scsh/lib/cset-tests.scm Normal file
View File

@ -0,0 +1,200 @@
;;; This is a regression testing suite for the SRFI-14 char-set library.
;;; Olin Shivers
(let-syntax ((test (syntax-rules ()
((test form ...)
(cond ((not form) (error "Test failed" 'form)) ...
(else 'OK))))))
(let ((vowel (lambda (c) (member c '(#\a #\e #\i #\o #\u)))))
(test
(not (char-set? 5))
(char-set? (char-set #\a #\e #\i #\o #\u))
(char-set=)
(char-set= (char-set))
(char-set= (char-set #\a #\e #\i #\o #\u)
(string->char-set "ioeauaiii"))
(not (char-set= (char-set #\e #\i #\o #\u)
(string->char-set "ioeauaiii")))
(char-set<=)
(char-set<= (char-set))
(char-set<= (char-set #\a #\e #\i #\o #\u)
(string->char-set "ioeauaiii"))
(char-set<= (char-set #\e #\i #\o #\u)
(string->char-set "ioeauaiii"))
(<= 0 (char-set-hash char-set:graphic 100) 99)
(= 4 (char-set-fold (lambda (c i) (+ i 1)) 0
(char-set #\e #\i #\o #\u #\e #\e)))
(char-set= (string->char-set "eiaou2468013579999")
(char-set-unfold null? car cdr '(#\a #\e #\i #\o #\u #\u #\u)
char-set:digit))
(char-set= (string->char-set "eiaou246801357999")
(char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
(string->char-set "0123456789")))
(not (char-set= (string->char-set "eiaou246801357")
(char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
(string->char-set "0123456789"))))
(let ((cs (string->char-set "0123456789")))
(char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
(string->char-set "02468000"))
(char-set= cs (string->char-set "97531")))
(not (let ((cs (string->char-set "0123456789")))
(char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
(string->char-set "02468"))
(char-set= cs (string->char-set "7531"))))
(char-set= (char-set-map char-upcase (string->char-set "aeiou"))
(string->char-set "IOUAEEEE"))
(not (char-set= (char-set-map char-upcase (string->char-set "aeiou"))
(string->char-set "OUAEEEE")))
(char-set= (char-set-copy (string->char-set "aeiou"))
(string->char-set "aeiou"))
(char-set= (char-set #\x #\y) (string->char-set "xy"))
(not (char-set= (char-set #\x #\y #\z) (string->char-set "xy")))
(char-set= (string->char-set "xy") (list->char-set '(#\x #\y)))
(not (char-set= (string->char-set "axy") (list->char-set '(#\x #\y))))
(char-set= (string->char-set "xy12345")
(list->char-set '(#\x #\y) (string->char-set "12345")))
(not (char-set= (string->char-set "y12345")
(list->char-set '(#\x #\y) (string->char-set "12345"))))
(char-set= (string->char-set "xy12345")
(list->char-set! '(#\x #\y) (string->char-set "12345")))
(not (char-set= (string->char-set "y12345")
(list->char-set! '(#\x #\y) (string->char-set "12345"))))
(char-set= (string->char-set "aeiou12345")
(char-set-filter vowel? char-set:ascii (string->char-set "12345")))
(not (char-set= (string->char-set "aeou12345")
(char-set-filter vowel? char-set:ascii (string->char-set "12345"))))
(char-set= (string->char-set "aeiou12345")
(char-set-filter! vowel? char-set:ascii (string->char-set "12345")))
(not (char-set= (string->char-set "aeou12345")
(char-set-filter! vowel? char-set:ascii (string->char-set "12345"))))
(char-set= (string->char-set "abcdef12345")
(ucs-range->char-set 97 103 #t (string->char-set "12345")))
(not (char-set= (string->char-set "abcef12345")
(ucs-range->char-set 97 103 #t (string->char-set "12345"))))
(char-set= (string->char-set "abcdef12345")
(ucs-range->char-set! 97 103 #t (string->char-set "12345")))
(not (char-set= (string->char-set "abcef12345")
(ucs-range->char-set! 97 103 #t (string->char-set "12345"))))
(char-set= (->char-set #\x)
(->char-set "x")
(->char-set (char-set #\x)))
(not (char-set= (->char-set #\x)
(->char-set "y")
(->char-set (char-set #\x))))
(= 10 (char-set-size (char-set-intersection char-set:ascii char-set:digit)))
(= 5 (char-set-count vowel? char-set:ascii))
(equal? '(#\x) (char-set->list (char-set #\x)))
(not (equal? '(#\X) (char-set->list (char-set #\x))))
(equal? "x" (char-set->string (char-set #\x)))
(not (equal? "X" (char-set->string (char-set #\x))))
(char-set-contains? (->char-set "xyz") #\x)
(not (char-set-contains? (->char-set "xyz") #\a))
(char-set-every char-lower-case? (->char-set "abcd"))
(not (char-set-every char-lower-case? (->char-set "abcD")))
(char-set-any char-lower-case? (->char-set "abcd"))
(not (char-set-any char-lower-case? (->char-set "ABCD")))
(char-set= (->char-set "ABCD")
(let ((cs (->char-set "abcd")))
(let lp ((cur (char-set-cursor cs)) (ans '()))
(if (end-of-char-set? cur) (list->char-set ans)
(lp (char-set-cursor-next cs cur)
(cons (char-upcase (char-set-ref cs cur)) ans))))))
(char-set= (char-set-adjoin (->char-set "123") #\x #\a)
(->char-set "123xa"))
(not (char-set= (char-set-adjoin (->char-set "123") #\x #\a)
(->char-set "123x")))
(char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
(->char-set "123xa"))
(not (char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
(->char-set "123x")))
(char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
(->char-set "13"))
(not (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
(->char-set "13a")))
(char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
(->char-set "13"))
(not (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
(->char-set "13a")))
(char-set= (char-set-intersection char-set:hex-digit (char-set-complement char-set:digit))
(->char-set "abcdefABCDEF"))
(char-set= (char-set-intersection! (char-set-complement! (->char-set "0123456789"))
char-set:hex-digit)
(->char-set "abcdefABCDEF"))
(char-set= (char-set-union char-set:hex-digit
(->char-set "abcdefghijkl"))
(->char-set "abcdefABCDEFghijkl0123456789"))
(char-set= (char-set-union! (->char-set "abcdefghijkl")
char-set:hex-digit)
(->char-set "abcdefABCDEFghijkl0123456789"))
(char-set= (char-set-difference (->char-set "abcdefghijklmn")
char-set:hex-digit)
(->char-set "ghijklmn"))
(char-set= (char-set-difference! (->char-set "abcdefghijklmn")
char-set:hex-digit)
(->char-set "ghijklmn"))
(char-set= (char-set-xor (->char-set "0123456789")
char-set:hex-digit)
(->char-set "abcdefABCDEF"))
(char-set= (char-set-xor! (->char-set "0123456789")
char-set:hex-digit)
(->char-set "abcdefABCDEF"))
(call-with-values (lambda ()
(char-set-diff+intersection char-set:hex-digit
char-set:letter))
(lambda (d i)
(and (char-set= d (->char-set "0123456789"))
(char-set= i (->char-set "abcdefABCDEF")))))
(call-with-values (lambda ()
(char-set-diff+intersection! (char-set-copy char-set:hex-digit)
(char-set-copy char-set:letter)))
(lambda (d i)
(and (char-set= d (->char-set "0123456789"))
(char-set= i (->char-set "abcdefABCDEF"))))))
))

View File

@ -16,6 +16,11 @@
;;; This implementation is intended as a portable reference implementation
;;; for SRFI-1. See the porting notes below for more information.
;;; Revision history
;;;;;;;;;;;;;;;;;;;;
;;; This is version 1.1. 12/18/2000
;;; Fixes a small bug in DELETE-DUPLICATES!.
;;; Exported:
;;; xcons tree-copy make-list list-tabulate cons* list-copy
;;; proper-list? circular-list? dotted-list? not-pair? null-list? list=
@ -384,7 +389,7 @@
(define (null-list? l)
(cond ((pair? l) #f)
((null? l) #t)
(else (error "null-pair?: argument out of domain" l))))
(else (error "null-list?: argument out of domain" l))))
(define (list= = . lists)
@ -1239,7 +1244,7 @@
(new-tail (recur (delete x tail elt=))))
(if (eq? tail new-tail) lis (cons x new-tail)))))))
(define (delete-duplicates! lis maybe-=)
(define (delete-duplicates! lis . maybe-=)
(let ((elt= (:optional maybe-= equal?)))
(check-arg procedure? elt= delete-duplicates!)
(let recur ((lis lis))

View File

@ -1,4 +1,4 @@
<!doctype html public '-//W3C//DTD HTML 4.0//EN'
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"
'http://www.w3.org/TR/REC-html40/strict.dtd'>
<!-- Is there a portable way to write an em-dash?
@ -36,9 +36,15 @@
div.indent { margin-left: 2em; } /* General indentation */
pre.code-example { margin-left: 2em; } /* Indent code examples. */
/* "Continue" class marks text that isn't really the start
** of a new paragraph -- e.g., continuing a para after a
** code sample.
*/
p.continue { text-indent: 0em; margin-top: 0em}
/* This stuff is for definition lists of defined procedures.
** A proc-def2 is used when you want a stack of procs to go
** with one <dd> ... </dd> body. In this case, make the first
** with one dd body. In this case, make the first
** proc a proc-def1, following ones proc-defi's, and the last one
** a proc-defn.
**
@ -101,7 +107,7 @@
a.draft { color: red; }
</style>
<style type="text/css"; media=all>
<style type="text/css" media=all>
/* Nastiness: Here, I'm using a bug to work around a bug.
** Netscape rendering bugs mean you need bogus <dt> and <dd>
** margin settings -- settings which screw up IE's proper rendering.
@ -140,12 +146,28 @@ List Library
</div>
<!--========================================================================-->
<h1>Author</H1>
<H1>Author</H1>
<p>
Olin Shivers
<address>
<a href="http://www.ai.mit.edu/~shivers/">Olin Shivers</A> /
<a href="mailto:shivers@ai.mit.edu">shivers@ai.mit.edu</A>
<a href="http://www.ai.mit.edu/~shivers/">http://www.ai.mit.edu/~shivers/</A> /
<a href="mailto:shivers@ai.mit.edu">shivers@ai.mit.edu</A>
</address>
<!--========================================================================-->
<H1>Status</H1>
<p>
This SRFI is currently in ``final status. To see an explanation of each status that a SRFI can hold, see <A HREF="http://srfi.schemers.org/srfi-process.html">here</A>.
You can access the discussion via <A HREF=mail-archive/maillist.html>the archive of the mailing list</A>.
<P>
<UL>
<LI>Received: 1998/11/08</LI>
<LI>Draft: 1998/12/22-1999/03/09</LI>
<LI>Revised: several times</LI>
<LI>Final: 1999/10/09</LI>
</UL>
<!--========================================================================-->
<h1>Table of contents</H1>
@ -154,7 +176,7 @@ List Library
-->
<ul id=toc-table>
<li><a href="#Abstract">Abstract</a>
<li><a href="#Introduction">Introduction</a>
<li><a href="#Rationale">Rationale</a>
<li><a href="#ProcedureIndex">Procedure index</a>
<li><a href="#GeneralDiscussion">General discussion</a>
<ul>
@ -196,14 +218,8 @@ reference implementation of the spec. The reference implementation is
<li>completely open, public-domain source
</ul>
<strong><em>Note: This is a working draft, and tends to lag the plain-text version in terms of actual content.
See <a href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt">
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt</a>
for the latest copy of the plain-text draft.
</em></strong>
<!--========================================================================-->
<h1><a name="Introduction">Introduction</a></h1>
<h1><a name="Rationale">Rationale</a></h1>
<p>
The set of basic list and pair operations provided by R4RS/<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr> Scheme is far
from satisfactory. Because this set is so small and basic, most
@ -278,9 +294,9 @@ library and get good results with it.
<h1><a name="ProcedureIndex">Procedure Index</a></h1>
<p>
Here is a short list of the procedures provided by the list-lib package.
<a href="#R5RS">R5RS</a></abbr> procedures are shown in
<span class=r5rs-proc>bold</span class=r5rs-proc>;
extended <a href="#R5RS">R5RS</a></abbr>
<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr> procedures are shown in
<span class=r5rs-proc>bold</span>;
extended <abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr>
procedures, in <span class=r5rs-procx>bold italic</span>.
<div class=indent>
<dl>
@ -1033,9 +1049,10 @@ partition the entire universe of Scheme values.
<!--
==== car cdr
============================================================================-->
<dt class=proc-def1>
<a name="car"></a>
<a name="cdr"></a>
<dt class=proc-def1><code class=proc-def>car</code><var> pair -&gt; value</var>
<code class=proc-def>car</code><var> pair -&gt; value</var>
<dt class=proc-defn><code class=proc-def>cdr</code><var> pair -&gt; value</var>
<dd class=proc-def>
[<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr>]
@ -2786,6 +2803,7 @@ That is, it must be the case that
<div class=indent>
<code>(eq? <var>x</var> <var>y</var>)</code> => <code>(<var>=</var> <var>x</var> <var>y</var>)</code>.
</div>
<p class=continue>
Note that this implies, in turn, that two lists that are <code>eq?</code> are
also set-equal by any legal comparison procedure. This allows for
constant-time determination of set operations on <code>eq?</code> lists.
@ -3142,7 +3160,7 @@ John David Stone, and Joerg F. Wittenberger. I am grateful to them for their
assistance.
<p>
I am also grateful the authors, implementors and documentors of all the systems
mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted
mentioned in the rationale. Aubrey Jaffer and Kent Pitman should be noted
for their work in producing Web-accessible versions of the R5RS and
<a href="#CommonLisp">Common Lisp</a> spec, which was a tremendous aid.
<p>
@ -3156,31 +3174,15 @@ results, of course.
<dl>
<dt class=biblio>This document, in HTML:
<dd><a href="http://srfi.schemers.org/srfi-1/srfi-1.html">
<dd><a href="srfi-1.html">
http://srfi.schemers.org/srfi-1/srfi-1.html</a>
<br><a class=draft
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html">
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html (draft)</a>
<dt class=biblio>This document, in simple text format:
<dd><a href="http://srfi.schemers.org/srfi-1/srfi-1.txt">
http://srfi.schemers.org/srfi-1/srfi-1.txt</a>
<br><a class=draft
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt">
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt (draft)</a>
<dt class=biblio>Source code for the reference implementation:
<dd><a HREF="http://srfi.schemers.org/srfi-1/srfi-1-reference.scm">
<dd><a HREF="srfi-1-reference.scm">
http://srfi.schemers.org/srfi-1/srfi-1-reference.scm</a>
<br><a class=draft
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm">
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm (draft)</a>
<dt class=biblio>Archive of SRFI-1 discussion-list email:
<dd><a href="http://srfi.schemers.org/srfi-1/mail-archive/maillist.html">
<dd><a href="mail-archive/maillist.html">
http://srfi.schemers.org/srfi-1/mail-archive/maillist.html</a>
<dt class=biblio>SRFI web site:

350
scsh/lib/string-package.scm Normal file
View File

@ -0,0 +1,350 @@
;;; Complete interface spec for the SRFI-13 string-lib and -*- Scheme -*-
;;; string-lib-internals libraries in the Scheme48 interface
;;; and module language. The interfaces are fully typed, in
;;; the Scheme48 type notation. The structure definitions also
;;; provide a formal description of the external dependencies
;;; of the source code.
;;; string-lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; string-map string-map!
;;; string-fold string-unfold
;;; string-fold-right string-unfold-right
;;; string-tabulate string-for-each string-for-each-index
;;; string-every string-any
;;; string-hash string-hash-ci
;;; string-compare string-compare-ci
;;; string= string< string> string<= string>= string<>
;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
;;; string-downcase string-upcase string-titlecase
;;; string-downcase! string-upcase! string-titlecase!
;;; string-take string-take-right
;;; string-drop string-drop-right
;;; string-pad string-pad-right
;;; string-trim string-trim-right string-trim-both
;;; string-filter string-delete
;;; string-index string-index-right
;;; string-skip string-skip-right
;;; string-count
;;; string-prefix-length string-prefix-length-ci
;;; string-suffix-length string-suffix-length-ci
;;; string-prefix? string-prefix-ci?
;;; string-suffix? string-suffix-ci?
;;; string-contains string-contains-ci
;;; string-fill! string-copy!
;;; string-copy substring/shared
;;; string-reverse string-reverse! reverse-list->string
;;; string->list
;;; string-concatenate string-concatenate/shared
;;; string-concatenate-reverse string-concatenate-reverse/shared
;;; string-append/shared
;;; xsubstring string-xcopy!
;;; string-null?
;;; string-join
;;; string-tokenize
;;; string-replace
;;;
;;; string? make-string string string-length string-ref string-set!
;;; string-append list->string
;;;
;;; make-kmp-restart-vector string-kmp-partial-search kmp-step
;;; string-parse-start+end
;;; string-parse-final-start+end
;;; let-string-start+end
;;; check-substring-spec
;;; substring-spec-ok?
(define-interface string-lib-interface
(export
;; string-map proc s [start end] -> s
(string-map (proc ((proc (:char) :char)
:string
&opt :exact-integer :exact-integer)
:string))
;; string-map! proc s [start end] -> unspecific
(string-map! (proc ((proc (:char) :values)
:string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-fold kons knil s [start end] -> value
;; string-fold-right kons knil s [start end] -> value
((string-fold string-fold-right)
(proc ((proc (:char :value) :value)
:value :string
&opt :exact-integer :exact-integer)
:value))
;; string-unfold p f g seed [base make-final] -> string
;; string-unfold-right p f g seed [base make-final] -> string
((string-unfold string-unfold)
(proc ((proc (:value) :boolean)
(proc (:value) :char)
(proc (:value) :value)
:value
&opt :string (proc (:value) :string))
:string))
; Enough is enough.
; ;; string-unfoldn p f g seed ... -> string
; (string-unfoldn (proc ((procedure :values :boolean)
; (procedure :values :char)
; (procedure :values :values)
; &rest :value)
; :string))
;; string-tabulate proc len -> string
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
:string))
;; string-for-each proc s [start end] -> unspecific
;; string-for-each-index proc s [start end] -> unspecific
((string-for-each string-for-each-index)
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
:unspecific))
;; string-every pred s [start end]
;; string-any pred s [start end]
(string-every
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:boolean))
(string-any
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:value))
;; string-hash s [bound start end]
;; string-hash-ci s [bound start end]
((string-hash string-hash-ci)
(proc (:string &opt :exact-integer :exact-integer :exact-integer)
:exact-integer))
;; string-compare string1 string2 lt-proc eq-proc gt-proc [start end]
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc [start end]
((string-compare string-compare-ci)
(proc (:string :string (proc (:exact-integer) :values)
(proc (:exact-integer) :values)
(proc (:exact-integer) :values)
&opt :exact-integer :exact-integer)
:values))
;; string< string1 string2 [start1 end1 start2 end2]
((string= string< string> string<= string>= string<>
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
(proc (:string :string &opt :exact-integer :exact-integer
:exact-integer :exact-integer)
:boolean))
;; string-titlecase string [start end]
;; string-upcase string [start end]
;; string-downcase string [start end]
;; string-titlecase! string [start end]
;; string-upcase! string [start end]
;; string-downcase! string [start end]
((string-titlecase string-upcase string-downcase)
(proc (:string &opt :exact-integer :exact-integer) :string))
((string-titlecase! string-upcase! string-downcase!)
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; string-take string nchars
;; string-drop string nchars
;; string-take-right string nchars
;; string-drop-right string nchars
((string-take string-drop string-take-right string-drop-right)
(proc (:string :exact-integer) :string))
;; string-pad string k [char start end]
;; string-pad-right string k [char start end]
((string-pad string-pad-right)
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
:string))
;; string-trim string [char/char-set/pred start end]
;; string-trim-right string [char/char-set/pred start end]
;; string-trim-both string [char/char-set/pred start end]
((string-trim string-trim-right string-trim-both)
(proc (:string &opt :value :exact-integer :exact-integer)
:string))
;; string-filter char/char-set/pred string [start end]
;; string-delete char/char-set/pred string [start end]
((string-filter string-delete)
(proc (:value :string &opt :exact-integer :exact-integer) :string))
;; string-index string char/char-set/pred [start end]
;; string-index-right string char/char-set/pred [end start]
;; string-skip string char/char-set/pred [start end]
;; string-skip-right string char/char-set/pred [end start]
((string-index string-index-right string-skip string-skip-right)
(proc (:string :value &opt :exact-integer :exact-integer)
:value))
;; string-count string char/char-set/pred [start end]
(string-count (proc (:string :value &opt :exact-integer :exact-integer)
:exact-integer))
;; string-prefix-length string1 string2 [start1 end1 start2 end2]
;; string-suffix-length string1 string2 [start1 end1 start2 end2]
;; string-prefix-length-ci string1 string2 [start1 end1 start2 end2]
;; string-suffix-length-ci string1 string2 [start1 end1 start2 end2]
((string-prefix-length string-prefix-length-ci
string-suffix-length string-suffix-length-ci)
(proc (:string :string &opt
:exact-integer :exact-integer :exact-integer :exact-integer)
:exact-integer))
;; string-prefix? string1 string2 [start1 end1 start2 end2]
;; string-suffix? string1 string2 [start1 end1 start2 end2]
;; string-prefix-ci? string1 string2 [start1 end1 start2 end2]
;; string-suffix-ci? string1 string2 [start1 end1 start2 end2]
((string-prefix? string-prefix-ci?
string-suffix? string-suffix-ci?)
(proc (:string :string &opt
:exact-integer :exact-integer :exact-integer :exact-integer)
:boolean))
;; string-contains string pattern [s-start s-end p-start p-end]
;; string-contains-ci string pattern [s-start s-end p-start p-end]
((string-contains string-contains-ci)
(proc (:string :string &opt :exact-integer :exact-integer
:exact-integer :exact-integer)
:value))
;; string-fill! string char [start end]
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
:unspecific))
;; string-copy! to tstart from [fstart fend]
(string-copy! (proc (:string :exact-integer :string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-copy s [start end] -> string
;; substring/shared s start [end] -> string
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
(substring/shared (proc (:string :exact-integer &opt :exact-integer) :string))
;; string-reverse s [start end]
;; string-reverse! s [start end]
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; reverse-list->string char-list
;; string->list s [start end]
;; string-concatenate string-list
;; string-concatenate/shared string-list
;; string-append/shared s ...
(reverse-list->string (proc (:value) :string))
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
((string-concatenate string-concatenate/shared) (proc (:value) :string))
(string-append/shared (proc (&rest :string) :string))
;; string-concatenate-reverse string-list [final-string end]
;; string-concatenate-reverse/shared string-list [final-string end]
((string-concatenate-reverse string-concatenate-reverse/shared)
(proc (:value &opt :string :exact-integer) :string))
;; xsubstring s from [to start end]
;; string-xcopy! target tstart s from [to start end]
(xsubstring (proc (:string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:string))
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:unspecific))
;; string-null? s
(string-null? (proc (:string) :boolean))
;; string-join string-list [delim grammar]
(string-join (proc (:value &opt :string :symbol) :string))
;; string-tokenize string [token-chars start end]
(string-tokenize (proc (:string &opt :value :exact-integer :exact-integer)
:value))
;; string-replace s1 s2 start1 end1 [start2 end2]
(string-replace (proc (:string :string :exact-integer :exact-integer
&opt :exact-integer :exact-integer)
:string))
;; Here are the R4RS/R5RS procs
(string? (proc (:value) :boolean))
(make-string (proc (:exact-integer &opt :char) :string))
(string (proc (&rest :char) :string))
(string-length (proc (:string) :exact-integer))
(string-ref (proc (:string :exact-integer) :char))
(string-set! (proc (:string :exact-integer :char) :unspecific))
(string-append (proc (&rest :string) :string))
(list->string (proc (:value) :string))
;; These are the R4RS types for STRING-COPY, STRING-FILL!, and
;; STRING->LIST. The string-lib types are different -- extended.
;(string-copy (proc (:string) :string))
;(string-fill! (proc (:string :char) :unspecific))
;(string->list (proc (:string) :value))
))
;;; make-kmp-restart-vector
;;; string-kmp-partial-search
;;; kmp-step
;;; string-parse-start+end
;;; string-parse-final-start+end
;;; let-string-start+end
;;; check-substring-spec
;;; substring-spec-ok?
(define-interface string-lib-internals-interface
(export
(let-string-start+end :syntax)
(string-parse-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer :value)))
(string-parse-final-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer)))
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
:unspecific))
(substring-spec-ok? (proc ((procedure :values :values) :string :exact-integer :exact-integer)
:boolean))
;; string-kmp-partial-search pat rv s i [c= p-start s-start s-end] -> integer
(string-kmp-partial-search (proc (:string :vector :string :exact-integer
&opt (proc (:char :char) :boolean)
:exact-integer :exact-integer :exact-integer)
:exact-integer))
;; make-kmp-restart-vector s [c= start end] -> vector
(make-kmp-restart-vector (proc (:string &opt (proc (:char :char) :boolean)
:exact-integer :exact-integer)
:vector))
;; kmp-step pat rv c i c= p-start -> integer
(kmp-step (proc (:string :vector :char :exact-integer
(proc (:char :char) :boolean)
:exact-integer)
:exact-integer))
))
(define-structures ((string-lib string-lib-interface)
(string-lib-internals string-lib-internals-interface))
(access scheme) ; Get at R5RS SUBSTRING
(open receiving ; RECEIVE
char-set-lib ; Various
bitwise ; BITWISE-AND for hashing
error-package ; ERROR
let-opt ; LET-OPTIONALS* :OPTIONAL
scheme)
;; A few cheesy S48/scsh definitions for string-lib dependencies:
(begin (define (check-arg pred val caller)
(let lp ((val val))
(if (pred val) val (lp (error "Bad argument" val pred caller)))))
;; These two internal procedures are correctly defined for ASCII or
;; Latin-1. They are *not* correct for Unicode.
(define (char-cased? c) (char-set-contains? char-set:letter c))
(define (char-titlecase c) (char-upcase c)))
(files string-lib))

View File

@ -52,7 +52,7 @@
;;; line two. Return these as a list of strings.
(define read-files-secondary-args
(let ((non-newline (char-set-invert (char-set #\newline))))
(let ((non-newline (char-set-complement! (char-set #\newline))))
(lambda (fname)
(call-with-input-file fname
(lambda (port)

View File

@ -198,7 +198,7 @@
define-record-types
defrec-package
receiving
char-set-package
char-set-lib
error-package
ascii
primitives ; JMG add-finalizer!
@ -237,7 +237,7 @@
(open re-internals
conditionals
re-level-0
char-set-package
char-set-lib
scsh-utilities ; fold
error-package
ascii
@ -253,7 +253,7 @@
(define-structure rx-syntax rx-syntax-interface
(open re-level-0
char-set-package
char-set-lib
rx-lib
scheme)
(for-syntax (open sre-internal-syntax-tools scheme))

View File

@ -74,8 +74,8 @@
;;; Two useful standard char sets
(define nonl-chars (char-set-invert (char-set #\newline)))
(define word-chars (char-set-union (char-set #\_) char-set:alphanumeric))
(define nonl-chars (char-set-complement (char-set #\newline)))
(define word-chars (char-set-union (char-set #\_) char-set:letter+digit))
;;; Little utility that should be moved to scsh's utilities.scm
(define (partition pred lis)
@ -232,8 +232,8 @@
(map parse-char-class (cdr sre))
r))
(cs (if (char-set? cs)
(char-set-invert cs)
`(,(r 'char-set-invert) ,cs))))
(char-set-complement cs)
`(,(r 'char-set-complement) ,cs))))
(if cset? cs (make-re-char-set cs))))
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
@ -281,15 +281,15 @@
((nonl) nonl-chars)
((lower-case lower) char-set:lower-case)
((upper-case upper) char-set:upper-case)
((alphabetic alpha) char-set:alphabetic)
((numeric digit num) char-set:numeric)
((alphanumeric alnum alphanum) char-set:alphanumeric)
((alphabetic alpha) char-set:letter)
((numeric digit num) char-set:digit)
((alphanumeric alnum alphanum) char-set:letter+digit)
((punctuation punct) char-set:punctuation)
((graphic graph) char-set:graphic)
((blank) char-set:blank)
((whitespace space white) char-set:whitespace)
((printing print) char-set:printing)
((control cntrl) char-set:control)
((control cntrl) char-set:iso-control)
((hex-digit xdigit hex) char-set:hex-digit)
((ascii) char-set:ascii)
(else (error "Illegal regular expression" sre)))))
@ -346,10 +346,9 @@
(if (< i 0)
(if cs? cset (uncase-char-set cset)) ; Case fold if necessary.
(lp (- i 2)
(char-set-union!
cset
(ascii-range->char-set (char->ascii (string-ref specs (- i 1)))
(+ 1 (char->ascii (string-ref specs i)))))))))))
(ucs-range->char-set! (char->ascii (string-ref specs (- i 1)))
(+ 1 (char->ascii (string-ref specs i)))
#f cset)))))))
;;; (regexp->scheme re r)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -451,17 +450,17 @@
(else #f)))
(if one
(switch char-set= cs
((char-set:alphanumeric) alphanum)
((char-set:letter+digit) alphanum)
((char-set:graphic) graph)
((char-set:hex-digit) hex)
(else #f))
(and (char-set= cs char-set:alphabetic) alpha)))
(and (char-set= cs char-set:letter) alpha)))
(and (char-set= cs char-set:lower-case) lower)) ; a, not A
(if biga
(and (not space) (char-set= cs char-set:upper-case) upper)
(if one
(and (not space) (char-set= cs char-set:numeric) num)
(and (not space) (char-set= cs char-set:digit) num)
(if space
(switch char-set= cs
((char-set:whitespace) white)
@ -469,7 +468,7 @@
(else #f))
(switch char-set= cs
((char-set:punctuation) punct)
((char-set:control) ctl)
((char-set:iso-control) ctl)
(else #f))))))))
@ -478,21 +477,21 @@
(try-classify-char-set cs
'char-set:full 'nonl-chars
'char-set:lower-case 'char-set:upper-case
'char-set:alphabetic 'char-set:numeric
'char-set:alphanumeric 'char-set:punctuation
'char-set:letter 'char-set:digit
'char-set:letter+digit 'char-set:punctuation
'char-set:graphic 'char-set:whitespace
'char-set:printing 'char-set:control
'char-set:printing 'char-set:iso-control
'char-set:hex-digit 'char-set:blank
'char-set:ascii))))
(? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code.
((char-set-empty? cs) (r 'char-set:empty))
((try cs) => r)
((try (char-set-invert cs)) =>
(lambda (name) `(,(r 'char-set-invert) ,name)))
((try (char-set-complement cs)) =>
(lambda (name) `(,(r 'char-set-complement) ,name)))
(else
(receive (loose+ ranges+) (char-set->in-pair cs)
(receive (loose- ranges-) (char-set->in-pair (char-set-invert cs))
(receive (loose- ranges-) (char-set->in-pair (char-set-complement cs))
(let ((makeit (r 'spec->char-set)))
(if (< (+ (length loose-) (* 12 (length ranges-)))
(+ (length loose+) (* 12 (length ranges+))))
@ -517,9 +516,9 @@
'ascii)))
(nchars (char-set-size cs)))
(? ((zero? nchars) `(,(r '|)))
((= 1 nchars) (apply string (char-set-members cs)))
((= 1 nchars) (apply string (char-set->list cs)))
((try cs) => r)
((try (char-set-invert cs)) =>
((try (char-set-complement cs)) =>
(lambda (name) `(,(r '~) ,name)))
(else (receive (cs rp comp?) (char-set->in-sexp-spec cs)
(let ((args (append (? ((string=? cs "") '())
@ -619,7 +618,7 @@
`(,(car r) ,(cdr r) . ,lis))
'() ranges)))))))
(receive (cs+ rp+) (->sexp-pair cset)
(receive (cs- rp-) (->sexp-pair (char-set-invert cset))
(receive (cs- rp-) (->sexp-pair (char-set-complement cset))
(if (< (+ (string-length cs-) (string-length rp-))
(+ (string-length cs+) (string-length rp+)))
(values cs- rp- #t)

View File

@ -375,11 +375,11 @@
(? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set
((= 1 nchars) ; Singleton set
(translate-string (string (car (char-set-members cset)))))
(translate-string (string (car (char-set->list cset)))))
;; General case. Try both [...] and [^...].
(else (let ((s- (->bracket-string cset #t))
(s+ (->bracket-string (char-set-invert cset) #f)))
(s+ (->bracket-string (char-set-complement cset) #f)))
(values (if (< (string-length s-) (string-length s+))
s- s+)
1 0 '#())))))))

View File

@ -162,7 +162,7 @@
(string->char-set (re-string:chars elt))))
res))))
(if (= 1 (char-set-size cset))
(make-re-string (apply string (char-set-members cset)))
(make-re-string (apply string (char-set->list cset)))
(make-re-char-set cset)))
(if (pair? res)
@ -387,7 +387,7 @@
(char-set-full? cs)))))
(define re-nonl
(make-re-char-set/posix (char-set-invert (char-set #\newline))
(make-re-char-set/posix (char-set-complement (char-set #\newline))
"[^\n]"
'#()))
@ -414,7 +414,7 @@
(define re-word
(let ((wcs (char-set-union char-set:alphanumeric ; Word chars
(let ((wcs (char-set-union char-set:letter+digit ; Word chars
(char-set #\_))))
(make-re-seq (list re-bow
(make-re-repeat 1 #f (make-re-char-set wcs))

View File

@ -36,5 +36,5 @@
ranges))))
(if in?
(doit loose ranges)
(char-set-invert! (doit loose ranges)))))
(char-set-complement! (doit loose ranges)))))

View File

@ -47,7 +47,7 @@
(values (let ((cs (re-char-set:cset re)))
(if (and (char-set? cs)
(= 1 (char-set-size cs)))
(make-re-string (string (car (char-set-members cs))))
(make-re-string (string (car (char-set->list cs))))
re))
0))
@ -243,7 +243,7 @@
(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))
(tail (? ((zero? numchars) tail) ; Drop empty char set.
((= 1 numchars) ; {c} => "c"
(cons (make-re-string (string (car (char-set-members cset))))
(cons (make-re-string (string (car (char-set->list cset))))
tail))
(else (cons (make-re-char-set cset) tail)))))
tail))

View File

@ -128,7 +128,7 @@
((#\]) (if (= i i0)
(lp i1 (char-set-adjoin! cset #\]))
(let ((cset (if negate?
(char-set-invert! cset)
(char-set-complement! cset)
cset)))
(values (make-re-char-set cset) i1))))