Rest of Olins merge for SRFI-13 and 14.
This commit is contained in:
parent
e18289f61c
commit
0ffb123bee
28
Makefile.in
28
Makefile.in
|
@ -81,9 +81,9 @@ BIG_HEAP = -h 5000000
|
|||
# LINKER_VM = ./$(VM) $(BIG_HEAP)
|
||||
# LINKER_RUNNABLE = $(LINKER_VM) -i $(IMAGE)
|
||||
|
||||
# therefor according to 2.
|
||||
LINKER_VM = $(RUNNABLE) $(BIG_HEAP)
|
||||
LINKER_RUNNABLE = $(RUNNABLE)
|
||||
# therefor according to 2 but canot use scsh since -i is not understood
|
||||
LINKER_VM = scheme48 $(BIG_HEAP)
|
||||
LINKER_RUNNABLE = scheme48
|
||||
|
||||
LINKER_IMAGE = build/linker.image
|
||||
LINKER = $(LINKER_VM) -i $(LINKER_IMAGE)
|
||||
|
@ -382,7 +382,7 @@ go:
|
|||
echo '#!/bin/sh' >$@ && \
|
||||
echo >>$@ && \
|
||||
echo "lib=`pwd`" >>$@ && \
|
||||
echo 'exec $$lib/$(VM) -o $$lib/$(VM) -i $$lib/scsh/$(IMAGE) "$$@"' \
|
||||
echo 'exec $$lib/$(VM) -o $$lib/$(VM) -i $$lib/scsh/scsh.image "$$@"' \
|
||||
>>$@ && \
|
||||
chmod +x $@
|
||||
|
||||
|
@ -766,6 +766,7 @@ SCHEME =scsh/awk.scm \
|
|||
scsh/scsh-version.scm \
|
||||
scsh/scsh.scm \
|
||||
scsh/select.scm \
|
||||
scsh/sighandlers.scm \
|
||||
scsh/startup.scm \
|
||||
scsh/stringcoll.scm \
|
||||
scsh/syntax-helpers.scm \
|
||||
|
@ -798,16 +799,6 @@ scsh/scsh: scsh/scsh-tramp.c
|
|||
-DIMAGE=\"$(LIB)/scsh.image\" \
|
||||
scsh/scsh-tramp.c
|
||||
|
||||
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
|
||||
$(srcdir)/scsh/machine/packages.scm \
|
||||
$(srcdir)/scsh/rx/packages.scm \
|
||||
$(srcdir)/scsh/rx/cond-package.scm \
|
||||
$(srcdir)/scsh/scsh-package.scm \
|
||||
$(srcdir)/scsh/lib/string-pack.scm \
|
||||
$(srcdir)/scsh/lib/list-pack.scm \
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm
|
||||
|
||||
|
||||
bs: build/build-scsh-image
|
||||
sh $(srcdir)/build/build-scsh-image "$(srcdir)" "$(LIB)" "$(IMAGE)" \
|
||||
"$(VM)" cig/cig.image
|
||||
|
@ -817,9 +808,13 @@ loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
|
|||
$(srcdir)/scsh/rx/packages.scm \
|
||||
$(srcdir)/scsh/rx/cond-package.scm \
|
||||
$(srcdir)/scsh/scsh-package.scm \
|
||||
$(srcdir)/scsh/lib/string-pack.scm \
|
||||
$(srcdir)/scsh/lib/cset-package.scm \
|
||||
$(srcdir)/scsh/lib/string-package.scm \
|
||||
$(srcdir)/scsh/lib/list-pack.scm \
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm
|
||||
$(srcdir)/scsh/lib/ccp-pack.scm \
|
||||
$(srcdir)/scsh/lib/char-package.scm \
|
||||
$(srcdir)/scsh/lib/cset-obsolete.scm
|
||||
|
||||
|
||||
scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
||||
(echo ",translate =scheme48/ $(srcdir)/scheme/"; \
|
||||
|
@ -828,7 +823,6 @@ scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
|||
echo ",config"; \
|
||||
echo ",load $(loads)"; \
|
||||
echo ",load-package scsh"; \
|
||||
echo ",load-package events"; \
|
||||
echo ",load-package scsh-here-string-hax"; \
|
||||
echo ",translate =scheme48/ $(LIB)/"; \
|
||||
echo ",load-package list-lib"; \
|
||||
|
|
|
@ -318,7 +318,7 @@
|
|||
s)))
|
||||
|
||||
((concat) ; CONCAT-delimiter reader.
|
||||
(let ((not-delims (char-set-invert delims)))
|
||||
(let ((not-delims (char-set-complement delims)))
|
||||
(lambda maybe-port
|
||||
(let* ((p (:optional maybe-port (current-input-port)))
|
||||
(s (read-delimited delims p 'concat)))
|
||||
|
@ -328,7 +328,7 @@
|
|||
(string-append s extra-delims))))))))
|
||||
|
||||
((split) ; SPLIT-delimiter reader.
|
||||
(let ((not-delims (char-set-invert delims)))
|
||||
(let ((not-delims (char-set-complement delims)))
|
||||
(lambda maybe-port
|
||||
(let ((p (:optional maybe-port (current-input-port))))
|
||||
(receive (s delim) (read-delimited delims p 'split)
|
||||
|
|
|
@ -150,16 +150,15 @@
|
|||
(case c
|
||||
((#\])
|
||||
(let ((cset (fold (lambda (elt cset)
|
||||
(char-set-union
|
||||
cset
|
||||
(if (char? elt)
|
||||
(char-set elt)
|
||||
(ascii-range->char-set (char->ascii (car elt))
|
||||
(+ 1 (char->ascii (cdr elt)))))))
|
||||
char-set:empty
|
||||
(if (char? elt)
|
||||
(char-set-adjoin! cset elt)
|
||||
(ucs-range->char-set! (char->ascii (car elt))
|
||||
(+ 1 (char->ascii (cdr elt)))
|
||||
#f cset)))
|
||||
(char-set-copy char-set:empty)
|
||||
elts)))
|
||||
(values (re-char-set (if negate?
|
||||
(char-set-invert cset)
|
||||
(char-set-compelment! cset)
|
||||
cset))
|
||||
i)))
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@
|
|||
))
|
||||
|
||||
(define-structure ccp-lib ccp-lib-interface
|
||||
(open char-set-package
|
||||
(open char-set-lib
|
||||
ascii
|
||||
defrec-package
|
||||
string-lib
|
||||
|
|
|
@ -95,11 +95,11 @@
|
|||
(every (lambda (ccp2)
|
||||
(and (char-set= domain (ccp:domain ccp2))
|
||||
(let ((cmap2 (ccp:map ccp2)))
|
||||
(char-set-every? (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap i)
|
||||
(string-ref cmap2 i))))
|
||||
domain))))
|
||||
(char-set-every (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap i)
|
||||
(string-ref cmap2 i))))
|
||||
domain))))
|
||||
rest)))
|
||||
|
||||
|
||||
|
@ -116,11 +116,11 @@
|
|||
(rest (cdr rest)))
|
||||
(and (char-set<= domain1 domain2)
|
||||
(let ((cmap2 (ccp:map ccp2)))
|
||||
(char-set-every? (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap1 i)
|
||||
(string-ref cmap2 i))))
|
||||
domain1))
|
||||
(char-set-every (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap1 i)
|
||||
(string-ref cmap2 i))))
|
||||
domain1))
|
||||
(lp domain2 cmap2 rest))))))
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
;;; These defs are things for characters *not* in SRFIs 13 & 14.
|
||||
;;; It includes some R5RS defs that are not correct in S48 in a Latin-1 world.
|
||||
|
||||
(define-interface char-set-predicates-interface
|
||||
(export
|
||||
((char-lower-case? ; R5RS
|
||||
char-upper-case? ; R5RS
|
||||
char-alphabetic? ; R5RS
|
||||
char-numeric? ; R5RS
|
||||
char-whitespace? ; R5RS
|
||||
|
||||
char-alphanumeric? ; For compatibility w/old code
|
||||
|
||||
char-letter? ; Scsh
|
||||
char-digit?
|
||||
char-letter+digit?
|
||||
char-graphic?
|
||||
char-printing?
|
||||
char-blank?
|
||||
char-iso-control?
|
||||
char-punctuation?
|
||||
char-symbol?
|
||||
char-hex-digit?
|
||||
char-ascii?) (proc (:char) :boolean))))
|
||||
|
||||
|
||||
(define-structure char-set-predicates-lib char-set-predicates-interface
|
||||
(open error-package ; ERROR
|
||||
scsh-utilities ; DEPRECATED-PROC
|
||||
char-set-lib
|
||||
scheme)
|
||||
|
||||
(begin
|
||||
;; These are R5RS. We can't use the native S48 ones, because they
|
||||
;; don't handle full Latin-1.
|
||||
(define (char-lower-case? c) (char-set-contains? char-set:lower-case c))
|
||||
(define (char-upper-case? c) (char-set-contains? char-set:upper-case c))
|
||||
(define (char-alphabetic? c) (char-set-contains? char-set:letter c))
|
||||
(define (char-numeric? c) (char-set-contains? char-set:digit c))
|
||||
(define (char-whitespace? c) (char-set-contains? char-set:whitespace c))
|
||||
|
||||
;; These are scsh extensions to R5RS.
|
||||
(define (char-letter? c) (char-set-contains? char-set:letter c))
|
||||
(define (char-digit? c) (char-set-contains? char-set:digit c))
|
||||
(define (char-letter+digit? c) (char-set-contains? char-set:letter+digit c))
|
||||
(define (char-graphic? c) (char-set-contains? char-set:graphic c))
|
||||
(define (char-printing? c) (char-set-contains? char-set:printing c))
|
||||
(define (char-blank? c) (char-set-contains? char-set:blank c))
|
||||
(define (char-iso-control? c) (char-set-contains? char-set:iso-control c))
|
||||
(define (char-punctuation? c) (char-set-contains? char-set:punctuation c))
|
||||
(define (char-symbol? c) (char-set-contains? char-set:symbol c))
|
||||
(define (char-hex-digit? c) (char-set-contains? char-set:hex-digit c))
|
||||
(define (char-ascii? c) (char-set-contains? char-set:ascii c))
|
||||
|
||||
;; Obsolete scsh.
|
||||
(define char-alphanumeric?
|
||||
(deprecated-proc char-letter+digit? 'char-alphanumeric?
|
||||
"Use CHAR-LETTER+DIGIT? instead.")))
|
||||
(optimize auto-integrate))
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,804 @@
|
|||
;;; SRFI-14 character-sets library -*- Scheme -*-
|
||||
;;;
|
||||
;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom.
|
||||
;;; - Massively rehacked & extended by Olin Shivers 6/98.
|
||||
;;; - Massively redesigned and rehacked 5/2000 during SRFI process.
|
||||
;;; At this point, the code bears the following relationship to the
|
||||
;;; MIT Scheme code: "This is my grandfather's axe. My father replaced
|
||||
;;; the head, and I have replaced the handle." Nonetheless, we preserve
|
||||
;;; the MIT Scheme copyright:
|
||||
;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
|
||||
;;; The MIT Scheme license is a "free software" license. See the end of
|
||||
;;; this file for the tedious details.
|
||||
|
||||
;;; Exports:
|
||||
;;; char-set? char-set= char-set<=
|
||||
;;; char-set-hash
|
||||
;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set?
|
||||
;;; char-set-fold char-set-unfold char-set-unfold!
|
||||
;;; char-set-for-each char-set-map
|
||||
;;; char-set-copy char-set
|
||||
;;;
|
||||
;;; list->char-set string->char-set
|
||||
;;; list->char-set! string->char-set!
|
||||
;;;
|
||||
;;; filterchar-set ucs-range->char-set ->char-set
|
||||
;;; filterchar-set! ucs-range->char-set!
|
||||
;;;
|
||||
;;; char-set->list char-set->string
|
||||
;;;
|
||||
;;; char-set-size char-set-count char-set-contains?
|
||||
;;; char-set-every char-set-any
|
||||
;;;
|
||||
;;; char-set-adjoin char-set-delete
|
||||
;;; char-set-adjoin! char-set-delete!
|
||||
;;;
|
||||
|
||||
;;; char-set-complement char-set-union char-set-intersection
|
||||
;;; char-set-complement! char-set-union! char-set-intersection!
|
||||
;;;
|
||||
;;; char-set-difference char-set-xor char-set-diff+intersection
|
||||
;;; char-set-difference! char-set-xor! char-set-diff+intersection!
|
||||
;;;
|
||||
;;; char-set:lower-case char-set:upper-case char-set:title-case
|
||||
;;; char-set:letter char-set:digit char-set:letter+digit
|
||||
;;; char-set:graphic char-set:printing char-set:whitespace
|
||||
;;; char-set:iso-control char-set:punctuation char-set:symbol
|
||||
;;; char-set:hex-digit char-set:blank char-set:ascii
|
||||
;;; char-set:empty char-set:full
|
||||
|
||||
;;; Imports
|
||||
;;; This code has the following non-R5RS dependencies:
|
||||
;;; - ERROR
|
||||
;;; - %LATIN1->CHAR %CHAR->LATIN1
|
||||
;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting
|
||||
;;; optional arguments from rest lists.
|
||||
;;; - BITWISE-AND for CHAR-SET-HASH
|
||||
;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro
|
||||
;;; - A simple CHECK-ARG procedure:
|
||||
;;; (lambda (pred val caller) (if (not (pred val)) (error val caller)))
|
||||
|
||||
;;; This is simple code, not great code. Char sets are represented as 256-char
|
||||
;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I
|
||||
;;; is ASCII/Latin-1 1, then it is in the set.
|
||||
;;; - Should be rewritten to use bit strings or byte vecs.
|
||||
;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode.
|
||||
|
||||
;;; See the end of the file for porting and performance-tuning notes.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record-type :char-set
|
||||
(make-char-set s)
|
||||
char-set?
|
||||
(s char-set:s))
|
||||
|
||||
|
||||
(define (%string-copy s) (substring s 0 (string-length s)))
|
||||
|
||||
;;; Parse, type-check & default a final optional BASE-CS parameter from
|
||||
;;; a rest argument. Return a *fresh copy* of the underlying string.
|
||||
;;; The default is the empty set. The PROC argument is to help us
|
||||
;;; generate informative error exceptions.
|
||||
|
||||
(define (%default-base maybe-base proc)
|
||||
(if (pair? maybe-base)
|
||||
(let ((bcs (car maybe-base))
|
||||
(tail (cdr maybe-base)))
|
||||
(if (null? tail)
|
||||
(if (char-set? bcs) (%string-copy (char-set:s bcs))
|
||||
(error "BASE-CS parameter not a char-set" proc bcs))
|
||||
(error "Expected final base char set -- too many parameters"
|
||||
proc maybe-base)))
|
||||
(make-string 256 (%latin1->char 0))))
|
||||
|
||||
;;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on
|
||||
;;; behalf of our caller, PROC. This procedure exists basically to provide
|
||||
;;; explicit error-checking & reporting.
|
||||
|
||||
(define (%char-set:s/check cs proc)
|
||||
(let lp ((cs cs))
|
||||
(if (char-set? cs) (char-set:s cs)
|
||||
(lp (error "Not a char-set" cs proc)))))
|
||||
|
||||
|
||||
|
||||
;;; These internal functions hide a lot of the dependency on the
|
||||
;;; underlying string representation of char sets. They should be
|
||||
;;; inlined if possible.
|
||||
|
||||
(define (si=0? s i) (zero? (%char->latin1 (string-ref s i))))
|
||||
(define (si=1? s i) (not (si=0? s i)))
|
||||
(define c0 (%latin1->char 0))
|
||||
(define c1 (%latin1->char 1))
|
||||
(define (si s i) (%char->latin1 (string-ref s i)))
|
||||
(define (%set0! s i) (string-set! s i c0))
|
||||
(define (%set1! s i) (string-set! s i c1))
|
||||
|
||||
;;; These do various "s[i] := s[i] op val" operations -- see
|
||||
;;; %CHAR-SET-ALGEBRA. They are used to implement the various
|
||||
;;; set-algebra procedures.
|
||||
(define (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value.
|
||||
(define (%not! s i v) (setv! s i (- 1 v)))
|
||||
(define (%and! s i v) (if (zero? v) (%set0! s i)))
|
||||
(define (%or! s i v) (if (not (zero? v)) (%set1! s i)))
|
||||
(define (%minus! s i v) (if (not (zero? v)) (%set0! s i)))
|
||||
(define (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i)))))
|
||||
|
||||
|
||||
(define (char-set-copy cs)
|
||||
(make-char-set (%string-copy (%char-set:s/check cs char-set-copy))))
|
||||
|
||||
(define (char-set= . rest)
|
||||
(or (null? rest)
|
||||
(let* ((cs1 (car rest))
|
||||
(rest (cdr rest))
|
||||
(s1 (%char-set:s/check cs1 char-set=)))
|
||||
(let lp ((rest rest))
|
||||
(or (not (pair? rest))
|
||||
(and (string=? s1 (%char-set:s/check (car rest) char-set=))
|
||||
(lp (cdr rest))))))))
|
||||
|
||||
(define (char-set<= . rest)
|
||||
(or (null? rest)
|
||||
(let ((cs1 (car rest))
|
||||
(rest (cdr rest)))
|
||||
(let lp ((s1 (%char-set:s/check cs1 char-set<=)) (rest rest))
|
||||
(or (not (pair? rest))
|
||||
(let ((s2 (%char-set:s/check (car rest) char-set<=))
|
||||
(rest (cdr rest)))
|
||||
(if (eq? s1 s2) (lp s2 rest) ; Fast path
|
||||
(let lp2 ((i 255)) ; Real test
|
||||
(if (< i 0) (lp s2 rest)
|
||||
(and (<= (si s1 i) (si s2 i))
|
||||
(lp2 (- i 1))))))))))))
|
||||
|
||||
;;; Hash
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
|
||||
;;; to keep the intermediate values small. (We do the calculation with just
|
||||
;;; enough bits to represent BOUND, masking off high bits at each step in
|
||||
;;; calculation. If this screws up any important properties of the hash
|
||||
;;; function I'd like to hear about it. -Olin)
|
||||
;;;
|
||||
;;; If you keep BOUND small enough, the intermediate calculations will
|
||||
;;; always be fixnums. How small is dependent on the underlying Scheme system;
|
||||
;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
|
||||
;;; Schemes that give you at least 29 signed bits for fixnums. The core
|
||||
;;; calculation that you don't want to overflow is, worst case,
|
||||
;;; (+ 65535 (* 37 (- bound 1)))
|
||||
;;; where 65535 is the max character code. Choose the default BOUND to be the
|
||||
;;; biggest power of two that won't cause this expression to fixnum overflow,
|
||||
;;; and everything will be copacetic.
|
||||
|
||||
(define (char-set-hash cs . maybe-bound)
|
||||
(let* ((bound (:optional maybe-bound 4194304 (lambda (n) (and (integer? n)
|
||||
(exact? n)
|
||||
(<= 0 n)))))
|
||||
(bound (if (zero? bound) 4194304 bound)) ; 0 means default.
|
||||
(s (%char-set:s/check cs char-set-hash))
|
||||
;; Compute a 111...1 mask that will cover BOUND-1:
|
||||
(mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
|
||||
(if (>= i bound) (- i 1) (lp (+ i i))))))
|
||||
|
||||
(let lp ((i 255) (ans 0))
|
||||
(if (< i 0) (modulo ans bound)
|
||||
(lp (- i 1)
|
||||
(if (si=0? s i) ans
|
||||
(bitwise-and mask (+ (* 37 ans) i))))))))
|
||||
|
||||
|
||||
(define (char-set-contains? cs char)
|
||||
(si=1? (%char-set:s/check cs char-set-contains?)
|
||||
(%char->latin1 (check-arg char? char char-set-contains?))))
|
||||
|
||||
(define (char-set-size cs)
|
||||
(let ((s (%char-set:s/check cs char-set-size)))
|
||||
(let lp ((i 255) (size 0))
|
||||
(if (< i 0) size
|
||||
(lp (- i 1) (+ size (si s i)))))))
|
||||
|
||||
(define (char-set-count pred cset)
|
||||
(check-arg procedure? pred char-set-count)
|
||||
(let ((s (%char-set:s/check cset char-set-count)))
|
||||
(let lp ((i 255) (count 0))
|
||||
(if (< i 0) count
|
||||
(lp (- i 1)
|
||||
(if (and (si=1? s i) (pred (%latin1->char i)))
|
||||
(+ count 1)
|
||||
count))))))
|
||||
|
||||
|
||||
;;; -- Adjoin & delete
|
||||
|
||||
(define (%set-char-set set proc cs chars)
|
||||
(let ((s (%string-copy (%char-set:s/check cs proc))))
|
||||
(for-each (lambda (c) (set s (%char->latin1 c)))
|
||||
chars)
|
||||
(make-char-set s)))
|
||||
|
||||
(define (%set-char-set! set proc cs chars)
|
||||
(let ((s (%char-set:s/check cs proc)))
|
||||
(for-each (lambda (c) (set s (%char->latin1 c)))
|
||||
chars))
|
||||
cs)
|
||||
|
||||
(define (char-set-adjoin cs . chars)
|
||||
(%set-char-set %set1! char-set-adjoin cs chars))
|
||||
(define (char-set-adjoin! cs . chars)
|
||||
(%set-char-set! %set1! char-set-adjoin! cs chars))
|
||||
(define (char-set-delete cs . chars)
|
||||
(%set-char-set %set0! char-set-delete cs chars))
|
||||
(define (char-set-delete! cs . chars)
|
||||
(%set-char-set! %set0! char-set-delete! cs chars))
|
||||
|
||||
|
||||
;;; Cursors
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Simple implementation. A cursors is an integer index into the
|
||||
;;; mark vector, and -1 for the end-of-char-set cursor.
|
||||
;;;
|
||||
;;; If we represented char sets as a bit set, we could do the following
|
||||
;;; trick to pick the lowest bit out of the set:
|
||||
;;; (count-bits (xor (- cset 1) cset))
|
||||
;;; (But first mask out the bits already scanned by the cursor first.)
|
||||
|
||||
(define (char-set-cursor cset)
|
||||
(%char-set-cursor-next cset 256 char-set-cursor))
|
||||
|
||||
(define (end-of-char-set? cursor) (< cursor 0))
|
||||
|
||||
(define (char-set-ref cset cursor) (%latin1->char cursor))
|
||||
|
||||
(define (char-set-cursor-next cset cursor)
|
||||
(check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i 255))) cursor
|
||||
char-set-cursor-next)
|
||||
(%char-set-cursor-next cset cursor char-set-cursor-next))
|
||||
|
||||
(define (%char-set-cursor-next cset cursor proc) ; Internal
|
||||
(let ((s (%char-set:s/check cset proc)))
|
||||
(let lp ((cur cursor))
|
||||
(let ((cur (- cur 1)))
|
||||
(if (or (< cur 0) (si=1? s cur)) cur
|
||||
(lp cur))))))
|
||||
|
||||
|
||||
;;; -- for-each map fold unfold every any
|
||||
|
||||
(define (char-set-for-each proc cs)
|
||||
(check-arg procedure? proc char-set-for-each)
|
||||
(let ((s (%char-set:s/check cs char-set-for-each)))
|
||||
(let lp ((i 255))
|
||||
(cond ((>= i 0)
|
||||
(if (si=1? s i) (proc (%latin1->char i)))
|
||||
(lp (- i 1)))))))
|
||||
|
||||
(define (char-set-map proc cs)
|
||||
(check-arg procedure? proc char-set-map)
|
||||
(let ((s (%char-set:s/check cs char-set-map))
|
||||
(ans (make-string 256 c0)))
|
||||
(let lp ((i 255))
|
||||
(cond ((>= i 0)
|
||||
(if (si=1? s i)
|
||||
(%set1! ans (%char->latin1 (proc (%latin1->char i)))))
|
||||
(lp (- i 1)))))
|
||||
(make-char-set ans)))
|
||||
|
||||
(define (char-set-fold kons knil cs)
|
||||
(check-arg procedure? kons char-set-fold)
|
||||
(let ((s (%char-set:s/check cs char-set-fold)))
|
||||
(let lp ((i 255) (ans knil))
|
||||
(if (< i 0) ans
|
||||
(lp (- i 1)
|
||||
(if (si=0? s i) ans
|
||||
(kons (%latin1->char i) ans)))))))
|
||||
|
||||
(define (char-set-every pred cs)
|
||||
(check-arg procedure? pred char-set-every)
|
||||
(let ((s (%char-set:s/check cs char-set-every)))
|
||||
(let lp ((i 255))
|
||||
(or (< i 0)
|
||||
(and (or (si=0? s i) (pred (%latin1->char i)))
|
||||
(lp (- i 1)))))))
|
||||
|
||||
(define (char-set-any pred cs)
|
||||
(check-arg procedure? pred char-set-any)
|
||||
(let ((s (%char-set:s/check cs char-set-any)))
|
||||
(let lp ((i 255))
|
||||
(and (>= i 0)
|
||||
(or (and (si=1? s i) (pred (%latin1->char i)))
|
||||
(lp (- i 1)))))))
|
||||
|
||||
|
||||
(define (%char-set-unfold! proc p f g s seed)
|
||||
(check-arg procedure? p proc)
|
||||
(check-arg procedure? f proc)
|
||||
(check-arg procedure? g proc)
|
||||
(let lp ((seed seed))
|
||||
(cond ((not (p seed)) ; P says we are done.
|
||||
(%set1! s (%char->latin1 (f seed))) ; Add (F SEED) to set.
|
||||
(lp (g seed)))))) ; Loop on (G SEED).
|
||||
|
||||
(define (char-set-unfold p f g seed . maybe-base)
|
||||
(let ((bs (%default-base maybe-base char-set-unfold)))
|
||||
(%char-set-unfold! char-set-unfold p f g bs seed)
|
||||
(make-char-set bs)))
|
||||
|
||||
(define (char-set-unfold! p f g seed base-cset)
|
||||
(%char-set-unfold! char-set-unfold! p f g
|
||||
(%char-set:s/check base-cset char-set-unfold!)
|
||||
seed)
|
||||
base-cset)
|
||||
|
||||
|
||||
|
||||
;;; list <--> char-set
|
||||
|
||||
(define (%list->char-set! chars s)
|
||||
(for-each (lambda (char) (%set1! s (%char->latin1 char)))
|
||||
chars))
|
||||
|
||||
(define (char-set . chars)
|
||||
(let ((s (make-string 256 c0)))
|
||||
(%list->char-set! chars s)
|
||||
(make-char-set s)))
|
||||
|
||||
(define (list->char-set chars . maybe-base)
|
||||
(let ((bs (%default-base maybe-base list->char-set)))
|
||||
(%list->char-set! chars bs)
|
||||
(make-char-set bs)))
|
||||
|
||||
(define (list->char-set! chars base-cs)
|
||||
(%list->char-set! chars (%char-set:s/check base-cs list->char-set!))
|
||||
base-cs)
|
||||
|
||||
|
||||
(define (char-set->list cs)
|
||||
(let ((s (%char-set:s/check cs char-set->list)))
|
||||
(let lp ((i 255) (ans '()))
|
||||
(if (< i 0) ans
|
||||
(lp (- i 1)
|
||||
(if (si=0? s i) ans
|
||||
(cons (%latin1->char i) ans)))))))
|
||||
|
||||
|
||||
|
||||
;;; string <--> char-set
|
||||
|
||||
(define (%string->char-set! str bs proc)
|
||||
(check-arg string? str proc)
|
||||
(do ((i (- (string-length str) 1) (- i 1)))
|
||||
((< i 0))
|
||||
(%set1! bs (%char->latin1 (string-ref str i)))))
|
||||
|
||||
(define (string->char-set str . maybe-base)
|
||||
(let ((bs (%default-base maybe-base string->char-set)))
|
||||
(%string->char-set! str bs string->char-set)
|
||||
(make-char-set bs)))
|
||||
|
||||
(define (string->char-set! str base-cs)
|
||||
(%string->char-set! str (%char-set:s/check base-cs string->char-set!)
|
||||
string->char-set!)
|
||||
base-cs)
|
||||
|
||||
|
||||
(define (char-set->string cs)
|
||||
(let* ((s (%char-set:s/check cs char-set->string))
|
||||
(ans (make-string (char-set-size cs))))
|
||||
(let lp ((i 255) (j 0))
|
||||
(if (< i 0) ans
|
||||
(let ((j (if (si=0? s i) j
|
||||
(begin (string-set! ans j (%latin1->char i))
|
||||
(+ j 1)))))
|
||||
(lp (- i 1) j))))))
|
||||
|
||||
|
||||
;;; -- UCS-range -> char-set
|
||||
|
||||
(define (%ucs-range->char-set! lower upper error? bs proc)
|
||||
(check-arg (lambda (x) (and (integer? x) (exact? x) (<= 0 x))) lower proc)
|
||||
(check-arg (lambda (x) (and (integer? x) (exact? x) (<= lower x))) upper proc)
|
||||
|
||||
(if (and (< lower upper) (< 256 upper) error?)
|
||||
(error "Requested UCS range contains unavailable characters -- this implementation only supports Latin-1"
|
||||
proc lower upper))
|
||||
|
||||
(let lp ((i (- (min upper 256) 1)))
|
||||
(cond ((<= lower i) (%set1! bs i) (lp (- i 1))))))
|
||||
|
||||
(define (ucs-range->char-set lower upper . rest)
|
||||
(let-optionals* rest ((error? #f) rest)
|
||||
(let ((bs (%default-base rest ucs-range->char-set)))
|
||||
(%ucs-range->char-set! lower upper error? bs ucs-range->char-set)
|
||||
(make-char-set bs))))
|
||||
|
||||
(define (ucs-range->char-set! lower upper error? base-cs)
|
||||
(%ucs-range->char-set! lower upper error?
|
||||
(%char-set:s/check base-cs ucs-range->char-set!)
|
||||
ucs-range->char-set)
|
||||
base-cs)
|
||||
|
||||
|
||||
;;; -- predicate -> char-set
|
||||
|
||||
(define (%char-set-filter! pred ds bs proc)
|
||||
(check-arg procedure? pred proc)
|
||||
(let lp ((i 255))
|
||||
(cond ((>= i 0)
|
||||
(if (and (si=1? ds i) (pred (%latin1->char i)))
|
||||
(%set1! bs i))
|
||||
(lp (- i 1))))))
|
||||
|
||||
(define (char-set-filter predicate domain . maybe-base)
|
||||
(let ((bs (%default-base maybe-base char-set-filter)))
|
||||
(%char-set-filter! predicate
|
||||
(%char-set:s/check domain char-set-filter!)
|
||||
bs
|
||||
char-set-filter)
|
||||
(make-char-set bs)))
|
||||
|
||||
(define (char-set-filter! predicate domain base-cs)
|
||||
(%char-set-filter! predicate
|
||||
(%char-set:s/check domain char-set-filter!)
|
||||
(%char-set:s/check base-cs char-set-filter!)
|
||||
char-set-filter!)
|
||||
base-cs)
|
||||
|
||||
|
||||
;;; {string, char, char-set, char predicate} -> char-set
|
||||
|
||||
(define (->char-set x)
|
||||
(cond ((char-set? x) x)
|
||||
((string? x) (string->char-set x))
|
||||
((char? x) (char-set x))
|
||||
(else (error "->char-set: Not a charset, string or char." x))))
|
||||
|
||||
|
||||
|
||||
;;; Set algebra
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; The exported ! procs are "linear update" -- allowed, but not required, to
|
||||
;;; side-effect their first argument when computing their result. In other
|
||||
;;; words, you must use them as if they were completely functional, just like
|
||||
;;; their non-! counterparts, and you must additionally ensure that their
|
||||
;;; first arguments are "dead" at the point of call. In return, we promise a
|
||||
;;; more efficient result, plus allowing you to always assume char-sets are
|
||||
;;; unchangeable values.
|
||||
|
||||
;;; Apply P to each index and its char code in S: (P I VAL).
|
||||
;;; Used by the set-algebra ops.
|
||||
|
||||
(define (%string-iter p s)
|
||||
(let lp ((i (- (string-length s) 1)))
|
||||
(cond ((>= i 0)
|
||||
(p i (%char->latin1 (string-ref s i)))
|
||||
(lp (- i 1))))))
|
||||
|
||||
;;; String S represents some initial char-set. (OP s i val) does some
|
||||
;;; kind of s[i] := s[i] op val update. Do
|
||||
;;; S := S OP CSETi
|
||||
;;; for all the char-sets in the list CSETS. The n-ary set-algebra ops
|
||||
;;; all use this internal proc.
|
||||
|
||||
(define (%char-set-algebra s csets op proc)
|
||||
(for-each (lambda (cset)
|
||||
(let ((s2 (%char-set:s/check cset proc)))
|
||||
(let lp ((i 255))
|
||||
(cond ((>= i 0)
|
||||
(op s i (si s2 i))
|
||||
(lp (- i 1)))))))
|
||||
csets))
|
||||
|
||||
|
||||
;;; -- Complement
|
||||
|
||||
(define (char-set-complement cs)
|
||||
(let ((s (%char-set:s/check cs char-set-complement))
|
||||
(ans (make-string 256)))
|
||||
(%string-iter (lambda (i v) (%not! ans i v)) s)
|
||||
(make-char-set ans)))
|
||||
|
||||
(define (char-set-complement! cset)
|
||||
(let ((s (%char-set:s/check cset char-set-complement!)))
|
||||
(%string-iter (lambda (i v) (%not! s i v)) s))
|
||||
cset)
|
||||
|
||||
|
||||
;;; -- Union
|
||||
|
||||
(define (char-set-union! cset1 . csets)
|
||||
(%char-set-algebra (%char-set:s/check cset1 char-set-union!)
|
||||
csets %or! char-set-union!)
|
||||
cset1)
|
||||
|
||||
(define (char-set-union . csets)
|
||||
(if (pair? csets)
|
||||
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-union))))
|
||||
(%char-set-algebra s (cdr csets) %or! char-set-union)
|
||||
(make-char-set s))
|
||||
(char-set-copy char-set:empty)))
|
||||
|
||||
|
||||
;;; -- Intersection
|
||||
|
||||
(define (char-set-intersection! cset1 . csets)
|
||||
(%char-set-algebra (%char-set:s/check cset1 char-set-intersection!)
|
||||
csets %and! char-set-intersection!)
|
||||
cset1)
|
||||
|
||||
(define (char-set-intersection . csets)
|
||||
(if (pair? csets)
|
||||
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-intersection))))
|
||||
(%char-set-algebra s (cdr csets) %and! char-set-intersection)
|
||||
(make-char-set s))
|
||||
(char-set-copy char-set:full)))
|
||||
|
||||
|
||||
;;; -- Difference
|
||||
|
||||
(define (char-set-difference! cset1 . csets)
|
||||
(%char-set-algebra (%char-set:s/check cset1 char-set-difference!)
|
||||
csets %minus! char-set-difference!)
|
||||
cset1)
|
||||
|
||||
(define (char-set-difference cs1 . csets)
|
||||
(if (pair? csets)
|
||||
(let ((s (%string-copy (%char-set:s/check cs1 char-set-difference))))
|
||||
(%char-set-algebra s csets %minus! char-set-difference)
|
||||
(make-char-set s))
|
||||
(char-set-copy cs1)))
|
||||
|
||||
|
||||
;;; -- Xor
|
||||
|
||||
(define (char-set-xor! cset1 . csets)
|
||||
(%char-set-algebra (%char-set:s/check cset1 char-set-xor!)
|
||||
csets %xor! char-set-xor!)
|
||||
cset1)
|
||||
|
||||
(define (char-set-xor . csets)
|
||||
(if (pair? csets)
|
||||
(let ((s (%string-copy (%char-set:s/check (car csets) char-set-xor))))
|
||||
(%char-set-algebra s (cdr csets) %xor! char-set-xor)
|
||||
(make-char-set s))
|
||||
(char-set-copy char-set:empty)))
|
||||
|
||||
|
||||
;;; -- Difference & intersection
|
||||
|
||||
(define (%char-set-diff+intersection! diff int csets proc)
|
||||
(for-each (lambda (cs)
|
||||
(%string-iter (lambda (i v)
|
||||
(if (not (zero? v))
|
||||
(cond ((si=1? diff i)
|
||||
(%set0! diff i)
|
||||
(%set1! int i)))))
|
||||
(%char-set:s/check cs proc)))
|
||||
csets))
|
||||
|
||||
(define (char-set-diff+intersection! cs1 cs2 . csets)
|
||||
(let ((s1 (%char-set:s/check cs1 char-set-diff+intersection!))
|
||||
(s2 (%char-set:s/check cs2 char-set-diff+intersection!)))
|
||||
(%string-iter (lambda (i v) (if (zero? v)
|
||||
(%set0! s2 i)
|
||||
(if (si=1? s2 i) (%set0! s1 i))))
|
||||
s1)
|
||||
(%char-set-diff+intersection! s1 s2 csets char-set-diff+intersection!))
|
||||
(values cs1 cs2))
|
||||
|
||||
(define (char-set-diff+intersection cs1 . csets)
|
||||
(let ((diff (string-copy (%char-set:s/check cs1 char-set-diff+intersection)))
|
||||
(int (make-string 256 c0)))
|
||||
(%char-set-diff+intersection! diff int csets char-set-diff+intersection)
|
||||
(values (make-char-set diff) (make-char-set int))))
|
||||
|
||||
|
||||
;;;; System character sets
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; These definitions are for Latin-1.
|
||||
;;;
|
||||
;;; If your Scheme implementation allows you to mark the underlying strings
|
||||
;;; as immutable, you should do so -- it would be very, very bad if a client's
|
||||
;;; buggy code corrupted these constants.
|
||||
|
||||
(define char-set:empty (char-set))
|
||||
(define char-set:full (char-set-complement char-set:empty))
|
||||
|
||||
(define char-set:lower-case
|
||||
(let* ((a-z (ucs-range->char-set #x61 #x7B))
|
||||
(latin1 (ucs-range->char-set! #xdf #xf7 #t a-z))
|
||||
(latin2 (ucs-range->char-set! #xf8 #x100 #t latin1)))
|
||||
(char-set-adjoin! latin2 (%latin1->char #xb5))))
|
||||
|
||||
(define char-set:upper-case
|
||||
(let ((A-Z (ucs-range->char-set #x41 #x5B)))
|
||||
;; Add in the Latin-1 upper-case chars.
|
||||
(ucs-range->char-set! #xd8 #xdf #t
|
||||
(ucs-range->char-set! #xc0 #xd7 #t A-Z))))
|
||||
|
||||
(define char-set:title-case char-set:empty)
|
||||
|
||||
(define char-set:letter
|
||||
(let ((u/l (char-set-union char-set:upper-case char-set:lower-case)))
|
||||
(char-set-adjoin! u/l
|
||||
(%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR
|
||||
(%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR
|
||||
|
||||
(define char-set:digit (string->char-set "0123456789"))
|
||||
(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF"))
|
||||
|
||||
(define char-set:letter+digit
|
||||
(char-set-union char-set:letter char-set:digit))
|
||||
|
||||
(define char-set:punctuation
|
||||
(let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}"))
|
||||
(latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK
|
||||
#xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
#xAD ; SOFT HYPHEN
|
||||
#xB7 ; MIDDLE DOT
|
||||
#xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
#xBF)))) ; INVERTED QUESTION MARK
|
||||
(list->char-set! latin-1-chars ascii)))
|
||||
|
||||
(define char-set:symbol
|
||||
(let ((ascii (string->char-set "$+<=>^`|~"))
|
||||
(latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN
|
||||
#x00A3 ; POUND SIGN
|
||||
#x00A4 ; CURRENCY SIGN
|
||||
#x00A5 ; YEN SIGN
|
||||
#x00A6 ; BROKEN BAR
|
||||
#x00A7 ; SECTION SIGN
|
||||
#x00A8 ; DIAERESIS
|
||||
#x00A9 ; COPYRIGHT SIGN
|
||||
#x00AC ; NOT SIGN
|
||||
#x00AE ; REGISTERED SIGN
|
||||
#x00AF ; MACRON
|
||||
#x00B0 ; DEGREE SIGN
|
||||
#x00B1 ; PLUS-MINUS SIGN
|
||||
#x00B4 ; ACUTE ACCENT
|
||||
#x00B6 ; PILCROW SIGN
|
||||
#x00B8 ; CEDILLA
|
||||
#x00D7 ; MULTIPLICATION SIGN
|
||||
#x00F7)))) ; DIVISION SIGN
|
||||
(list->char-set! latin-1-chars ascii)))
|
||||
|
||||
|
||||
(define char-set:graphic
|
||||
(char-set-union char-set:letter+digit char-set:punctuation char-set:symbol))
|
||||
|
||||
(define char-set:whitespace
|
||||
(list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
|
||||
#x0A ; LINE FEED
|
||||
#x0B ; VERTICAL TABULATION
|
||||
#x0C ; FORM FEED
|
||||
#x0D ; CARRIAGE RETURN
|
||||
#x20 ; SPACE
|
||||
#xA0))))
|
||||
|
||||
(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; NO-BREAK SPACE
|
||||
|
||||
(define char-set:blank
|
||||
(list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
|
||||
#x20 ; SPACE
|
||||
#xA0)))) ; NO-BREAK SPACE
|
||||
|
||||
|
||||
(define char-set:iso-control
|
||||
(ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32)))
|
||||
|
||||
(define char-set:ascii (ucs-range->char-set 0 128))
|
||||
|
||||
|
||||
;;; Porting & performance-tuning notes
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; See the section at the beginning of this file on external dependencies.
|
||||
;;;
|
||||
;;; First and foremost, rewrite this code to use bit vectors of some sort.
|
||||
;;; This will give big speedup and memory savings.
|
||||
;;;
|
||||
;;; - LET-OPTIONALS* macro.
|
||||
;;; This is only used once. You can rewrite the use, port the hairy macro
|
||||
;;; definition (which is implemented using a Clinger-Rees low-level
|
||||
;;; explicit-renaming macro system), or port the simple, high-level
|
||||
;;; definition, which is less efficient.
|
||||
;;;
|
||||
;;; - :OPTIONAL macro
|
||||
;;; Very simply defined using an R5RS high-level macro.
|
||||
;;;
|
||||
;;; Implementations that can arrange for the base char sets to be immutable
|
||||
;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable,
|
||||
;;; which can be used to protect the underlying strings.) It would be very,
|
||||
;;; very bad if a client's buggy code corrupted these constants.
|
||||
;;;
|
||||
;;; There is a fair amount of argument checking. This is, strictly speaking,
|
||||
;;; unnecessary -- the actual body of the procedures will blow up if an
|
||||
;;; illegal value is passed in. However, the error message will not be as good
|
||||
;;; as if the error were caught at the "higher level." Also, a very, very
|
||||
;;; smart Scheme compiler may be able to exploit having the type checks done
|
||||
;;; early, so that the actual body of the procedures can assume proper values.
|
||||
;;; This isn't likely; this kind of compiler technology isn't common any
|
||||
;;; longer.
|
||||
;;;
|
||||
;;; The overhead of optional-argument parsing is irritating. The optional
|
||||
;;; arguments must be consed into a rest list on entry, and then parsed out.
|
||||
;;; Function call should be a matter of a few register moves and a jump; it
|
||||
;;; should not involve heap allocation! Your Scheme system may have a superior
|
||||
;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
|
||||
;;; then this is a prime candidate for optimising these procedures,
|
||||
;;; *especially* the many optional BASE-CS parameters.
|
||||
;;;
|
||||
;;; Note that optional arguments are also a barrier to procedure integration.
|
||||
;;; If your Scheme system permits you to specify alternate entry points
|
||||
;;; for a call when the number of optional arguments is known in a manner
|
||||
;;; that enables inlining/integration, this can provide performance
|
||||
;;; improvements.
|
||||
;;;
|
||||
;;; There is enough *explicit* error checking that *all* internal operations
|
||||
;;; should *never* produce a type or index-range error. Period. Feel like
|
||||
;;; living dangerously? *Big* performance win to be had by replacing string
|
||||
;;; and record-field accessors and setters with unsafe equivalents in the
|
||||
;;; code. Similarly, fixnum-specific operators can speed up the arithmetic
|
||||
;;; done on the index values in the inner loops. The only arguments that are
|
||||
;;; not completely error checked are
|
||||
;;; - string lists (complete checking requires time proportional to the
|
||||
;;; length of the list)
|
||||
;;; - procedure arguments, such as char->char maps & predicates.
|
||||
;;; There is no way to check the range & domain of procedures in Scheme.
|
||||
;;; Procedures that take these parameters cannot fully check their
|
||||
;;; arguments. But all other types to all other procedures are fully
|
||||
;;; checked.
|
||||
;;;
|
||||
;;; This does open up the alternate possibility of simply *removing* these
|
||||
;;; checks, and letting the safe primitives raise the errors. On a dumb
|
||||
;;; Scheme system, this would provide speed (by eliminating the redundant
|
||||
;;; error checks) at the cost of error-message clarity.
|
||||
;;;
|
||||
;;; In an interpreted Scheme, some of these procedures, or the internal
|
||||
;;; routines with % prefixes, are excellent candidates for being rewritten
|
||||
;;; in C.
|
||||
;;;
|
||||
;;; It would also be nice to have the ability to mark some of these
|
||||
;;; routines as candidates for inlining/integration.
|
||||
;;;
|
||||
;;; See the comments preceding the hash function code for notes on tuning
|
||||
;;; the default bound so that the code never overflows your implementation's
|
||||
;;; fixnum size into bignum calculation.
|
||||
;;;
|
||||
;;; All the %-prefixed routines in this source code are written
|
||||
;;; to be called internally to this library. They do *not* perform
|
||||
;;; friendly error checks on the inputs; they assume everything is
|
||||
;;; proper. They also do not take optional arguments. These two properties
|
||||
;;; save calling overhead and enable procedure integration -- but they
|
||||
;;; are not appropriate for exported routines.
|
||||
|
||||
;;; Copyright notice
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
|
||||
;;;
|
||||
;;; This material was developed by the Scheme project at the Massachusetts
|
||||
;;; Institute of Technology, Department of Electrical Engineering and
|
||||
;;; Computer Science. Permission to copy and modify this software, to
|
||||
;;; redistribute either the original software or a modified version, and
|
||||
;;; to use this software for any purpose is granted, subject to the
|
||||
;;; following restrictions and understandings.
|
||||
;;;
|
||||
;;; 1. Any copy made of this software must include this copyright notice
|
||||
;;; in full.
|
||||
;;;
|
||||
;;; 2. Users of this software agree to make their best efforts (a) to
|
||||
;;; return to the MIT Scheme project any improvements or extensions that
|
||||
;;; they make, so that these may be included in future releases; and (b)
|
||||
;;; to inform MIT of noteworthy uses of this software.
|
||||
;;;
|
||||
;;; 3. All materials developed as a consequence of the use of this
|
||||
;;; software shall duly acknowledge such use, in accordance with the usual
|
||||
;;; standards of acknowledging credit in academic research.
|
||||
;;;
|
||||
;;; 4. MIT has made no warrantee or representation that the operation of
|
||||
;;; this software will be error-free, and MIT is under no obligation to
|
||||
;;; provide any services, by way of maintenance, update, or otherwise.
|
||||
;;;
|
||||
;;; 5. In conjunction with products arising from the use of this material,
|
||||
;;; there shall be no use of the name of the Massachusetts Institute of
|
||||
;;; Technology nor of any adaptation thereof in any advertising,
|
||||
;;; promotional, or sales literature without prior written consent from
|
||||
;;; MIT in each case.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,52 @@
|
|||
;;; Support for obsolete, deprecated 0.5.2 char-set procedures.
|
||||
;;; Will go away in a future release.
|
||||
|
||||
(define-interface obsolete-char-set-interface
|
||||
(export char-set-members ; char-set->list
|
||||
chars->char-set ; list->char-set
|
||||
ascii-range->char-set ; ucs-range->char-set (not exact)
|
||||
predicate->char-set ; char-set-filter (not exact)
|
||||
->char-set ; no longer handles a predicate
|
||||
char-set-every? ; char-set-every
|
||||
|
||||
char-set-invert ; char-set-complement
|
||||
char-set-invert! ; char-set-complement!
|
||||
|
||||
char-set:alphabetic ; char-set:letter
|
||||
char-set:numeric ; char-set:digit
|
||||
char-set:alphanumeric ; char-set:letter+digit
|
||||
char-set:control)) ; char-set:iso-control
|
||||
|
||||
|
||||
(define-structure obsolete-char-set-lib obsolete-char-set-interface
|
||||
(open scsh-utilities char-set-lib scheme)
|
||||
(begin
|
||||
|
||||
(define char-set-members
|
||||
(deprecated-proc char-set->list 'char-set-members
|
||||
"Use CHAR-SET->LIST instead."))
|
||||
(define chars->char-set
|
||||
(deprecated-proc list->char-set 'chars->char-set
|
||||
"Use LIST->CHAR-SET instead."))
|
||||
(define ascii-range->char-set
|
||||
(deprecated-proc (lambda (lower upper) (ucs-range->char-set lower upper #t))
|
||||
'ascii-range->char-set
|
||||
"Use UCS-RANGE->CHAR-SET instead."))
|
||||
(define predicate->char-set
|
||||
(deprecated-proc (lambda (pred) (char-set-filter pred char-set:full))
|
||||
'predicate->char-set
|
||||
"Change code to use CHAR-SET-FILTER."))
|
||||
(define char-set-every?
|
||||
(deprecated-proc char-set-every 'char-set-every?
|
||||
"Use CHAR-SET-EVERYyn instead."))
|
||||
(define char-set-invert
|
||||
(deprecated-proc char-set-complement 'char-set-invert
|
||||
"Use CHAR-SET-COMPLEMENTyn instead."))
|
||||
(define char-set-invert!
|
||||
(deprecated-proc char-set-complement! 'char-set-invert!
|
||||
"Use CHAR-SET-COMPLEMENT!yn instead."))
|
||||
|
||||
(define char-set:alphabetic char-set:letter)
|
||||
(define char-set:numeric char-set:digit)
|
||||
(define char-set:alphanumeric char-set:letter+digit)
|
||||
(define char-set:control char-set:iso-control)))
|
|
@ -0,0 +1,151 @@
|
|||
;;; SRFI-14 interface for Scheme48 -*- Scheme -*-
|
||||
;;;
|
||||
;;; Complete interface spec for the SRFI-14 char-set-lib library in the
|
||||
;;; Scheme48 interface and module language. The interface is fully typed, in
|
||||
;;; the Scheme48 type notation. The structure definitions also provide a
|
||||
;;; formal description of the external dependencies of the source code.
|
||||
|
||||
(define-interface char-set-interface
|
||||
(export (char-set? (proc (:value) :boolean))
|
||||
((char-set= char-set<=) (proc (&rest :value) :boolean))
|
||||
|
||||
(char-set-hash (proc (:value &opt :exact-integer) :exact-integer))
|
||||
|
||||
;; Cursors are exact integers in the reference implementation.
|
||||
;; These typings would be different with a different cursor
|
||||
;; implementation.
|
||||
;; Too bad Scheme doesn't have abstract data types.
|
||||
(char-set-cursor (proc (:value) :exact-integer))
|
||||
(char-set-ref (proc (:value :exact-integer) :char))
|
||||
(char-set-cursor-next (proc (:value :exact-integer) :exact-integer))
|
||||
(end-of-char-set? (proc (:value) :boolean))
|
||||
|
||||
(char-set-fold (proc ((proc (:char :value) :value) :value :value)
|
||||
:value))
|
||||
(char-set-unfold (proc ((proc (:value) :boolean)
|
||||
(proc (:value) :value)
|
||||
(proc (:value) :value)
|
||||
:value
|
||||
&opt :value)
|
||||
:value))
|
||||
|
||||
(char-set-unfold! (proc ((proc (:value) :boolean)
|
||||
(proc (:value) :value)
|
||||
(proc (:value) :value)
|
||||
:value :value)
|
||||
:value))
|
||||
|
||||
(char-set-for-each (proc ((proc (:char) :values) :value) :unspecific))
|
||||
(char-set-map (proc ((proc (:char) :char) :value) :value))
|
||||
|
||||
(char-set-copy (proc (:value) :value))
|
||||
|
||||
(char-set (proc (&rest :char) :value))
|
||||
|
||||
(list->char-set (proc (:value &opt :value) :value))
|
||||
(list->char-set! (proc (:value :value) :value))
|
||||
|
||||
(string->char-set (proc (:value &opt :value) :value))
|
||||
(string->char-set! (proc (:value :value) :value))
|
||||
|
||||
(ucs-range->char-set (proc (:exact-integer :exact-integer &opt
|
||||
:boolean :value)
|
||||
:value))
|
||||
(ucs-range->char-set! (proc (:exact-integer :exact-integer
|
||||
:boolean :value)
|
||||
:value))
|
||||
|
||||
(char-set-filter (proc ((proc (:char) :boolean) :value &opt :value) :value))
|
||||
(char-set-filter! (proc ((proc (:char) :boolean) :value :value) :value))
|
||||
|
||||
(->char-set (proc (:value) :value))
|
||||
|
||||
(char-set-size (proc (:value) :exact-integer))
|
||||
(char-set-count (proc ((proc (:char) :boolean) :value) :exact-integer))
|
||||
(char-set-contains? (proc (:char :value) :boolean))
|
||||
|
||||
(char-set-every (proc ((proc (:char) :boolean) :value) :boolean))
|
||||
(char-set-any (proc ((proc (:char) :boolean) :value) :value))
|
||||
|
||||
((char-set-adjoin char-set-delete
|
||||
char-set-adjoin! char-set-delete!)
|
||||
(proc (:value &rest :char) :value))
|
||||
|
||||
(char-set->list (proc (:value) :value))
|
||||
(char-set->string (proc (:value) :string))
|
||||
|
||||
(char-set-complement (proc (:value) :value))
|
||||
((char-set-union char-set-intersection char-set-xor)
|
||||
(proc (&rest :value) :value))
|
||||
|
||||
(char-set-difference (proc (:value &opt :value) :value))
|
||||
|
||||
(char-set-diff+intersection (proc (:value &rest :value)
|
||||
(some-values :value :value)))
|
||||
|
||||
(char-set-complement! (proc (:value) :value))
|
||||
|
||||
((char-set-union! char-set-intersection!
|
||||
char-set-xor! char-set-difference!)
|
||||
(proc (:value &opt :value) :value))
|
||||
|
||||
(char-set-diff+intersection! (proc (:value :value &rest :value)
|
||||
(some-values :value :value)))
|
||||
|
||||
char-set:lower-case
|
||||
char-set:upper-case
|
||||
char-set:letter
|
||||
char-set:digit
|
||||
char-set:letter+digit
|
||||
char-set:graphic
|
||||
char-set:printing
|
||||
char-set:whitespace
|
||||
char-set:blank
|
||||
char-set:iso-control
|
||||
char-set:punctuation
|
||||
char-set:symbol
|
||||
char-set:hex-digit
|
||||
char-set:ascii
|
||||
char-set:empty
|
||||
char-set:full
|
||||
))
|
||||
|
||||
; rdelim.scm gets into the innards of char-sets.
|
||||
(define-interface scsh-char-set-low-level-interface
|
||||
(export (char-set:s (proc (:value) :string))))
|
||||
|
||||
(define-structures ((char-set-lib char-set-interface)
|
||||
(scsh-char-set-low-level-lib scsh-char-set-low-level-interface))
|
||||
(open error-package ; ERROR procedure
|
||||
let-opt ; LET-OPTIONALS* and :OPTIONAL
|
||||
ascii ; CHAR->ASCII ASCII->CHAR
|
||||
bitwise ; BITWISE-AND
|
||||
jar-d-r-t-package ; DEFINE-RECORD-TYPE/JAR macro.
|
||||
scheme)
|
||||
|
||||
(begin (define (check-arg pred val caller)
|
||||
(let lp ((val val))
|
||||
(if (pred val) val (lp (error "Bad argument" val pred caller)))))
|
||||
|
||||
(define %latin1->char ascii->char) ; Works for S48
|
||||
(define %char->latin1 char->ascii) ; Works for S48
|
||||
|
||||
;; Here's a SRFI-19 d-r-t defined in terms of jar's almost-identical
|
||||
;; d-r-t.
|
||||
(define-syntax define-record-type
|
||||
(syntax-rules ()
|
||||
((define-record-type ?name ?stuff ...)
|
||||
(define-record-type/jar ?name ?name ?stuff ...)))))
|
||||
|
||||
(files cset-lib)
|
||||
(optimize auto-integrate))
|
||||
|
||||
;;; Import jar's DEFINE-RECORD-TYPE macro, and export it under the
|
||||
;;; name DEFINE-RECORD-TYPE/JAR.
|
||||
(define-structure jar-d-r-t-package (export (define-record-type/jar :syntax))
|
||||
(open define-record-types ; JAR's record macro
|
||||
scheme)
|
||||
(begin (define-syntax define-record-type/jar
|
||||
(syntax-rules ()
|
||||
((define-record-type/jar ?stuff ...)
|
||||
(define-record-type ?stuff ...))))))
|
|
@ -0,0 +1,200 @@
|
|||
;;; This is a regression testing suite for the SRFI-14 char-set library.
|
||||
;;; Olin Shivers
|
||||
|
||||
(let-syntax ((test (syntax-rules ()
|
||||
((test form ...)
|
||||
(cond ((not form) (error "Test failed" 'form)) ...
|
||||
(else 'OK))))))
|
||||
(let ((vowel (lambda (c) (member c '(#\a #\e #\i #\o #\u)))))
|
||||
|
||||
(test
|
||||
(not (char-set? 5))
|
||||
|
||||
(char-set? (char-set #\a #\e #\i #\o #\u))
|
||||
|
||||
(char-set=)
|
||||
(char-set= (char-set))
|
||||
|
||||
(char-set= (char-set #\a #\e #\i #\o #\u)
|
||||
(string->char-set "ioeauaiii"))
|
||||
|
||||
(not (char-set= (char-set #\e #\i #\o #\u)
|
||||
(string->char-set "ioeauaiii")))
|
||||
|
||||
(char-set<=)
|
||||
(char-set<= (char-set))
|
||||
|
||||
(char-set<= (char-set #\a #\e #\i #\o #\u)
|
||||
(string->char-set "ioeauaiii"))
|
||||
|
||||
(char-set<= (char-set #\e #\i #\o #\u)
|
||||
(string->char-set "ioeauaiii"))
|
||||
|
||||
(<= 0 (char-set-hash char-set:graphic 100) 99)
|
||||
|
||||
(= 4 (char-set-fold (lambda (c i) (+ i 1)) 0
|
||||
(char-set #\e #\i #\o #\u #\e #\e)))
|
||||
|
||||
(char-set= (string->char-set "eiaou2468013579999")
|
||||
(char-set-unfold null? car cdr '(#\a #\e #\i #\o #\u #\u #\u)
|
||||
char-set:digit))
|
||||
|
||||
(char-set= (string->char-set "eiaou246801357999")
|
||||
(char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
|
||||
(string->char-set "0123456789")))
|
||||
|
||||
(not (char-set= (string->char-set "eiaou246801357")
|
||||
(char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
|
||||
(string->char-set "0123456789"))))
|
||||
|
||||
(let ((cs (string->char-set "0123456789")))
|
||||
(char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
|
||||
(string->char-set "02468000"))
|
||||
(char-set= cs (string->char-set "97531")))
|
||||
|
||||
(not (let ((cs (string->char-set "0123456789")))
|
||||
(char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
|
||||
(string->char-set "02468"))
|
||||
(char-set= cs (string->char-set "7531"))))
|
||||
|
||||
(char-set= (char-set-map char-upcase (string->char-set "aeiou"))
|
||||
(string->char-set "IOUAEEEE"))
|
||||
|
||||
(not (char-set= (char-set-map char-upcase (string->char-set "aeiou"))
|
||||
(string->char-set "OUAEEEE")))
|
||||
|
||||
(char-set= (char-set-copy (string->char-set "aeiou"))
|
||||
(string->char-set "aeiou"))
|
||||
|
||||
(char-set= (char-set #\x #\y) (string->char-set "xy"))
|
||||
(not (char-set= (char-set #\x #\y #\z) (string->char-set "xy")))
|
||||
|
||||
(char-set= (string->char-set "xy") (list->char-set '(#\x #\y)))
|
||||
(not (char-set= (string->char-set "axy") (list->char-set '(#\x #\y))))
|
||||
|
||||
(char-set= (string->char-set "xy12345")
|
||||
(list->char-set '(#\x #\y) (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "y12345")
|
||||
(list->char-set '(#\x #\y) (string->char-set "12345"))))
|
||||
|
||||
(char-set= (string->char-set "xy12345")
|
||||
(list->char-set! '(#\x #\y) (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "y12345")
|
||||
(list->char-set! '(#\x #\y) (string->char-set "12345"))))
|
||||
|
||||
(char-set= (string->char-set "aeiou12345")
|
||||
(char-set-filter vowel? char-set:ascii (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "aeou12345")
|
||||
(char-set-filter vowel? char-set:ascii (string->char-set "12345"))))
|
||||
|
||||
(char-set= (string->char-set "aeiou12345")
|
||||
(char-set-filter! vowel? char-set:ascii (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "aeou12345")
|
||||
(char-set-filter! vowel? char-set:ascii (string->char-set "12345"))))
|
||||
|
||||
|
||||
(char-set= (string->char-set "abcdef12345")
|
||||
(ucs-range->char-set 97 103 #t (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "abcef12345")
|
||||
(ucs-range->char-set 97 103 #t (string->char-set "12345"))))
|
||||
|
||||
(char-set= (string->char-set "abcdef12345")
|
||||
(ucs-range->char-set! 97 103 #t (string->char-set "12345")))
|
||||
(not (char-set= (string->char-set "abcef12345")
|
||||
(ucs-range->char-set! 97 103 #t (string->char-set "12345"))))
|
||||
|
||||
|
||||
(char-set= (->char-set #\x)
|
||||
(->char-set "x")
|
||||
(->char-set (char-set #\x)))
|
||||
|
||||
(not (char-set= (->char-set #\x)
|
||||
(->char-set "y")
|
||||
(->char-set (char-set #\x))))
|
||||
|
||||
(= 10 (char-set-size (char-set-intersection char-set:ascii char-set:digit)))
|
||||
|
||||
(= 5 (char-set-count vowel? char-set:ascii))
|
||||
|
||||
(equal? '(#\x) (char-set->list (char-set #\x)))
|
||||
(not (equal? '(#\X) (char-set->list (char-set #\x))))
|
||||
|
||||
(equal? "x" (char-set->string (char-set #\x)))
|
||||
(not (equal? "X" (char-set->string (char-set #\x))))
|
||||
|
||||
(char-set-contains? (->char-set "xyz") #\x)
|
||||
(not (char-set-contains? (->char-set "xyz") #\a))
|
||||
|
||||
(char-set-every char-lower-case? (->char-set "abcd"))
|
||||
(not (char-set-every char-lower-case? (->char-set "abcD")))
|
||||
(char-set-any char-lower-case? (->char-set "abcd"))
|
||||
(not (char-set-any char-lower-case? (->char-set "ABCD")))
|
||||
|
||||
(char-set= (->char-set "ABCD")
|
||||
(let ((cs (->char-set "abcd")))
|
||||
(let lp ((cur (char-set-cursor cs)) (ans '()))
|
||||
(if (end-of-char-set? cur) (list->char-set ans)
|
||||
(lp (char-set-cursor-next cs cur)
|
||||
(cons (char-upcase (char-set-ref cs cur)) ans))))))
|
||||
|
||||
|
||||
(char-set= (char-set-adjoin (->char-set "123") #\x #\a)
|
||||
(->char-set "123xa"))
|
||||
(not (char-set= (char-set-adjoin (->char-set "123") #\x #\a)
|
||||
(->char-set "123x")))
|
||||
(char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
|
||||
(->char-set "123xa"))
|
||||
(not (char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
|
||||
(->char-set "123x")))
|
||||
|
||||
(char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
|
||||
(->char-set "13"))
|
||||
(not (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
|
||||
(->char-set "13a")))
|
||||
(char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
|
||||
(->char-set "13"))
|
||||
(not (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
|
||||
(->char-set "13a")))
|
||||
|
||||
(char-set= (char-set-intersection char-set:hex-digit (char-set-complement char-set:digit))
|
||||
(->char-set "abcdefABCDEF"))
|
||||
(char-set= (char-set-intersection! (char-set-complement! (->char-set "0123456789"))
|
||||
char-set:hex-digit)
|
||||
(->char-set "abcdefABCDEF"))
|
||||
|
||||
(char-set= (char-set-union char-set:hex-digit
|
||||
(->char-set "abcdefghijkl"))
|
||||
(->char-set "abcdefABCDEFghijkl0123456789"))
|
||||
(char-set= (char-set-union! (->char-set "abcdefghijkl")
|
||||
char-set:hex-digit)
|
||||
(->char-set "abcdefABCDEFghijkl0123456789"))
|
||||
|
||||
(char-set= (char-set-difference (->char-set "abcdefghijklmn")
|
||||
char-set:hex-digit)
|
||||
(->char-set "ghijklmn"))
|
||||
(char-set= (char-set-difference! (->char-set "abcdefghijklmn")
|
||||
char-set:hex-digit)
|
||||
(->char-set "ghijklmn"))
|
||||
|
||||
(char-set= (char-set-xor (->char-set "0123456789")
|
||||
char-set:hex-digit)
|
||||
(->char-set "abcdefABCDEF"))
|
||||
(char-set= (char-set-xor! (->char-set "0123456789")
|
||||
char-set:hex-digit)
|
||||
(->char-set "abcdefABCDEF"))
|
||||
|
||||
(call-with-values (lambda ()
|
||||
(char-set-diff+intersection char-set:hex-digit
|
||||
char-set:letter))
|
||||
(lambda (d i)
|
||||
(and (char-set= d (->char-set "0123456789"))
|
||||
(char-set= i (->char-set "abcdefABCDEF")))))
|
||||
|
||||
(call-with-values (lambda ()
|
||||
(char-set-diff+intersection! (char-set-copy char-set:hex-digit)
|
||||
(char-set-copy char-set:letter)))
|
||||
(lambda (d i)
|
||||
(and (char-set= d (->char-set "0123456789"))
|
||||
(char-set= i (->char-set "abcdefABCDEF"))))))
|
||||
|
||||
))
|
|
@ -16,6 +16,11 @@
|
|||
;;; This implementation is intended as a portable reference implementation
|
||||
;;; for SRFI-1. See the porting notes below for more information.
|
||||
|
||||
;;; Revision history
|
||||
;;;;;;;;;;;;;;;;;;;;
|
||||
;;; This is version 1.1. 12/18/2000
|
||||
;;; Fixes a small bug in DELETE-DUPLICATES!.
|
||||
|
||||
;;; Exported:
|
||||
;;; xcons tree-copy make-list list-tabulate cons* list-copy
|
||||
;;; proper-list? circular-list? dotted-list? not-pair? null-list? list=
|
||||
|
@ -384,7 +389,7 @@
|
|||
(define (null-list? l)
|
||||
(cond ((pair? l) #f)
|
||||
((null? l) #t)
|
||||
(else (error "null-pair?: argument out of domain" l))))
|
||||
(else (error "null-list?: argument out of domain" l))))
|
||||
|
||||
|
||||
(define (list= = . lists)
|
||||
|
@ -1239,7 +1244,7 @@
|
|||
(new-tail (recur (delete x tail elt=))))
|
||||
(if (eq? tail new-tail) lis (cons x new-tail)))))))
|
||||
|
||||
(define (delete-duplicates! lis maybe-=)
|
||||
(define (delete-duplicates! lis . maybe-=)
|
||||
(let ((elt= (:optional maybe-= equal?)))
|
||||
(check-arg procedure? elt= delete-duplicates!)
|
||||
(let recur ((lis lis))
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
<!doctype html public '-//W3C//DTD HTML 4.0//EN'
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"
|
||||
'http://www.w3.org/TR/REC-html40/strict.dtd'>
|
||||
|
||||
<!-- Is there a portable way to write an em-dash?
|
||||
|
@ -36,9 +36,15 @@
|
|||
div.indent { margin-left: 2em; } /* General indentation */
|
||||
pre.code-example { margin-left: 2em; } /* Indent code examples. */
|
||||
|
||||
/* "Continue" class marks text that isn't really the start
|
||||
** of a new paragraph -- e.g., continuing a para after a
|
||||
** code sample.
|
||||
*/
|
||||
p.continue { text-indent: 0em; margin-top: 0em}
|
||||
|
||||
/* This stuff is for definition lists of defined procedures.
|
||||
** A proc-def2 is used when you want a stack of procs to go
|
||||
** with one <dd> ... </dd> body. In this case, make the first
|
||||
** with one dd body. In this case, make the first
|
||||
** proc a proc-def1, following ones proc-defi's, and the last one
|
||||
** a proc-defn.
|
||||
**
|
||||
|
@ -101,7 +107,7 @@
|
|||
a.draft { color: red; }
|
||||
</style>
|
||||
|
||||
<style type="text/css"; media=all>
|
||||
<style type="text/css" media=all>
|
||||
/* Nastiness: Here, I'm using a bug to work around a bug.
|
||||
** Netscape rendering bugs mean you need bogus <dt> and <dd>
|
||||
** margin settings -- settings which screw up IE's proper rendering.
|
||||
|
@ -140,12 +146,28 @@ List Library
|
|||
</div>
|
||||
|
||||
<!--========================================================================-->
|
||||
<h1>Author</H1>
|
||||
<H1>Author</H1>
|
||||
<p>
|
||||
Olin Shivers
|
||||
|
||||
<address>
|
||||
<a href="http://www.ai.mit.edu/~shivers/">Olin Shivers</A> /
|
||||
<a href="mailto:shivers@ai.mit.edu">shivers@ai.mit.edu</A>
|
||||
<a href="http://www.ai.mit.edu/~shivers/">http://www.ai.mit.edu/~shivers/</A> /
|
||||
<a href="mailto:shivers@ai.mit.edu">shivers@ai.mit.edu</A>
|
||||
</address>
|
||||
|
||||
<!--========================================================================-->
|
||||
<H1>Status</H1>
|
||||
<p>
|
||||
This SRFI is currently in ``final status. To see an explanation of each status that a SRFI can hold, see <A HREF="http://srfi.schemers.org/srfi-process.html">here</A>.
|
||||
You can access the discussion via <A HREF=mail-archive/maillist.html>the archive of the mailing list</A>.
|
||||
<P>
|
||||
<UL>
|
||||
<LI>Received: 1998/11/08</LI>
|
||||
<LI>Draft: 1998/12/22-1999/03/09</LI>
|
||||
<LI>Revised: several times</LI>
|
||||
<LI>Final: 1999/10/09</LI>
|
||||
</UL>
|
||||
|
||||
<!--========================================================================-->
|
||||
<h1>Table of contents</H1>
|
||||
|
||||
|
@ -154,7 +176,7 @@ List Library
|
|||
-->
|
||||
<ul id=toc-table>
|
||||
<li><a href="#Abstract">Abstract</a>
|
||||
<li><a href="#Introduction">Introduction</a>
|
||||
<li><a href="#Rationale">Rationale</a>
|
||||
<li><a href="#ProcedureIndex">Procedure index</a>
|
||||
<li><a href="#GeneralDiscussion">General discussion</a>
|
||||
<ul>
|
||||
|
@ -196,14 +218,8 @@ reference implementation of the spec. The reference implementation is
|
|||
<li>completely open, public-domain source
|
||||
</ul>
|
||||
|
||||
<strong><em>Note: This is a working draft, and tends to lag the plain-text version in terms of actual content.
|
||||
See <a href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt">
|
||||
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt</a>
|
||||
for the latest copy of the plain-text draft.
|
||||
</em></strong>
|
||||
|
||||
<!--========================================================================-->
|
||||
<h1><a name="Introduction">Introduction</a></h1>
|
||||
<h1><a name="Rationale">Rationale</a></h1>
|
||||
<p>
|
||||
The set of basic list and pair operations provided by R4RS/<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr> Scheme is far
|
||||
from satisfactory. Because this set is so small and basic, most
|
||||
|
@ -278,9 +294,9 @@ library and get good results with it.
|
|||
<h1><a name="ProcedureIndex">Procedure Index</a></h1>
|
||||
<p>
|
||||
Here is a short list of the procedures provided by the list-lib package.
|
||||
<a href="#R5RS">R5RS</a></abbr> procedures are shown in
|
||||
<span class=r5rs-proc>bold</span class=r5rs-proc>;
|
||||
extended <a href="#R5RS">R5RS</a></abbr>
|
||||
<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr> procedures are shown in
|
||||
<span class=r5rs-proc>bold</span>;
|
||||
extended <abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr>
|
||||
procedures, in <span class=r5rs-procx>bold italic</span>.
|
||||
<div class=indent>
|
||||
<dl>
|
||||
|
@ -1033,9 +1049,10 @@ partition the entire universe of Scheme values.
|
|||
<!--
|
||||
==== car cdr
|
||||
============================================================================-->
|
||||
<dt class=proc-def1>
|
||||
<a name="car"></a>
|
||||
<a name="cdr"></a>
|
||||
<dt class=proc-def1><code class=proc-def>car</code><var> pair -> value</var>
|
||||
<code class=proc-def>car</code><var> pair -> value</var>
|
||||
<dt class=proc-defn><code class=proc-def>cdr</code><var> pair -> value</var>
|
||||
<dd class=proc-def>
|
||||
[<abbr title="Revised^5 Report on Scheme"><a href="#R5RS">R5RS</a></abbr>]
|
||||
|
@ -2786,6 +2803,7 @@ That is, it must be the case that
|
|||
<div class=indent>
|
||||
<code>(eq? <var>x</var> <var>y</var>)</code> => <code>(<var>=</var> <var>x</var> <var>y</var>)</code>.
|
||||
</div>
|
||||
<p class=continue>
|
||||
Note that this implies, in turn, that two lists that are <code>eq?</code> are
|
||||
also set-equal by any legal comparison procedure. This allows for
|
||||
constant-time determination of set operations on <code>eq?</code> lists.
|
||||
|
@ -3142,7 +3160,7 @@ John David Stone, and Joerg F. Wittenberger. I am grateful to them for their
|
|||
assistance.
|
||||
<p>
|
||||
I am also grateful the authors, implementors and documentors of all the systems
|
||||
mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted
|
||||
mentioned in the rationale. Aubrey Jaffer and Kent Pitman should be noted
|
||||
for their work in producing Web-accessible versions of the R5RS and
|
||||
<a href="#CommonLisp">Common Lisp</a> spec, which was a tremendous aid.
|
||||
<p>
|
||||
|
@ -3156,31 +3174,15 @@ results, of course.
|
|||
|
||||
<dl>
|
||||
<dt class=biblio>This document, in HTML:
|
||||
<dd><a href="http://srfi.schemers.org/srfi-1/srfi-1.html">
|
||||
<dd><a href="srfi-1.html">
|
||||
http://srfi.schemers.org/srfi-1/srfi-1.html</a>
|
||||
|
||||
<br><a class=draft
|
||||
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html">
|
||||
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html (draft)</a>
|
||||
|
||||
<dt class=biblio>This document, in simple text format:
|
||||
<dd><a href="http://srfi.schemers.org/srfi-1/srfi-1.txt">
|
||||
http://srfi.schemers.org/srfi-1/srfi-1.txt</a>
|
||||
|
||||
<br><a class=draft
|
||||
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt">
|
||||
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt (draft)</a>
|
||||
|
||||
<dt class=biblio>Source code for the reference implementation:
|
||||
<dd><a HREF="http://srfi.schemers.org/srfi-1/srfi-1-reference.scm">
|
||||
<dd><a HREF="srfi-1-reference.scm">
|
||||
http://srfi.schemers.org/srfi-1/srfi-1-reference.scm</a>
|
||||
|
||||
<br><a class=draft
|
||||
href="ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm">
|
||||
ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm (draft)</a>
|
||||
|
||||
<dt class=biblio>Archive of SRFI-1 discussion-list email:
|
||||
<dd><a href="http://srfi.schemers.org/srfi-1/mail-archive/maillist.html">
|
||||
<dd><a href="mail-archive/maillist.html">
|
||||
http://srfi.schemers.org/srfi-1/mail-archive/maillist.html</a>
|
||||
|
||||
<dt class=biblio>SRFI web site:
|
||||
|
|
|
@ -0,0 +1,350 @@
|
|||
;;; Complete interface spec for the SRFI-13 string-lib and -*- Scheme -*-
|
||||
;;; string-lib-internals libraries in the Scheme48 interface
|
||||
;;; and module language. The interfaces are fully typed, in
|
||||
;;; the Scheme48 type notation. The structure definitions also
|
||||
;;; provide a formal description of the external dependencies
|
||||
;;; of the source code.
|
||||
|
||||
;;; string-lib
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; string-map string-map!
|
||||
;;; string-fold string-unfold
|
||||
;;; string-fold-right string-unfold-right
|
||||
;;; string-tabulate string-for-each string-for-each-index
|
||||
;;; string-every string-any
|
||||
;;; string-hash string-hash-ci
|
||||
;;; string-compare string-compare-ci
|
||||
;;; string= string< string> string<= string>= string<>
|
||||
;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
|
||||
;;; string-downcase string-upcase string-titlecase
|
||||
;;; string-downcase! string-upcase! string-titlecase!
|
||||
;;; string-take string-take-right
|
||||
;;; string-drop string-drop-right
|
||||
;;; string-pad string-pad-right
|
||||
;;; string-trim string-trim-right string-trim-both
|
||||
;;; string-filter string-delete
|
||||
;;; string-index string-index-right
|
||||
;;; string-skip string-skip-right
|
||||
;;; string-count
|
||||
;;; string-prefix-length string-prefix-length-ci
|
||||
;;; string-suffix-length string-suffix-length-ci
|
||||
;;; string-prefix? string-prefix-ci?
|
||||
;;; string-suffix? string-suffix-ci?
|
||||
;;; string-contains string-contains-ci
|
||||
;;; string-fill! string-copy!
|
||||
;;; string-copy substring/shared
|
||||
;;; string-reverse string-reverse! reverse-list->string
|
||||
;;; string->list
|
||||
;;; string-concatenate string-concatenate/shared
|
||||
;;; string-concatenate-reverse string-concatenate-reverse/shared
|
||||
;;; string-append/shared
|
||||
;;; xsubstring string-xcopy!
|
||||
;;; string-null?
|
||||
;;; string-join
|
||||
;;; string-tokenize
|
||||
;;; string-replace
|
||||
;;;
|
||||
;;; string? make-string string string-length string-ref string-set!
|
||||
;;; string-append list->string
|
||||
;;;
|
||||
;;; make-kmp-restart-vector string-kmp-partial-search kmp-step
|
||||
;;; string-parse-start+end
|
||||
;;; string-parse-final-start+end
|
||||
;;; let-string-start+end
|
||||
;;; check-substring-spec
|
||||
;;; substring-spec-ok?
|
||||
|
||||
(define-interface string-lib-interface
|
||||
(export
|
||||
;; string-map proc s [start end] -> s
|
||||
(string-map (proc ((proc (:char) :char)
|
||||
:string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-map! proc s [start end] -> unspecific
|
||||
(string-map! (proc ((proc (:char) :values)
|
||||
:string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-fold kons knil s [start end] -> value
|
||||
;; string-fold-right kons knil s [start end] -> value
|
||||
((string-fold string-fold-right)
|
||||
(proc ((proc (:char :value) :value)
|
||||
:value :string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-unfold p f g seed [base make-final] -> string
|
||||
;; string-unfold-right p f g seed [base make-final] -> string
|
||||
((string-unfold string-unfold)
|
||||
(proc ((proc (:value) :boolean)
|
||||
(proc (:value) :char)
|
||||
(proc (:value) :value)
|
||||
:value
|
||||
&opt :string (proc (:value) :string))
|
||||
:string))
|
||||
|
||||
; Enough is enough.
|
||||
; ;; string-unfoldn p f g seed ... -> string
|
||||
; (string-unfoldn (proc ((procedure :values :boolean)
|
||||
; (procedure :values :char)
|
||||
; (procedure :values :values)
|
||||
; &rest :value)
|
||||
; :string))
|
||||
|
||||
;; string-tabulate proc len -> string
|
||||
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-for-each proc s [start end] -> unspecific
|
||||
;; string-for-each-index proc s [start end] -> unspecific
|
||||
((string-for-each string-for-each-index)
|
||||
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-every pred s [start end]
|
||||
;; string-any pred s [start end]
|
||||
(string-every
|
||||
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||
:boolean))
|
||||
(string-any
|
||||
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-hash s [bound start end]
|
||||
;; string-hash-ci s [bound start end]
|
||||
((string-hash string-hash-ci)
|
||||
(proc (:string &opt :exact-integer :exact-integer :exact-integer)
|
||||
:exact-integer))
|
||||
|
||||
;; string-compare string1 string2 lt-proc eq-proc gt-proc [start end]
|
||||
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc [start end]
|
||||
((string-compare string-compare-ci)
|
||||
(proc (:string :string (proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values)
|
||||
&opt :exact-integer :exact-integer)
|
||||
:values))
|
||||
|
||||
;; string< string1 string2 [start1 end1 start2 end2]
|
||||
((string= string< string> string<= string>= string<>
|
||||
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
|
||||
(proc (:string :string &opt :exact-integer :exact-integer
|
||||
:exact-integer :exact-integer)
|
||||
:boolean))
|
||||
|
||||
;; string-titlecase string [start end]
|
||||
;; string-upcase string [start end]
|
||||
;; string-downcase string [start end]
|
||||
;; string-titlecase! string [start end]
|
||||
;; string-upcase! string [start end]
|
||||
;; string-downcase! string [start end]
|
||||
((string-titlecase string-upcase string-downcase)
|
||||
(proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
((string-titlecase! string-upcase! string-downcase!)
|
||||
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||
|
||||
;; string-take string nchars
|
||||
;; string-drop string nchars
|
||||
;; string-take-right string nchars
|
||||
;; string-drop-right string nchars
|
||||
((string-take string-drop string-take-right string-drop-right)
|
||||
(proc (:string :exact-integer) :string))
|
||||
|
||||
;; string-pad string k [char start end]
|
||||
;; string-pad-right string k [char start end]
|
||||
((string-pad string-pad-right)
|
||||
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-trim string [char/char-set/pred start end]
|
||||
;; string-trim-right string [char/char-set/pred start end]
|
||||
;; string-trim-both string [char/char-set/pred start end]
|
||||
((string-trim string-trim-right string-trim-both)
|
||||
(proc (:string &opt :value :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-filter char/char-set/pred string [start end]
|
||||
;; string-delete char/char-set/pred string [start end]
|
||||
((string-filter string-delete)
|
||||
(proc (:value :string &opt :exact-integer :exact-integer) :string))
|
||||
|
||||
;; string-index string char/char-set/pred [start end]
|
||||
;; string-index-right string char/char-set/pred [end start]
|
||||
;; string-skip string char/char-set/pred [start end]
|
||||
;; string-skip-right string char/char-set/pred [end start]
|
||||
((string-index string-index-right string-skip string-skip-right)
|
||||
(proc (:string :value &opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-count string char/char-set/pred [start end]
|
||||
(string-count (proc (:string :value &opt :exact-integer :exact-integer)
|
||||
:exact-integer))
|
||||
|
||||
;; string-prefix-length string1 string2 [start1 end1 start2 end2]
|
||||
;; string-suffix-length string1 string2 [start1 end1 start2 end2]
|
||||
;; string-prefix-length-ci string1 string2 [start1 end1 start2 end2]
|
||||
;; string-suffix-length-ci string1 string2 [start1 end1 start2 end2]
|
||||
((string-prefix-length string-prefix-length-ci
|
||||
string-suffix-length string-suffix-length-ci)
|
||||
(proc (:string :string &opt
|
||||
:exact-integer :exact-integer :exact-integer :exact-integer)
|
||||
:exact-integer))
|
||||
|
||||
;; string-prefix? string1 string2 [start1 end1 start2 end2]
|
||||
;; string-suffix? string1 string2 [start1 end1 start2 end2]
|
||||
;; string-prefix-ci? string1 string2 [start1 end1 start2 end2]
|
||||
;; string-suffix-ci? string1 string2 [start1 end1 start2 end2]
|
||||
((string-prefix? string-prefix-ci?
|
||||
string-suffix? string-suffix-ci?)
|
||||
(proc (:string :string &opt
|
||||
:exact-integer :exact-integer :exact-integer :exact-integer)
|
||||
:boolean))
|
||||
|
||||
;; string-contains string pattern [s-start s-end p-start p-end]
|
||||
;; string-contains-ci string pattern [s-start s-end p-start p-end]
|
||||
((string-contains string-contains-ci)
|
||||
(proc (:string :string &opt :exact-integer :exact-integer
|
||||
:exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-fill! string char [start end]
|
||||
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-copy! to tstart from [fstart fend]
|
||||
(string-copy! (proc (:string :exact-integer :string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-copy s [start end] -> string
|
||||
;; substring/shared s start [end] -> string
|
||||
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
(substring/shared (proc (:string :exact-integer &opt :exact-integer) :string))
|
||||
|
||||
;; string-reverse s [start end]
|
||||
;; string-reverse! s [start end]
|
||||
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||
|
||||
;; reverse-list->string char-list
|
||||
;; string->list s [start end]
|
||||
;; string-concatenate string-list
|
||||
;; string-concatenate/shared string-list
|
||||
;; string-append/shared s ...
|
||||
(reverse-list->string (proc (:value) :string))
|
||||
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
|
||||
((string-concatenate string-concatenate/shared) (proc (:value) :string))
|
||||
(string-append/shared (proc (&rest :string) :string))
|
||||
|
||||
;; string-concatenate-reverse string-list [final-string end]
|
||||
;; string-concatenate-reverse/shared string-list [final-string end]
|
||||
((string-concatenate-reverse string-concatenate-reverse/shared)
|
||||
(proc (:value &opt :string :exact-integer) :string))
|
||||
|
||||
;; xsubstring s from [to start end]
|
||||
;; string-xcopy! target tstart s from [to start end]
|
||||
(xsubstring (proc (:string :exact-integer &opt
|
||||
:exact-integer :exact-integer :exact-integer)
|
||||
:string))
|
||||
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
|
||||
:exact-integer :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-null? s
|
||||
(string-null? (proc (:string) :boolean))
|
||||
|
||||
;; string-join string-list [delim grammar]
|
||||
(string-join (proc (:value &opt :string :symbol) :string))
|
||||
|
||||
;; string-tokenize string [token-chars start end]
|
||||
(string-tokenize (proc (:string &opt :value :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-replace s1 s2 start1 end1 [start2 end2]
|
||||
(string-replace (proc (:string :string :exact-integer :exact-integer
|
||||
&opt :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; Here are the R4RS/R5RS procs
|
||||
(string? (proc (:value) :boolean))
|
||||
(make-string (proc (:exact-integer &opt :char) :string))
|
||||
(string (proc (&rest :char) :string))
|
||||
(string-length (proc (:string) :exact-integer))
|
||||
(string-ref (proc (:string :exact-integer) :char))
|
||||
(string-set! (proc (:string :exact-integer :char) :unspecific))
|
||||
(string-append (proc (&rest :string) :string))
|
||||
(list->string (proc (:value) :string))
|
||||
|
||||
;; These are the R4RS types for STRING-COPY, STRING-FILL!, and
|
||||
;; STRING->LIST. The string-lib types are different -- extended.
|
||||
;(string-copy (proc (:string) :string))
|
||||
;(string-fill! (proc (:string :char) :unspecific))
|
||||
;(string->list (proc (:string) :value))
|
||||
|
||||
))
|
||||
|
||||
|
||||
;;; make-kmp-restart-vector
|
||||
;;; string-kmp-partial-search
|
||||
;;; kmp-step
|
||||
;;; string-parse-start+end
|
||||
;;; string-parse-final-start+end
|
||||
;;; let-string-start+end
|
||||
;;; check-substring-spec
|
||||
;;; substring-spec-ok?
|
||||
|
||||
(define-interface string-lib-internals-interface
|
||||
(export
|
||||
(let-string-start+end :syntax)
|
||||
(string-parse-start+end (proc ((procedure :values :values) :string :value)
|
||||
(some-values :exact-integer :exact-integer :value)))
|
||||
(string-parse-final-start+end (proc ((procedure :values :values) :string :value)
|
||||
(some-values :exact-integer :exact-integer)))
|
||||
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
(substring-spec-ok? (proc ((procedure :values :values) :string :exact-integer :exact-integer)
|
||||
:boolean))
|
||||
|
||||
;; string-kmp-partial-search pat rv s i [c= p-start s-start s-end] -> integer
|
||||
(string-kmp-partial-search (proc (:string :vector :string :exact-integer
|
||||
&opt (proc (:char :char) :boolean)
|
||||
:exact-integer :exact-integer :exact-integer)
|
||||
:exact-integer))
|
||||
|
||||
;; make-kmp-restart-vector s [c= start end] -> vector
|
||||
(make-kmp-restart-vector (proc (:string &opt (proc (:char :char) :boolean)
|
||||
:exact-integer :exact-integer)
|
||||
:vector))
|
||||
|
||||
;; kmp-step pat rv c i c= p-start -> integer
|
||||
(kmp-step (proc (:string :vector :char :exact-integer
|
||||
(proc (:char :char) :boolean)
|
||||
:exact-integer)
|
||||
:exact-integer))
|
||||
))
|
||||
|
||||
|
||||
(define-structures ((string-lib string-lib-interface)
|
||||
(string-lib-internals string-lib-internals-interface))
|
||||
(access scheme) ; Get at R5RS SUBSTRING
|
||||
(open receiving ; RECEIVE
|
||||
char-set-lib ; Various
|
||||
bitwise ; BITWISE-AND for hashing
|
||||
error-package ; ERROR
|
||||
let-opt ; LET-OPTIONALS* :OPTIONAL
|
||||
scheme)
|
||||
|
||||
;; A few cheesy S48/scsh definitions for string-lib dependencies:
|
||||
(begin (define (check-arg pred val caller)
|
||||
(let lp ((val val))
|
||||
(if (pred val) val (lp (error "Bad argument" val pred caller)))))
|
||||
|
||||
;; These two internal procedures are correctly defined for ASCII or
|
||||
;; Latin-1. They are *not* correct for Unicode.
|
||||
(define (char-cased? c) (char-set-contains? char-set:letter c))
|
||||
(define (char-titlecase c) (char-upcase c)))
|
||||
|
||||
(files string-lib))
|
|
@ -52,7 +52,7 @@
|
|||
;;; line two. Return these as a list of strings.
|
||||
|
||||
(define read-files-secondary-args
|
||||
(let ((non-newline (char-set-invert (char-set #\newline))))
|
||||
(let ((non-newline (char-set-complement! (char-set #\newline))))
|
||||
(lambda (fname)
|
||||
(call-with-input-file fname
|
||||
(lambda (port)
|
||||
|
|
|
@ -198,7 +198,7 @@
|
|||
define-record-types
|
||||
defrec-package
|
||||
receiving
|
||||
char-set-package
|
||||
char-set-lib
|
||||
error-package
|
||||
ascii
|
||||
primitives ; JMG add-finalizer!
|
||||
|
@ -237,7 +237,7 @@
|
|||
(open re-internals
|
||||
conditionals
|
||||
re-level-0
|
||||
char-set-package
|
||||
char-set-lib
|
||||
scsh-utilities ; fold
|
||||
error-package
|
||||
ascii
|
||||
|
@ -253,7 +253,7 @@
|
|||
|
||||
(define-structure rx-syntax rx-syntax-interface
|
||||
(open re-level-0
|
||||
char-set-package
|
||||
char-set-lib
|
||||
rx-lib
|
||||
scheme)
|
||||
(for-syntax (open sre-internal-syntax-tools scheme))
|
||||
|
|
|
@ -74,8 +74,8 @@
|
|||
|
||||
|
||||
;;; Two useful standard char sets
|
||||
(define nonl-chars (char-set-invert (char-set #\newline)))
|
||||
(define word-chars (char-set-union (char-set #\_) char-set:alphanumeric))
|
||||
(define nonl-chars (char-set-complement (char-set #\newline)))
|
||||
(define word-chars (char-set-union (char-set #\_) char-set:letter+digit))
|
||||
|
||||
;;; Little utility that should be moved to scsh's utilities.scm
|
||||
(define (partition pred lis)
|
||||
|
@ -232,8 +232,8 @@
|
|||
(map parse-char-class (cdr sre))
|
||||
r))
|
||||
(cs (if (char-set? cs)
|
||||
(char-set-invert cs)
|
||||
`(,(r 'char-set-invert) ,cs))))
|
||||
(char-set-complement cs)
|
||||
`(,(r 'char-set-complement) ,cs))))
|
||||
(if cset? cs (make-re-char-set cs))))
|
||||
|
||||
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
|
||||
|
@ -281,15 +281,15 @@
|
|||
((nonl) nonl-chars)
|
||||
((lower-case lower) char-set:lower-case)
|
||||
((upper-case upper) char-set:upper-case)
|
||||
((alphabetic alpha) char-set:alphabetic)
|
||||
((numeric digit num) char-set:numeric)
|
||||
((alphanumeric alnum alphanum) char-set:alphanumeric)
|
||||
((alphabetic alpha) char-set:letter)
|
||||
((numeric digit num) char-set:digit)
|
||||
((alphanumeric alnum alphanum) char-set:letter+digit)
|
||||
((punctuation punct) char-set:punctuation)
|
||||
((graphic graph) char-set:graphic)
|
||||
((blank) char-set:blank)
|
||||
((whitespace space white) char-set:whitespace)
|
||||
((printing print) char-set:printing)
|
||||
((control cntrl) char-set:control)
|
||||
((control cntrl) char-set:iso-control)
|
||||
((hex-digit xdigit hex) char-set:hex-digit)
|
||||
((ascii) char-set:ascii)
|
||||
(else (error "Illegal regular expression" sre)))))
|
||||
|
@ -346,10 +346,9 @@
|
|||
(if (< i 0)
|
||||
(if cs? cset (uncase-char-set cset)) ; Case fold if necessary.
|
||||
(lp (- i 2)
|
||||
(char-set-union!
|
||||
cset
|
||||
(ascii-range->char-set (char->ascii (string-ref specs (- i 1)))
|
||||
(+ 1 (char->ascii (string-ref specs i)))))))))))
|
||||
(ucs-range->char-set! (char->ascii (string-ref specs (- i 1)))
|
||||
(+ 1 (char->ascii (string-ref specs i)))
|
||||
#f cset)))))))
|
||||
|
||||
;;; (regexp->scheme re r)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
@ -451,17 +450,17 @@
|
|||
(else #f)))
|
||||
(if one
|
||||
(switch char-set= cs
|
||||
((char-set:alphanumeric) alphanum)
|
||||
((char-set:letter+digit) alphanum)
|
||||
((char-set:graphic) graph)
|
||||
((char-set:hex-digit) hex)
|
||||
(else #f))
|
||||
(and (char-set= cs char-set:alphabetic) alpha)))
|
||||
(and (char-set= cs char-set:letter) alpha)))
|
||||
(and (char-set= cs char-set:lower-case) lower)) ; a, not A
|
||||
|
||||
(if biga
|
||||
(and (not space) (char-set= cs char-set:upper-case) upper)
|
||||
(if one
|
||||
(and (not space) (char-set= cs char-set:numeric) num)
|
||||
(and (not space) (char-set= cs char-set:digit) num)
|
||||
(if space
|
||||
(switch char-set= cs
|
||||
((char-set:whitespace) white)
|
||||
|
@ -469,7 +468,7 @@
|
|||
(else #f))
|
||||
(switch char-set= cs
|
||||
((char-set:punctuation) punct)
|
||||
((char-set:control) ctl)
|
||||
((char-set:iso-control) ctl)
|
||||
(else #f))))))))
|
||||
|
||||
|
||||
|
@ -478,21 +477,21 @@
|
|||
(try-classify-char-set cs
|
||||
'char-set:full 'nonl-chars
|
||||
'char-set:lower-case 'char-set:upper-case
|
||||
'char-set:alphabetic 'char-set:numeric
|
||||
'char-set:alphanumeric 'char-set:punctuation
|
||||
'char-set:letter 'char-set:digit
|
||||
'char-set:letter+digit 'char-set:punctuation
|
||||
'char-set:graphic 'char-set:whitespace
|
||||
'char-set:printing 'char-set:control
|
||||
'char-set:printing 'char-set:iso-control
|
||||
'char-set:hex-digit 'char-set:blank
|
||||
'char-set:ascii))))
|
||||
(? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code.
|
||||
((char-set-empty? cs) (r 'char-set:empty))
|
||||
((try cs) => r)
|
||||
((try (char-set-invert cs)) =>
|
||||
(lambda (name) `(,(r 'char-set-invert) ,name)))
|
||||
((try (char-set-complement cs)) =>
|
||||
(lambda (name) `(,(r 'char-set-complement) ,name)))
|
||||
|
||||
(else
|
||||
(receive (loose+ ranges+) (char-set->in-pair cs)
|
||||
(receive (loose- ranges-) (char-set->in-pair (char-set-invert cs))
|
||||
(receive (loose- ranges-) (char-set->in-pair (char-set-complement cs))
|
||||
(let ((makeit (r 'spec->char-set)))
|
||||
(if (< (+ (length loose-) (* 12 (length ranges-)))
|
||||
(+ (length loose+) (* 12 (length ranges+))))
|
||||
|
@ -517,9 +516,9 @@
|
|||
'ascii)))
|
||||
(nchars (char-set-size cs)))
|
||||
(? ((zero? nchars) `(,(r '|)))
|
||||
((= 1 nchars) (apply string (char-set-members cs)))
|
||||
((= 1 nchars) (apply string (char-set->list cs)))
|
||||
((try cs) => r)
|
||||
((try (char-set-invert cs)) =>
|
||||
((try (char-set-complement cs)) =>
|
||||
(lambda (name) `(,(r '~) ,name)))
|
||||
(else (receive (cs rp comp?) (char-set->in-sexp-spec cs)
|
||||
(let ((args (append (? ((string=? cs "") '())
|
||||
|
@ -619,7 +618,7 @@
|
|||
`(,(car r) ,(cdr r) . ,lis))
|
||||
'() ranges)))))))
|
||||
(receive (cs+ rp+) (->sexp-pair cset)
|
||||
(receive (cs- rp-) (->sexp-pair (char-set-invert cset))
|
||||
(receive (cs- rp-) (->sexp-pair (char-set-complement cset))
|
||||
(if (< (+ (string-length cs-) (string-length rp-))
|
||||
(+ (string-length cs+) (string-length rp+)))
|
||||
(values cs- rp- #t)
|
||||
|
|
|
@ -375,11 +375,11 @@
|
|||
(? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set
|
||||
|
||||
((= 1 nchars) ; Singleton set
|
||||
(translate-string (string (car (char-set-members cset)))))
|
||||
(translate-string (string (car (char-set->list cset)))))
|
||||
|
||||
;; General case. Try both [...] and [^...].
|
||||
(else (let ((s- (->bracket-string cset #t))
|
||||
(s+ (->bracket-string (char-set-invert cset) #f)))
|
||||
(s+ (->bracket-string (char-set-complement cset) #f)))
|
||||
(values (if (< (string-length s-) (string-length s+))
|
||||
s- s+)
|
||||
1 0 '#())))))))
|
||||
|
|
|
@ -162,7 +162,7 @@
|
|||
(string->char-set (re-string:chars elt))))
|
||||
res))))
|
||||
(if (= 1 (char-set-size cset))
|
||||
(make-re-string (apply string (char-set-members cset)))
|
||||
(make-re-string (apply string (char-set->list cset)))
|
||||
(make-re-char-set cset)))
|
||||
|
||||
(if (pair? res)
|
||||
|
@ -387,7 +387,7 @@
|
|||
(char-set-full? cs)))))
|
||||
|
||||
(define re-nonl
|
||||
(make-re-char-set/posix (char-set-invert (char-set #\newline))
|
||||
(make-re-char-set/posix (char-set-complement (char-set #\newline))
|
||||
"[^\n]"
|
||||
'#()))
|
||||
|
||||
|
@ -414,7 +414,7 @@
|
|||
|
||||
|
||||
(define re-word
|
||||
(let ((wcs (char-set-union char-set:alphanumeric ; Word chars
|
||||
(let ((wcs (char-set-union char-set:letter+digit ; Word chars
|
||||
(char-set #\_))))
|
||||
(make-re-seq (list re-bow
|
||||
(make-re-repeat 1 #f (make-re-char-set wcs))
|
||||
|
|
|
@ -36,5 +36,5 @@
|
|||
ranges))))
|
||||
(if in?
|
||||
(doit loose ranges)
|
||||
(char-set-invert! (doit loose ranges)))))
|
||||
(char-set-complement! (doit loose ranges)))))
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
(values (let ((cs (re-char-set:cset re)))
|
||||
(if (and (char-set? cs)
|
||||
(= 1 (char-set-size cs)))
|
||||
(make-re-string (string (car (char-set-members cs))))
|
||||
(make-re-string (string (car (char-set->list cs))))
|
||||
re))
|
||||
0))
|
||||
|
||||
|
@ -243,7 +243,7 @@
|
|||
(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))
|
||||
(tail (? ((zero? numchars) tail) ; Drop empty char set.
|
||||
((= 1 numchars) ; {c} => "c"
|
||||
(cons (make-re-string (string (car (char-set-members cset))))
|
||||
(cons (make-re-string (string (car (char-set->list cset))))
|
||||
tail))
|
||||
(else (cons (make-re-char-set cset) tail)))))
|
||||
tail))
|
||||
|
|
|
@ -128,7 +128,7 @@
|
|||
((#\]) (if (= i i0)
|
||||
(lp i1 (char-set-adjoin! cset #\]))
|
||||
(let ((cset (if negate?
|
||||
(char-set-invert! cset)
|
||||
(char-set-complement! cset)
|
||||
cset)))
|
||||
(values (make-re-char-set cset) i1))))
|
||||
|
||||
|
|
Loading…
Reference in New Issue