;;; SRFI 13 string library reference implementation -*- Scheme -*- ;;; Olin Shivers 7/2000 ;;; ;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. ;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved. ;;; The details of the copyrights appear at the end of the file. Short ;;; summary: BSD-style open source. ;;; Exports: ;;; string-map string-map! ;;; string-fold string-unfold ;;; string-fold-right string-unfold-right ;;; string-tabulate string-for-each string-for-each-index ;;; string-every string-any ;;; string-hash string-hash-ci ;;; string-compare string-compare-ci ;;; string= string< string> string<= string>= string<> ;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> ;;; string-downcase string-upcase string-titlecase ;;; string-downcase! string-upcase! string-titlecase! ;;; string-take string-take-right ;;; string-drop string-drop-right ;;; string-pad string-pad-right ;;; string-trim string-trim-right string-trim-both ;;; string-filter string-delete ;;; string-index string-index-right ;;; string-skip string-skip-right ;;; string-count ;;; string-prefix-length string-prefix-length-ci ;;; string-suffix-length string-suffix-length-ci ;;; string-prefix? string-prefix-ci? ;;; string-suffix? string-suffix-ci? ;;; string-contains string-contains-ci ;;; string-copy! substring/shared ;;; string-reverse string-reverse! reverse-list->string ;;; string-concatenate string-concatenate/shared string-concatenate-reverse ;;; string-append/shared ;;; xsubstring string-xcopy! ;;; string-null? ;;; string-join ;;; string-tokenize ;;; string-replace ;;; ;;; R5RS extended: ;;; string->list string-copy string-fill! ;;; ;;; R5RS re-exports: ;;; string? make-string string-length string-ref string-set! ;;; ;;; R5RS re-exports (also defined here but commented-out): ;;; string string-append list->string ;;; ;;; Low-level routines: ;;; make-kmp-restart-vector string-kmp-partial-search kmp-step ;;; string-parse-start+end ;;; string-parse-final-start+end ;;; let-string-start+end ;;; check-substring-spec ;;; substring-spec-ok? ;;; Imports ;;; This is a fairly large library. While it was written for portability, you ;;; must be aware of its dependencies in order to run it in a given scheme ;;; implementation. Here is a complete list of the dependencies it has and the ;;; assumptions it makes beyond stock R5RS Scheme: ;;; ;;; This code has the following non-R5RS dependencies: ;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro; ;;; ;;; - Various imports from the char-set library for the routines that can ;;; take char-set arguments; ;;; ;;; - An n-ary ERROR procedure; ;;; ;;; - BITWISE-AND for the hash functions; ;;; ;;; - A simple CHECK-ARG procedure for checking parameter values; it is ;;; (lambda (pred val proc) ;;; (if (pred val) val (error "Bad arg" val pred proc))) ;;; ;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting & ;;; type-checking optional parameters from a rest argument; ;;; ;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE & ;;; STRING-TITLECASE! procedures. The former returns true iff a character is ;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z. ;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII & ;;; Latin-1, it is the same as CHAR-UPCASE. ;;; ;;; The code depends upon a small set of core string primitives from R5RS: ;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING ;;; (Actually, SUBSTRING is not a primitive, but we assume that an ;;; implementation's native version is probably faster than one we could ;;; define, so we import it from R5RS.) ;;; ;;; The code depends upon a small set of R5RS character primitives: ;;; char? char=? char-ci=? charinteger (for the hash functions) ;;; ;;; We assume the following: ;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE ;;; - CHAR-CI=? is equivalent to ;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1)) ;;; (char-downcase (char-upcase c2)))) ;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive ;;; and consistent with Unicode's 1-1 char-mapping spec. ;;; These things are typically true, but if not, you would need to modify ;;; the case-mapping and case-insensitive routines. ;;; Enough introductory blather. On to the source code. (But see the end of ;;; the file for further notes on porting & performance tuning.) ; Start S48 additions (define (check-arg pred val caller) (if (not (pred val)) (error val caller)) val) (define-syntax :optional (syntax-rules () ((:optional rest default-exp) (let ((maybe-arg rest)) (if (pair? maybe-arg) (if (null? (cdr maybe-arg)) (car maybe-arg) (error "too many optional arguments" maybe-arg)) default-exp))) ((:optional rest default-exp arg-test) (let ((maybe-arg rest)) (if (pair? maybe-arg) (if (null? (cdr maybe-arg)) (let ((val (car maybe-arg))) (if (arg-test val) val (error "Optional argument failed test" 'arg-test val))) (error "too many optional arguments" maybe-arg)) default-exp))))) (define-syntax let-optionals* (syntax-rules () ((let-optionals* arg (opt-clause ...) body ...) (let ((rest arg)) (%let-optionals* rest (opt-clause ...) body ...))))) (define-syntax %let-optionals* (syntax-rules () ((%let-optionals* arg (((var ...) xparser) opt-clause ...) body ...) (call-with-values (lambda () (xparser arg)) (lambda (rest var ...) (%let-optionals* rest (opt-clause ...) body ...)))) ((%let-optionals* arg ((var default) opt-clause ...) body ...) (call-with-values (lambda () (if (null? arg) (values default '()) (values (car arg) (cdr arg)))) (lambda (var rest) (%let-optionals* rest (opt-clause ...) body ...)))) ((%let-optionals* arg ((var default test) opt-clause ...) body ...) (call-with-values (lambda () (if (null? arg) (values default '()) (let ((var (car arg))) (if test (values var (cdr arg)) (error "arg failed LET-OPT test" var))))) (lambda (var rest) (%let-optionals* rest (opt-clause ...) body ...)))) ((%let-optionals* arg ((var default test supplied?) opt-clause ...) body ...) (call-with-values (lambda () (if (null? arg) (values default #f '()) (let ((var (car arg))) (if test (values var #t (cdr arg)) (error "arg failed LET-OPT test" var))))) (lambda (var supplied? rest) (%let-optionals* rest (opt-clause ...) body ...)))) ((%let-optionals* arg (rest) body ...) (let ((rest arg)) body ...)) ((%let-optionals* arg () body ...) (if (null? arg) (begin body ...) (error "Too many arguments in let-opt" arg))))) (define (char-cased? ch) (or (and (char<=? #\a ch) (char<=? ch #\z)) (and (char<=? #\A ch) (char<=? ch #\Z)))) (define char-titlecase char-upcase) ; End S48 additions ;;; Support for START/END substring specs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This macro parses optional start/end arguments from arg lists, defaulting ;;; them to 0/(string-length s), and checks them for correctness. (define-syntax let-string-start+end (syntax-rules () ((let-string-start+end (start end) proc s-exp args-exp body ...) (receive (start end) (string-parse-final-start+end proc s-exp args-exp) body ...)) ((let-string-start+end (start end rest) proc s-exp args-exp body ...) (receive (rest start end) (string-parse-start+end proc s-exp args-exp) body ...)))) ;;; This one parses out a *pair* of final start/end indices. ;;; Not exported; for internal use. (define-syntax let-string-start+end2 (syntax-rules () ((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...) (let ((procv proc)) ; Make sure PROC is only evaluated once. (let-string-start+end (start1 end1 rest) procv s1 args (let-string-start+end (start2 end2) procv s2 rest body ...)))))) ;;; Returns three values: rest start end (define (string-parse-start+end proc s args) (if (not (string? s)) (error "Non-string value" proc s)) (let ((slen (string-length s))) (if (pair? args) (let ((start (car args)) (args (cdr args))) (if (and (integer? start) (exact? start) (>= start 0)) (receive (end args) (if (pair? args) (let ((end (car args)) (args (cdr args))) (if (and (integer? end) (exact? end) (<= end slen)) (values end args) (error "Illegal substring END spec" proc end s))) (values slen args)) (if (<= start end) (values args start end) (error "Illegal substring START/END spec" proc start end s))) (error "Illegal substring START spec" proc start s))) (values '() 0 slen)))) (define (string-parse-final-start+end proc s args) (receive (rest start end) (string-parse-start+end proc s args) (if (pair? rest) (error "Extra arguments to procedure" proc rest) (values start end)))) (define (substring-spec-ok? s start end) (and (string? s) (integer? start) (exact? start) (integer? end) (exact? end) (<= 0 start) (<= start end) (<= end (string-length s)))) (define (check-substring-spec proc s start end) (if (not (substring-spec-ok? s start end)) (error "Illegal substring spec." proc s start end))) ;;; Defined by R5RS, so commented out here. ;(define (string . chars) ; (let* ((len (length chars)) ; (ans (make-string len))) ; (do ((i 0 (+ i 1)) ; (chars chars (cdr chars))) ; ((>= i len)) ; (string-set! ans i (car chars))) ; ans)) ; ;(define (string . chars) (string-unfold null? car cdr chars)) ;;; substring/shared S START [END] ;;; string-copy S [START END] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; All this goop is just arg parsing & checking surrounding a call to the ;;; actual primitive, %SUBSTRING/SHARED. (define (substring/shared s start . maybe-end) (check-arg string? s substring/shared) (let ((slen (string-length s))) (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start))) start substring/shared) (%substring/shared s start (:optional maybe-end slen (lambda (end) (and (integer? end) (exact? end) (<= start end) (<= end slen))))))) ;;; Split out so that other routines in this library can avoid arg-parsing ;;; overhead for END parameter. (define (%substring/shared s start end) (if (and (zero? start) (= end (string-length s))) s (substring s start end))) (define (string-copy s . maybe-start+end) (let-string-start+end (start end) string-copy s maybe-start+end (substring s start end))) ;This library uses the R5RS SUBSTRING, but doesn't export it. ;Here is a definition, just for completeness. ;(define (substring s start end) ; (check-substring-spec substring s start end) ; (let* ((slen (- end start)) ; (ans (make-string slen))) ; (do ((i 0 (+ i 1)) ; (j start (+ j 1))) ; ((>= i slen) ans) ; (string-set! ans i (string-ref s j))))) ;;; Basic iterators and other higher-order abstractions ;;; (string-map proc s [start end]) ;;; (string-map! proc s [start end]) ;;; (string-fold kons knil s [start end]) ;;; (string-fold-right kons knil s [start end]) ;;; (string-unfold p f g seed [base make-final]) ;;; (string-unfold-right p f g seed [base make-final]) ;;; (string-for-each proc s [start end]) ;;; (string-for-each-index proc s [start end]) ;;; (string-every char-set/char/pred s [start end]) ;;; (string-any char-set/char/pred s [start end]) ;;; (string-tabulate proc len) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; You want compiler support for high-level transforms on fold and unfold ops. ;;; You'd at least like a lot of inlining for clients of these procedures. ;;; Don't hold your breath. (define (string-map proc s . maybe-start+end) (check-arg procedure? proc string-map) (let-string-start+end (start end) string-map s maybe-start+end (%string-map proc s start end))) (define (%string-map proc s start end) ; Internal utility (let* ((len (- end start)) (ans (make-string len))) (do ((i (- end 1) (- i 1)) (j (- len 1) (- j 1))) ((< j 0)) (string-set! ans j (proc (string-ref s i)))) ans)) (define (string-map! proc s . maybe-start+end) (check-arg procedure? proc string-map!) (let-string-start+end (start end) string-map! s maybe-start+end (%string-map! proc s start end))) (define (%string-map! proc s start end) (do ((i (- end 1) (- i 1))) ((< i start)) (string-set! s i (proc (string-ref s i))))) (define (string-fold kons knil s . maybe-start+end) (check-arg procedure? kons string-fold) (let-string-start+end (start end) string-fold s maybe-start+end (let lp ((v knil) (i start)) (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) v)))) (define (string-fold-right kons knil s . maybe-start+end) (check-arg procedure? kons string-fold-right) (let-string-start+end (start end) string-fold-right s maybe-start+end (let lp ((v knil) (i (- end 1))) (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) v)))) ;;; (string-unfold p f g seed [base make-final]) ;;; This is the fundamental constructor for strings. ;;; - G is used to generate a series of "seed" values from the initial seed: ;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... ;;; - P tells us when to stop -- when it returns true when applied to one ;;; of these seed values. ;;; - F maps each seed value to the corresponding character ;;; in the result string. These chars are assembled into the ;;; string in a left-to-right order. ;;; - BASE is the optional initial/leftmost portion of the constructed string; ;;; it defaults to the empty string "". ;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns ;;; true) to produce the final/rightmost portion of the constructed string. ;;; It defaults to (LAMBDA (X) ""). ;;; ;;; In other words, the following (simple, inefficient) definition holds: ;;; (define (string-unfold p f g seed base make-final) ;;; (string-append base ;;; (let recur ((seed seed)) ;;; (if (p seed) (make-final seed) ;;; (string-append (string (f seed)) ;;; (recur (g seed))))))) ;;; ;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to ;;; reverse a string, copy a string, convert a list to a string, read ;;; a port into a string, and so forth. Examples: ;;; (port->string port) = ;;; (string-unfold (compose eof-object? peek-char) ;;; read-char values port) ;;; ;;; (list->string lis) = (string-unfold null? car cdr lis) ;;; ;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) ;;; A problem with the following simple formulation is that it pushes one ;;; stack frame for every char in the result string -- an issue if you are ;;; using it to read a 100kchar string. So we don't use it -- but I include ;;; it to give a clear, straightforward description of what the function ;;; does. ;(define (string-unfold p f g seed base make-final) ; (let ((ans (let recur ((seed seed) (i (string-length base))) ; (if (p seed) ; (let* ((final (make-final seed)) ; (ans (make-string (+ i (string-length final))))) ; (string-copy! ans i final) ; ans) ; ; (let* ((c (f seed)) ; (s (recur (g seed) (+ i 1)))) ; (string-set! s i c) ; s))))) ; (string-copy! ans 0 base) ; ans)) ;;; The strategy is to allocate a series of chunks into which we stash the ;;; chars as we generate them. Chunk size goes up in powers of two starting ;;; with 40 and levelling out at 4k, i.e. ;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096... ;;; This should work pretty well for short strings, 1-line (80 char) strings, ;;; and longer ones. When done, we allocate an answer string and copy the ;;; chars over from the chunk buffers. (define (string-unfold p f g seed . base+make-final) (check-arg procedure? p string-unfold) (check-arg procedure? f string-unfold) (check-arg procedure? g string-unfold) (let-optionals* base+make-final ((base "" (string? base)) (make-final (lambda (x) "") (procedure? make-final))) (let lp ((chunks '()) ; Previously filled chunks (nchars 0) ; Number of chars in CHUNKS (chunk (make-string 40)) ; Current chunk into which we write (chunk-len 40) (i 0) ; Number of chars written into CHUNK (seed seed)) (let lp2 ((i i) (seed seed)) (if (not (p seed)) (let ((c (f seed)) (seed (g seed))) (if (< i chunk-len) (begin (string-set! chunk i c) (lp2 (+ i 1) seed)) (let* ((nchars2 (+ chunk-len nchars)) (chunk-len2 (min 4096 nchars2)) (new-chunk (make-string chunk-len2))) (string-set! new-chunk 0 c) (lp (cons chunk chunks) (+ nchars chunk-len) new-chunk chunk-len2 1 seed)))) ;; We're done. Make the answer string & install the bits. (let* ((final (make-final seed)) (flen (string-length final)) (base-len (string-length base)) (j (+ base-len nchars i)) (ans (make-string (+ j flen)))) (%string-copy! ans j final 0 flen) ; Install FINAL. (let ((j (- j i))) (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I). (let lp ((j j) (chunks chunks)) ; Install CHUNKS. (if (pair? chunks) (let* ((chunk (car chunks)) (chunks (cdr chunks)) (chunk-len (string-length chunk)) (j (- j chunk-len))) (%string-copy! ans j chunk 0 chunk-len) (lp j chunks))))) (%string-copy! ans 0 base 0 base-len) ; Install BASE. ans)))))) (define (string-unfold-right p f g seed . base+make-final) (let-optionals* base+make-final ((base "" (string? base)) (make-final (lambda (x) "") (procedure? make-final))) (let lp ((chunks '()) ; Previously filled chunks (nchars 0) ; Number of chars in CHUNKS (chunk (make-string 40)) ; Current chunk into which we write (chunk-len 40) (i 40) ; Number of chars available in CHUNK (seed seed)) (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right (if (not (p seed)) ; to left. (let ((c (f seed)) (seed (g seed))) (if (> i 0) (let ((i (- i 1))) (string-set! chunk i c) (lp2 i seed)) (let* ((nchars2 (+ chunk-len nchars)) (chunk-len2 (min 4096 nchars2)) (new-chunk (make-string chunk-len2)) (i (- chunk-len2 1))) (string-set! new-chunk i c) (lp (cons chunk chunks) (+ nchars chunk-len) new-chunk chunk-len2 i seed)))) ;; We're done. Make the answer string & install the bits. (let* ((final (make-final seed)) (flen (string-length final)) (base-len (string-length base)) (chunk-used (- chunk-len i)) (j (+ base-len nchars chunk-used)) (ans (make-string (+ j flen)))) (%string-copy! ans 0 final 0 flen) ; Install FINAL. (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,). (let lp ((j (+ flen chunk-used)) ; Install CHUNKS. (chunks chunks)) (if (pair? chunks) (let* ((chunk (car chunks)) (chunks (cdr chunks)) (chunk-len (string-length chunk))) (%string-copy! ans j chunk 0 chunk-len) (lp (+ j chunk-len) chunks)) (%string-copy! ans j base 0 base-len))); Install BASE. ans)))))) (define (string-for-each proc s . maybe-start+end) (check-arg procedure? proc string-for-each) (let-string-start+end (start end) string-for-each s maybe-start+end (let lp ((i start)) (if (< i end) (begin (proc (string-ref s i)) (lp (+ i 1))))))) (define (string-for-each-index proc s . maybe-start+end) (check-arg procedure? proc string-for-each-index) (let-string-start+end (start end) string-for-each-index s maybe-start+end (let lp ((i start)) (if (< i end) (begin (proc i) (lp (+ i 1))))))) (define (string-every criterion s . maybe-start+end) (let-string-start+end (start end) string-every s maybe-start+end (cond ((char? criterion) (let lp ((i start)) (or (>= i end) (and (char=? criterion (string-ref s i)) (lp (+ i 1)))))) ((char-set? criterion) (let lp ((i start)) (or (>= i end) (and (char-set-contains? criterion (string-ref s i)) (lp (+ i 1)))))) ((procedure? criterion) ; Slightly funky loop so that (or (= start end) ; final (PRED S[END-1]) call (let lp ((i start)) ; is a tail call. (let ((c (string-ref s i)) (i1 (+ i 1))) (if (= i1 end) (criterion c) ; Tail call. (and (criterion c) (lp i1))))))) (else (error "Second param is neither char-set, char, or predicate procedure." string-every criterion))))) (define (string-any criterion s . maybe-start+end) (let-string-start+end (start end) string-any s maybe-start+end (cond ((char? criterion) (let lp ((i start)) (and (< i end) (or (char=? criterion (string-ref s i)) (lp (+ i 1)))))) ((char-set? criterion) (let lp ((i start)) (and (< i end) (or (char-set-contains? criterion (string-ref s i)) (lp (+ i 1)))))) ((procedure? criterion) ; Slightly funky loop so that (and (< start end) ; final (PRED S[END-1]) call (let lp ((i start)) ; is a tail call. (let ((c (string-ref s i)) (i1 (+ i 1))) (if (= i1 end) (criterion c) ; Tail call (or (criterion c) (lp i1))))))) (else (error "Second param is neither char-set, char, or predicate procedure." string-any criterion))))) (define (string-tabulate proc len) (check-arg procedure? proc string-tabulate) (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val))) len string-tabulate) (let ((s (make-string len))) (do ((i (- len 1) (- i 1))) ((< i 0)) (string-set! s i (proc i))) s)) ;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2] ;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Find the length of the common prefix/suffix. ;;; It is not required that the two substrings passed be of equal length. ;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. ;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons, ;;; so should be as tense as possible. (define (%string-prefix-length s1 start1 end1 s2 start2 end2) (let* ((delta (min (- end1 start1) (- end2 start2))) (end1 (+ start1 delta))) (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path delta (let lp ((i start1) (j start2)) ; Regular path (if (or (>= i end1) (not (char=? (string-ref s1 i) (string-ref s2 j)))) (- i start1) (lp (+ i 1) (+ j 1))))))) (define (%string-suffix-length s1 start1 end1 s2 start2 end2) (let* ((delta (min (- end1 start1) (- end2 start2))) (start1 (- end1 delta))) (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path delta (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path (if (or (< i start1) (not (char=? (string-ref s1 i) (string-ref s2 j)))) (- (- end1 i) 1) (lp (- i 1) (- j 1))))))) (define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2) (let* ((delta (min (- end1 start1) (- end2 start2))) (end1 (+ start1 delta))) (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path delta (let lp ((i start1) (j start2)) ; Regular path (if (or (>= i end1) (not (char-ci=? (string-ref s1 i) (string-ref s2 j)))) (- i start1) (lp (+ i 1) (+ j 1))))))) (define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2) (let* ((delta (min (- end1 start1) (- end2 start2))) (start1 (- end1 delta))) (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path delta (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path (if (or (< i start1) (not (char-ci=? (string-ref s1 i) (string-ref s2 j)))) (- (- end1 i) 1) (lp (- i 1) (- j 1))))))) (define (string-prefix-length s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-prefix-length s1 s2 maybe-starts+ends (%string-prefix-length s1 start1 end1 s2 start2 end2))) (define (string-suffix-length s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-suffix-length s1 s2 maybe-starts+ends (%string-suffix-length s1 start1 end1 s2 start2 end2))) (define (string-prefix-length-ci s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-prefix-length-ci s1 s2 maybe-starts+ends (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) (define (string-suffix-length-ci s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-suffix-length-ci s1 s2 maybe-starts+ends (%string-suffix-length-ci s1 start1 end1 s2 start2 end2))) ;;; string-prefix? s1 s2 [start1 end1 start2 end2] ;;; string-suffix? s1 s2 [start1 end1 start2 end2] ;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2] ;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; These are all simple derivatives of the previous counting funs. (define (string-prefix? s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-prefix? s1 s2 maybe-starts+ends (%string-prefix? s1 start1 end1 s2 start2 end2))) (define (string-suffix? s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-suffix? s1 s2 maybe-starts+ends (%string-suffix? s1 start1 end1 s2 start2 end2))) (define (string-prefix-ci? s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-prefix-ci? s1 s2 maybe-starts+ends (%string-prefix-ci? s1 start1 end1 s2 start2 end2))) (define (string-suffix-ci? s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-suffix-ci? s1 s2 maybe-starts+ends (%string-suffix-ci? s1 start1 end1 s2 start2 end2))) ;;; Here are the internal routines that do the real work. (define (%string-prefix? s1 start1 end1 s2 start2 end2) (let ((len1 (- end1 start1))) (and (<= len1 (- end2 start2)) ; Quick check (= (%string-prefix-length s1 start1 end1 s2 start2 end2) len1)))) (define (%string-suffix? s1 start1 end1 s2 start2 end2) (let ((len1 (- end1 start1))) (and (<= len1 (- end2 start2)) ; Quick check (= len1 (%string-suffix-length s1 start1 end1 s2 start2 end2))))) (define (%string-prefix-ci? s1 start1 end1 s2 start2 end2) (let ((len1 (- end1 start1))) (and (<= len1 (- end2 start2)) ; Quick check (= len1 (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))))) (define (%string-suffix-ci? s1 start1 end1 s2 start2 end2) (let ((len1 (- end1 start1))) (and (<= len1 (- end2 start2)) ; Quick check (= len1 (%string-suffix-length-ci s1 start1 end1 s2 start2 end2))))) ;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2] ;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Primitive string-comparison functions. ;;; Continuation order is different from MIT Scheme. ;;; Continuations are applied to s1's mismatch index; ;;; in the case of equality, this is END1. (define (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>) (let ((size1 (- end1 start1)) (size2 (- end2 start2))) (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2))) (if (= match size1) ((if (= match size2) proc= proc<) end1) ((if (= match size2) proc> (if (char)) (+ match start1)))))) (define (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>) (let ((size1 (- end1 start1)) (size2 (- end2 start2))) (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) (if (= match size1) ((if (= match size2) proc= proc<) end1) ((if (= match size2) proc> (if (char-ci)) (+ start1 match)))))) (define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends) (check-arg procedure? proc< string-compare) (check-arg procedure? proc= string-compare) (check-arg procedure? proc> string-compare) (let-string-start+end2 (start1 end1 start2 end2) string-compare s1 s2 maybe-starts+ends (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>))) (define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends) (check-arg procedure? proc< string-compare-ci) (check-arg procedure? proc= string-compare-ci) (check-arg procedure? proc> string-compare-ci) (let-string-start+end2 (start1 end1 start2 end2) string-compare-ci s1 s2 maybe-starts+ends (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>))) ;;; string= string<> string-ci= string-ci<> ;;; string< string> string-ci< string-ci> ;;; string<= string>= string-ci<= string-ci>= ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Simple definitions in terms of the previous comparison funs. ;;; I sure hope the %STRING-COMPARE calls get integrated. (define (string= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string= s1 s2 maybe-starts+ends (and (= (- end1 start1) (- end2 start2)) ; Quick filter (or (and (eq? s1 s2) (= start1 start2)) ; Fast path (%string-compare s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) values (lambda (i) #f)))))) (define (string<> s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string<> s1 s2 maybe-starts+ends (or (not (= (- end1 start1) (- end2 start2))) ; Fast path (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter (%string-compare s1 start1 end1 s2 start2 end2 ; Real test values (lambda (i) #f) values))))) (define (string< s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string< s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (< end1 end2) (%string-compare s1 start1 end1 s2 start2 end2 ; Real test values (lambda (i) #f) (lambda (i) #f))))) (define (string> s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string> s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (> end1 end2) (%string-compare s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) (lambda (i) #f) values)))) (define (string<= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string<= s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (<= end1 end2) (%string-compare s1 start1 end1 s2 start2 end2 ; Real test values values (lambda (i) #f))))) (define (string>= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string>= s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (>= end1 end2) (%string-compare s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) values values)))) (define (string-ci= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci= s1 s2 maybe-starts+ends (and (= (- end1 start1) (- end2 start2)) ; Quick filter (or (and (eq? s1 s2) (= start1 start2)) ; Fast path (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) values (lambda (i) #f)))))) (define (string-ci<> s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci<> s1 s2 maybe-starts+ends (or (not (= (- end1 start1) (- end2 start2))) ; Fast path (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test values (lambda (i) #f) values))))) (define (string-ci< s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci< s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (< end1 end2) (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test values (lambda (i) #f) (lambda (i) #f))))) (define (string-ci> s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci> s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (> end1 end2) (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) (lambda (i) #f) values)))) (define (string-ci<= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci<= s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (<= end1 end2) (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test values values (lambda (i) #f))))) (define (string-ci>= s1 s2 . maybe-starts+ends) (let-string-start+end2 (start1 end1 start2 end2) string-ci>= s1 s2 maybe-starts+ends (if (and (eq? s1 s2) (= start1 start2)) ; Fast path (>= end1 end2) (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test (lambda (i) #f) values values)))) ;;; Hash ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in ;;; to keep the intermediate values small. (We do the calculation with just ;;; enough bits to represent BOUND, masking off high bits at each step in ;;; calculation. If this screws up any important properties of the hash ;;; function I'd like to hear about it. -Olin) ;;; ;;; If you keep BOUND small enough, the intermediate calculations will ;;; always be fixnums. How small is dependent on the underlying Scheme system; ;;; we use a default BOUND of 2^22 = 4194304, which should hack it in ;;; Schemes that give you at least 29 signed bits for fixnums. The core ;;; calculation that you don't want to overflow is, worst case, ;;; (+ 65535 (* 37 (- bound 1))) ;;; where 65535 is the max character code. Choose the default BOUND to be the ;;; biggest power of two that won't cause this expression to fixnum overflow, ;;; and everything will be copacetic. (define (%string-hash s char->int bound start end) (let ((iref (lambda (s i) (char->int (string-ref s i)))) ;; Compute a 111...1 mask that will cover BOUND-1: (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? (if (>= i bound) (- i 1) (lp (+ i i)))))) (let lp ((i start) (ans 0)) (if (>= i end) (modulo ans bound) (lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i)))))))) (define (string-hash s . maybe-bound+start+end) (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) (exact? bound) (<= 0 bound))) rest) (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. (let-string-start+end (start end) string-hash s rest (%string-hash s char->integer bound start end))))) (define (string-hash-ci s . maybe-bound+start+end) (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) (exact? bound) (<= 0 bound))) rest) (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. (let-string-start+end (start end) string-hash-ci s rest (%string-hash s (lambda (c) (char->integer (char-downcase c))) bound start end))))) ;;; Case hacking ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; string-upcase s [start end] ;;; string-upcase! s [start end] ;;; string-downcase s [start end] ;;; string-downcase! s [start end] ;;; ;;; string-titlecase s [start end] ;;; string-titlecase! s [start end] ;;; Capitalize every contiguous alpha sequence: capitalise ;;; first char, lowercase rest. (define (string-upcase s . maybe-start+end) (let-string-start+end (start end) string-upcase s maybe-start+end (%string-map char-upcase s start end))) (define (string-upcase! s . maybe-start+end) (let-string-start+end (start end) string-upcase! s maybe-start+end (%string-map! char-upcase s start end))) (define (string-downcase s . maybe-start+end) (let-string-start+end (start end) string-downcase s maybe-start+end (%string-map char-downcase s start end))) (define (string-downcase! s . maybe-start+end) (let-string-start+end (start end) string-downcase! s maybe-start+end (%string-map! char-downcase s start end))) (define (%string-titlecase! s start end) (let lp ((i start)) (cond ((string-index s char-cased? i end) => (lambda (i) (string-set! s i (char-titlecase (string-ref s i))) (let ((i1 (+ i 1))) (cond ((string-skip s char-cased? i1 end) => (lambda (j) (string-downcase! s i1 j) (lp (+ j 1)))) (else (string-downcase! s i1 end))))))))) (define (string-titlecase! s . maybe-start+end) (let-string-start+end (start end) string-titlecase! s maybe-start+end (%string-titlecase! s start end))) (define (string-titlecase s . maybe-start+end) (let-string-start+end (start end) string-titlecase! s maybe-start+end (let ((ans (substring s start end))) (%string-titlecase! ans 0 (- end start)) ans))) ;;; Cutting & pasting strings ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; string-take string nchars ;;; string-drop string nchars ;;; ;;; string-take-right string nchars ;;; string-drop-right string nchars ;;; ;;; string-pad string k [char start end] ;;; string-pad-right string k [char start end] ;;; ;;; string-trim string [char/char-set/pred start end] ;;; string-trim-right string [char/char-set/pred start end] ;;; string-trim-both string [char/char-set/pred start end] ;;; ;;; These trimmers invert the char-set meaning from MIT Scheme -- you ;;; say what you want to trim. (define (string-take s n) (check-arg string? s string-take) (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n (string-length s)))) n string-take) (%substring/shared s 0 n)) (define (string-take-right s n) (check-arg string? s string-take-right) (let ((len (string-length s))) (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) n string-take-right) (%substring/shared s (- len n) len))) (define (string-drop s n) (check-arg string? s string-drop) (let ((len (string-length s))) (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) n string-drop) (%substring/shared s n len))) (define (string-drop-right s n) (check-arg string? s string-drop-right) (let ((len (string-length s))) (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) n string-drop-right) (%substring/shared s 0 (- len n)))) (define (string-trim s . criterion+start+end) (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) (let-string-start+end (start end) string-trim s rest (cond ((string-skip s criterion start end) => (lambda (i) (%substring/shared s i end))) (else ""))))) (define (string-trim-right s . criterion+start+end) (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) (let-string-start+end (start end) string-trim-right s rest (cond ((string-skip-right s criterion start end) => (lambda (i) (%substring/shared s 0 (+ 1 i)))) (else ""))))) (define (string-trim-both s . criterion+start+end) (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) (let-string-start+end (start end) string-trim-both s rest (cond ((string-skip s criterion start end) => (lambda (i) (%substring/shared s i (+ 1 (string-skip-right s criterion i end))))) (else ""))))) (define (string-pad-right s n . char+start+end) (let-optionals* char+start+end ((char #\space (char? char)) rest) (let-string-start+end (start end) string-pad-right s rest (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) n string-pad-right) (let ((len (- end start))) (if (<= n len) (%substring/shared s start (+ start n)) (let ((ans (make-string n char))) (%string-copy! ans 0 s start end) ans)))))) (define (string-pad s n . char+start+end) (let-optionals* char+start+end ((char #\space (char? char)) rest) (let-string-start+end (start end) string-pad s rest (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) n string-pad) (let ((len (- end start))) (if (<= n len) (%substring/shared s (- end n) end) (let ((ans (make-string n char))) (%string-copy! ans (- n len) s start end) ans)))))) ;;; Filtering strings ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; string-delete char/char-set/pred string [start end] ;;; string-filter char/char-set/pred string [start end] ;;; ;;; If the criterion is a char or char-set, we scan the string twice with ;;; string-fold -- once to determine the length of the result string, ;;; and once to do the filtered copy. ;;; If the criterion is a predicate, we don't do this double-scan strategy, ;;; because the predicate might have side-effects or be very expensive to ;;; compute. So we preallocate a temp buffer pessimistically, and only do ;;; one scan over S. This is likely to be faster and more space-efficient ;;; than consing a list. (define (string-delete criterion s . maybe-start+end) (let-string-start+end (start end) string-delete s maybe-start+end (if (procedure? criterion) (let* ((slen (- end start)) (temp (make-string slen)) (ans-len (string-fold (lambda (c i) (if (criterion c) i (begin (string-set! temp i c) (+ i 1)))) 0 s start end))) (if (= ans-len slen) temp (substring temp 0 ans-len))) (let* ((cset (cond ((char-set? criterion) criterion) ((char? criterion) (char-set criterion)) (else (error "string-delete criterion not predicate, char or char-set" criterion)))) (len (string-fold (lambda (c i) (if (char-set-contains? cset c) i (+ i 1))) 0 s start end)) (ans (make-string len))) (string-fold (lambda (c i) (if (char-set-contains? cset c) i (begin (string-set! ans i c) (+ i 1)))) 0 s start end) ans)))) (define (string-filter criterion s . maybe-start+end) (let-string-start+end (start end) string-filter s maybe-start+end (if (procedure? criterion) (let* ((slen (- end start)) (temp (make-string slen)) (ans-len (string-fold (lambda (c i) (if (criterion c) (begin (string-set! temp i c) (+ i 1)) i)) 0 s start end))) (if (= ans-len slen) temp (substring temp 0 ans-len))) (let* ((cset (cond ((char-set? criterion) criterion) ((char? criterion) (char-set criterion)) (else (error "string-delete criterion not predicate, char or char-set" criterion)))) (len (string-fold (lambda (c i) (if (char-set-contains? cset c) (+ i 1) i)) 0 s start end)) (ans (make-string len))) (string-fold (lambda (c i) (if (char-set-contains? cset c) (begin (string-set! ans i c) (+ i 1)) i)) 0 s start end) ans)))) ;;; String search ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; string-index string char/char-set/pred [start end] ;;; string-index-right string char/char-set/pred [start end] ;;; string-skip string char/char-set/pred [start end] ;;; string-skip-right string char/char-set/pred [start end] ;;; string-count string char/char-set/pred [start end] ;;; There's a lot of replicated code here for efficiency. ;;; For example, the char/char-set/pred discrimination has ;;; been lifted above the inner loop of each proc. (define (string-index str criterion . maybe-start+end) (let-string-start+end (start end) string-index str maybe-start+end (cond ((char? criterion) (let lp ((i start)) (and (< i end) (if (char=? criterion (string-ref str i)) i (lp (+ i 1)))))) ((char-set? criterion) (let lp ((i start)) (and (< i end) (if (char-set-contains? criterion (string-ref str i)) i (lp (+ i 1)))))) ((procedure? criterion) (let lp ((i start)) (and (< i end) (if (criterion (string-ref str i)) i (lp (+ i 1)))))) (else (error "Second param is neither char-set, char, or predicate procedure." string-index criterion))))) (define (string-index-right str criterion . maybe-start+end) (let-string-start+end (start end) string-index-right str maybe-start+end (cond ((char? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (char=? criterion (string-ref str i)) i (lp (- i 1)))))) ((char-set? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (char-set-contains? criterion (string-ref str i)) i (lp (- i 1)))))) ((procedure? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (criterion (string-ref str i)) i (lp (- i 1)))))) (else (error "Second param is neither char-set, char, or predicate procedure." string-index-right criterion))))) (define (string-skip str criterion . maybe-start+end) (let-string-start+end (start end) string-skip str maybe-start+end (cond ((char? criterion) (let lp ((i start)) (and (< i end) (if (char=? criterion (string-ref str i)) (lp (+ i 1)) i)))) ((char-set? criterion) (let lp ((i start)) (and (< i end) (if (char-set-contains? criterion (string-ref str i)) (lp (+ i 1)) i)))) ((procedure? criterion) (let lp ((i start)) (and (< i end) (if (criterion (string-ref str i)) (lp (+ i 1)) i)))) (else (error "Second param is neither char-set, char, or predicate procedure." string-skip criterion))))) (define (string-skip-right str criterion . maybe-start+end) (let-string-start+end (start end) string-skip-right str maybe-start+end (cond ((char? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (char=? criterion (string-ref str i)) (lp (- i 1)) i)))) ((char-set? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (char-set-contains? criterion (string-ref str i)) (lp (- i 1)) i)))) ((procedure? criterion) (let lp ((i (- end 1))) (and (>= i 0) (if (criterion (string-ref str i)) (lp (- i 1)) i)))) (else (error "CRITERION param is neither char-set or char." string-skip-right criterion))))) (define (string-count s criterion . maybe-start+end) (let-string-start+end (start end) string-count s maybe-start+end (cond ((char? criterion) (do ((i start (+ i 1)) (count 0 (if (char=? criterion (string-ref s i)) (+ count 1) count))) ((>= i end) count))) ((char-set? criterion) (do ((i start (+ i 1)) (count 0 (if (char-set-contains? criterion (string-ref s i)) (+ count 1) count))) ((>= i end) count))) ((procedure? criterion) (do ((i start (+ i 1)) (count 0 (if (criterion (string-ref s i)) (+ count 1) count))) ((>= i end) count))) (else (error "CRITERION param is neither char-set or char." string-count criterion))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; string-fill! string char [start end] ;;; ;;; string-copy! to tstart from [fstart fend] ;;; Guaranteed to work, even if s1 eq s2. (define (string-fill! s char . maybe-start+end) (check-arg char? char string-fill!) (let-string-start+end (start end) string-fill! s maybe-start+end (do ((i (- end 1) (- i 1))) ((< i start)) (string-set! s i char)))) (define (string-copy! to tstart from . maybe-fstart+fend) (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend (check-arg integer? tstart string-copy!) (check-substring-spec string-copy! to tstart (+ tstart (- fend fstart))) (%string-copy! to tstart from fstart fend))) ;;; Library-internal routine (define (%string-copy! to tstart from fstart fend) (if (> fstart tstart) (do ((i fstart (+ i 1)) (j tstart (+ j 1))) ((>= i fend)) (string-set! to j (string-ref from i))) (do ((i (- fend 1) (- i 1)) (j (+ -1 tstart (- fend fstart)) (- j 1))) ((< i fstart)) (string-set! to j (string-ref from i))))) ;;; Returns starting-position in STRING or #f if not true. ;;; This implementation is slow & simple. It is useful as a "spec" or for ;;; comparison testing with fancier implementations. ;;; See below for fast KMP version. (define (%string-contains string substring start1 end1 start2 end2 the-string=) (let* ((len (- end2 start2)) (i-bound (- end1 len))) (let lp ((i start1)) (and (<= i i-bound) (if (the-string= string substring i (+ i len) start2 end2) i (lp (+ i 1))))))) (define (string-contains text pattern . maybe-starts+ends) (let-string-start+end2 (t-start t-end p-start p-end) string-contains text pattern maybe-starts+ends (%string-contains text pattern t-start t-end p-start p-end string=))) (define (string-contains-ci text pattern . maybe-starts+ends) (let-string-start+end2 (t-start t-end p-start p-end) string-contains-ci text pattern maybe-starts+ends (%string-contains text pattern t-start t-end p-start p-end string-ci=))) ;;; Searching for an occurrence of a substring ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Broken, see http://srfi.schemers.org/srfi-13/post-mail-archive/msg00003.html ; (define (string-contains text pattern . maybe-starts+ends) ; (let-string-start+end2 (t-start t-end p-start p-end) ; string-contains text pattern maybe-starts+ends ; (%kmp-search pattern text char=? p-start p-end t-start t-end))) ; (define (string-contains-ci text pattern . maybe-starts+ends) ; (let-string-start+end2 (t-start t-end p-start p-end) ; string-contains-ci text pattern maybe-starts+ends ; (%kmp-search pattern text char-ci=? p-start p-end t-start t-end))) ;;; Knuth-Morris-Pratt string searching ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; See ;;; "Fast pattern matching in strings" ;;; SIAM J. Computing 6(2):323-350 1977 ;;; D. E. Knuth, J. H. Morris and V. R. Pratt ;;; also described in ;;; "Pattern matching in strings" ;;; Alfred V. Aho ;;; Formal Language Theory - Perspectives and Open Problems ;;; Ronald V. Brook (editor) ;;; This algorithm is O(m + n) where m and n are the ;;; lengths of the pattern and string respectively ;;; KMP search source[start,end) for PATTERN. Return starting index of ;;; leftmost match or #f. (define (%kmp-search pattern text c= p-start p-end t-start t-end) (let ((plen (- p-end p-start)) (rv (make-kmp-restart-vector pattern c= p-start p-end))) ;; The search loop. TJ & PJ are redundant state. (let lp ((ti t-start) (pi 0) (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left. (pj plen)) ; (- plen pi) -- how many chars left. (if (= pi plen) (- ti plen) ; Win. (and (<= pj tj) ; Lose. (if (c= (string-ref text ti) ; Search. (string-ref pattern (+ p-start pi))) (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance. (let ((pi (vector-ref rv pi))) ; Retreat. (if (= pi -1) (lp (+ ti 1) 0 (- tj 1) plen) ; Punt. (lp ti pi tj (- plen pi)))))))))) ;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Compute the KMP restart vector RV for string PATTERN. If ;;; we have matched chars 0..i-1 of PATTERN against a search string S, and ;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to ;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to ;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k]. ;;; ;;; In other words, if you have matched the first i chars of PATTERN, but ;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest ;;; prefix of PATTERN is that you have matched. ;;; ;;; - C= (default CHAR=?) is used to compare characters for equality. ;;; Pass in CHAR-CI=? for case-folded string search. ;;; ;;; - START & END restrict the pattern to the indicated substring; the ;;; returned vector will be of length END - START. The numbers stored ;;; in the vector will be values in the range [0,END-START) -- that is, ;;; they are valid indices into the restart vector; you have to add START ;;; to them to use them as indices into PATTERN. ;;; ;;; I've split this out as a separate function in case other constant-string ;;; searchers might want to use it. ;;; ;;; E.g.: ;;; a b d a b x ;;; #(-1 0 0 -1 1 2) (define (make-kmp-restart-vector pattern . maybe-c=+start+end) (let-optionals* maybe-c=+start+end ((c= char=? (procedure? c=)) ((start end) (lambda (args) (string-parse-start+end make-kmp-restart-vector pattern args)))) (let* ((rvlen (- end start)) (rv (make-vector rvlen -1))) (if (> rvlen 0) (let ((rvlen-1 (- rvlen 1)) (c0 (string-ref pattern start))) ;; Here's the main loop. We have set rv[0] ... rv[i]. ;; K = I + START -- it is the corresponding index into PATTERN. (let lp1 ((i 0) (j -1) (k start)) (if (< i rvlen-1) (let ((ck (string-ref pattern k))) ;; lp2 invariant: ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1] ;; or j = -1. (let lp2 ((j j)) (cond ((= j -1) (let ((i1 (+ i 1))) (vector-set! rv i1 (if (c= ck c0) -1 0)) (lp1 i1 0 (+ k 1)))) ;; pat[(k-j) .. k] matches pat[start..start+j]. ((c= ck (string-ref pattern (+ j start))) (let* ((i1 (+ 1 i)) (j1 (+ 1 j))) (vector-set! rv i1 j1) (lp1 i1 j1 (+ k 1)))) (else (lp2 (vector-ref rv j)))))))))) rv))) ;;; We've matched I chars from PAT. C is the next char from the search string. ;;; Return the new I after handling C. ;;; ;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START ;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched ;;; are ;;; PAT[PAT-START .. PAT-START + I]. ;;; ;;; It's *not* an oversight that there is no friendly error checking or ;;; defaulting of arguments. This is a low-level, inner-loop procedure ;;; that we want integrated/inlined into the point of call. (define (kmp-step pat rv c i c= p-start) (let lp ((i i)) (if (c= c (string-ref pat (+ i p-start))) ; Match => (+ i 1) ; Done. (let ((i (vector-ref rv i))) ; Back up in PAT. (if (= i -1) 0 ; Can't back up further. (lp i)))))) ; Keep trying for match. ;;; Zip through S[start,end), looking for a match of PAT. Assume we've ;;; already matched the first I chars of PAT when we commence at S[start]. ;;; - <0: If we find a match *ending* at index J, return -J. ;;; - >=0: If we get to the end of the S[start,end) span without finding ;;; a complete match, return the number of chars from PAT we'd matched ;;; when we ran off the end. ;;; ;;; This is useful for searching *across* buffers -- that is, when your ;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop ;;; for speed. (define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end) (check-arg vector? rv string-kmp-partial-search) (let-optionals* c=+p-start+s-start+s-end ((c= char=? (procedure? c=)) (p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start))) ((s-start s-end) (lambda (args) (string-parse-start+end string-kmp-partial-search s args)))) (let ((patlen (vector-length rv))) (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen))) i string-kmp-partial-search) ;; Enough prelude. Here's the actual code. (let lp ((si s-start) ; An index into S. (vi i)) ; An index into RV. (cond ((= vi patlen) (- si)) ; Win. ((= si s-end) vi) ; Ran off the end. (else ; Match s[si] & loop. (let ((c (string-ref s si))) (lp (+ si 1) (let lp2 ((vi vi)) ; This is just KMP-STEP. (if (c= c (string-ref pat (+ vi p-start))) (+ vi 1) (let ((vi (vector-ref rv vi))) (if (= vi -1) 0 (lp2 vi))))))))))))) ;;; Misc ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; (string-null? s) ;;; (string-reverse s [start end]) ;;; (string-reverse! s [start end]) ;;; (reverse-list->string clist) ;;; (string->list s [start end]) (define (string-null? s) (zero? (string-length s))) (define (string-reverse s . maybe-start+end) (let-string-start+end (start end) string-reverse s maybe-start+end (let* ((len (- end start)) (ans (make-string len))) (do ((i start (+ i 1)) (j (- len 1) (- j 1))) ((< j 0)) (string-set! ans j (string-ref s i))) ans))) (define (string-reverse! s . maybe-start+end) (let-string-start+end (start end) string-reverse! s maybe-start+end (do ((i (- end 1) (- i 1)) (j start (+ j 1))) ((<= i j)) (let ((ci (string-ref s i))) (string-set! s i (string-ref s j)) (string-set! s j ci))))) (define (reverse-list->string clist) (let* ((len (length clist)) (s (make-string len))) (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) ((not (pair? clist))) (string-set! s i (car clist))) s)) ;(define (string->list s . maybe-start+end) ; (apply string-fold-right cons '() s maybe-start+end)) (define (string->list s . maybe-start+end) (let-string-start+end (start end) string->list s maybe-start+end (do ((i (- end 1) (- i 1)) (ans '() (cons (string-ref s i) ans))) ((< i start) ans)))) ;;; Defined by R5RS, so commented out here. ;(define (list->string lis) (string-unfold null? car cdr lis)) ;;; string-concatenate string-list -> string ;;; string-concatenate/shared string-list -> string ;;; string-append/shared s ... -> string ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; STRING-APPEND/SHARED has license to return a string that shares storage ;;; with any of its arguments. In particular, if there is only one non-empty ;;; string amongst its parameters, it is permitted to return that string as ;;; its result. STRING-APPEND, by contrast, always allocates new storage. ;;; ;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of ;;; strings, which they concatenate into a result string. STRING-CONCATENATE ;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may ;;; not) return a result that shares storage with any of its arguments. In ;;; particular, if it is applied to a singleton list, it is permitted to ;;; return the car of that list as its value. (define (string-append/shared . strings) (string-concatenate/shared strings)) (define (string-concatenate/shared strings) (let lp ((strings strings) (nchars 0) (first #f)) (cond ((pair? strings) ; Scan the args, add up total (let* ((string (car strings)) ; length, remember 1st (tail (cdr strings)) ; non-empty string. (slen (string-length string))) (if (zero? slen) (lp tail nchars first) (lp tail (+ nchars slen) (or first strings))))) ((zero? nchars) "") ;; Just one non-empty string! Return it. ((= nchars (string-length (car first))) (car first)) (else (let ((ans (make-string nchars))) (let lp ((strings first) (i 0)) (if (pair? strings) (let* ((s (car strings)) (slen (string-length s))) (%string-copy! ans i s 0 slen) (lp (cdr strings) (+ i slen))))) ans))))) ; Alas, Scheme 48's APPLY blows up if you have many, many arguments. ;(define (string-concatenate strings) (apply string-append strings)) ;;; Here it is written out. I avoid using REDUCE to add up string lengths ;;; to avoid non-R5RS dependencies. (define (string-concatenate strings) (let* ((total (do ((strings strings (cdr strings)) (i 0 (+ i (string-length (car strings))))) ((not (pair? strings)) i))) (ans (make-string total))) (let lp ((i 0) (strings strings)) (if (pair? strings) (let* ((s (car strings)) (slen (string-length s))) (%string-copy! ans i s 0 slen) (lp (+ i slen) (cdr strings))))) ans)) ;;; Defined by R5RS, so commented out here. ;(define (string-append . strings) (string-concatenate strings)) ;;; string-concatenate-reverse string-list [final-string end] -> string ;;; string-concatenate-reverse/shared string-list [final-string end] -> string ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Return ;;; (string-concatenate ;;; (reverse ;;; (cons (substring final-string 0 end) string-list))) (define (string-concatenate-reverse string-list . maybe-final+end) (let-optionals* maybe-final+end ((final "" (string? final)) (end (string-length final) (and (integer? end) (exact? end) (<= 0 end (string-length final))))) (let ((len (let lp ((sum 0) (lis string-list)) (if (pair? lis) (lp (+ sum (string-length (car lis))) (cdr lis)) sum)))) (%finish-string-concatenate-reverse len string-list final end)))) (define (string-concatenate-reverse/shared string-list . maybe-final+end) (let-optionals* maybe-final+end ((final "" (string? final)) (end (string-length final) (and (integer? end) (exact? end) (<= 0 end (string-length final))))) ;; Add up the lengths of all the strings in STRING-LIST; also get a ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length ;; string starts. (let lp ((len 0) (nzlist #f) (lis string-list)) (if (pair? lis) (let ((slen (string-length (car string-list)))) (lp (+ len slen) (if (or nzlist (zero? slen)) nzlist lis) (cdr lis))) (cond ((zero? len) (substring/shared final 0 end)) ;; LEN > 0, so NZLIST is non-empty. ((and (zero? end) (= len (string-length (car nzlist)))) (car nzlist)) (else (%finish-string-concatenate-reverse len nzlist final end))))))) (define (%finish-string-concatenate-reverse len string-list final end) (let ((ans (make-string (+ end len)))) (%string-copy! ans len final 0 end) (let lp ((i len) (lis string-list)) (if (pair? lis) (let* ((s (car lis)) (lis (cdr lis)) (slen (string-length s)) (i (- i slen))) (%string-copy! ans i s 0 slen) (lp i lis)))) ans)) ;;; string-replace s1 s2 start1 end1 [start2 end2] -> string ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Replace S1[START1,END1) with S2[START2,END2). (define (string-replace s1 s2 start1 end1 . maybe-start+end) (check-substring-spec string-replace s1 start1 end1) (let-string-start+end (start2 end2) string-replace s2 maybe-start+end (let* ((slen1 (string-length s1)) (sublen2 (- end2 start2)) (alen (+ (- slen1 (- end1 start1)) sublen2)) (ans (make-string alen))) (%string-copy! ans 0 s1 0 start1) (%string-copy! ans start1 s2 start2 end2) (%string-copy! ans (+ start1 sublen2) s1 end1 slen1) ans))) ;;; string-tokenize s [token-set start end] -> list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Break S up into a list of token strings, where a token is a maximal ;;; non-empty contiguous sequence of chars belonging to TOKEN-SET. ;;; (string-tokenize "hello, world") => ("hello," "world") (define (string-tokenize s . token-chars+start+end) (let-optionals* token-chars+start+end ((token-chars char-set:graphic (char-set? token-chars)) rest) (let-string-start+end (start end) string-tokenize s rest (let lp ((i end) (ans '())) (cond ((and (< start i) (string-index-right s token-chars start i)) => (lambda (tend-1) (let ((tend (+ 1 tend-1))) (cond ((string-skip-right s token-chars start tend-1) => (lambda (tstart-1) (lp tstart-1 (cons (substring s (+ 1 tstart-1) tend) ans)))) (else (cons (substring s start tend) ans)))))) (else ans)))))) ;;; xsubstring s from [to start end] -> string ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; S is a string; START and END are optional arguments that demarcate ;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole ;;; string). Replicate this substring up and down index space, in both the ;; positive and negative directions. For example, if S = "abcdefg", START=3, ;;; and END=6, then we have the conceptual bidirectionally-infinite string ;;; ... d e f d e f d e f d e f d e f d e f d e f ... ;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... ;;; XSUBSTRING returns the substring of this string beginning at index FROM, ;;; and ending at TO (which defaults to FROM+(END-START)). ;;; ;;; You can use XSUBSTRING in many ways: ;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" ;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" ;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" ;;; ;;; Note that ;;; - The FROM/TO indices give a half-open range -- the characters from ;;; index FROM up to, but not including index TO. ;;; - The FROM/TO indices are not in terms of the index space for string S. ;;; They are in terms of the replicated index space of the substring ;;; defined by S, START, and END. ;;; ;;; It is an error if START=END -- although this is allowed by special ;;; dispensation when FROM=TO. (define (xsubstring s from . maybe-to+start+end) (check-arg (lambda (val) (and (integer? val) (exact? val))) from xsubstring) (receive (to start end) (if (pair? maybe-to+start+end) (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end) (let ((to (car maybe-to+start+end))) (check-arg (lambda (val) (and (integer? val) (exact? val) (<= from val))) to xsubstring) (values to start end))) (let ((slen (string-length (check-arg string? s xsubstring)))) (values (+ from slen) 0 slen))) (let ((slen (- end start)) (anslen (- to from))) (cond ((zero? anslen) "") ((zero? slen) (error "Cannot replicate empty (sub)string" xsubstring s from to start end)) ((= 1 slen) ; Fast path for 1-char replication. (make-string anslen (string-ref s start))) ;; Selected text falls entirely within one span. ((= (floor (/ from slen)) (floor (/ to slen))) (substring s (+ start (modulo from slen)) (+ start (modulo to slen)))) ;; Selected text requires multiple spans. (else (let ((ans (make-string anslen))) (%multispan-repcopy! ans 0 s from to start end) ans)))))) ;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Exactly the same as xsubstring, but the extracted text is written ;;; into the string TARGET starting at index TSTART. ;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy ;;; a string on top of itself. (define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) (check-arg (lambda (val) (and (integer? val) (exact? val))) sfrom string-xcopy!) (receive (sto start end) (if (pair? maybe-sto+start+end) (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) (let ((sto (car maybe-sto+start+end))) (check-arg (lambda (val) (and (integer? val) (exact? val))) sto string-xcopy!) (values sto start end))) (let ((slen (string-length s))) (values (+ sfrom slen) 0 slen))) (let* ((tocopy (- sto sfrom)) (tend (+ tstart tocopy)) (slen (- end start))) (check-substring-spec string-xcopy! target tstart tend) (cond ((zero? tocopy)) ((zero? slen) (error "Cannot replicate empty (sub)string" string-xcopy! target tstart s sfrom sto start end)) ((= 1 slen) ; Fast path for 1-char replication. (string-fill! target (string-ref s start) tstart tend)) ;; Selected text falls entirely within one span. ((= (floor (/ sfrom slen)) (floor (/ sto slen))) (%string-copy! target tstart s (+ start (modulo sfrom slen)) (+ start (modulo sto slen)))) ;; Multi-span copy. (else (%multispan-repcopy! target tstart s sfrom sto start end)))))) ;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! ;;; Internal -- not exported, no careful arg checking. (define (%multispan-repcopy! target tstart s sfrom sto start end) (let* ((slen (- end start)) (i0 (+ start (modulo sfrom slen))) (total-chars (- sto sfrom))) ;; Copy the partial span @ the beginning (%string-copy! target tstart s i0 end) (let* ((ncopied (- end i0)) ; We've copied this many. (nleft (- total-chars ncopied)) ; # chars left to copy. (nspans (quotient nleft slen))) ; # whole spans to copy ;; Copy the whole spans in the middle. (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. (nspans nspans (- nspans 1))) ; # spans to copy ((zero? nspans) ;; Copy the partial-span @ the end & we're done. (%string-copy! target i s start (+ start (- total-chars (- i tstart))))) (%string-copy! target i s start end))))); Copy a whole span. ;;; (string-join string-list [delimiter grammar]) => string ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Paste strings together using the delimiter string. ;;; ;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" ;;; ;;; DELIMITER defaults to a single space " " ;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix} ;;; and defaults to 'infix. ;;; ;;; I could rewrite this more efficiently -- precompute the length of the ;;; answer string, then allocate & fill it in iteratively. Using ;;; STRING-CONCATENATE is less efficient. (define (string-join strings . delim+grammar) (let-optionals* delim+grammar ((delim " " (string? delim)) (grammar 'infix)) (let ((buildit (lambda (lis final) (let recur ((lis lis)) (if (pair? lis) (cons delim (cons (car lis) (recur (cdr lis)))) final))))) (cond ((pair? strings) (string-concatenate (case grammar ((infix strict-infix) (cons (car strings) (buildit (cdr strings) '()))) ((prefix) (buildit strings '())) ((suffix) (cons (car strings) (buildit (cdr strings) (list delim)))) (else (error "Illegal join grammar" grammar string-join))))) ((not (null? strings)) (error "STRINGS parameter not list." strings string-join)) ;; STRINGS is () ((eq? grammar 'strict-infix) (error "Empty list cannot be joined with STRICT-INFIX grammar." string-join)) (else ""))))) ; Special-cased for infix grammar. ;;; Porting & performance-tuning notes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; See the section at the beginning of this file on external dependencies. ;;; ;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro. ;;; There are many, many optional arguments in this library; the complexity ;;; of parsing, defaulting & type-testing these parameters is handled with the ;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can ;;; rewrite the uses, port the hairy macro definition (which is implemented ;;; using a Clinger-Rees low-level explicit-renaming macro system), or port ;;; the simple, high-level definition, which is less efficient. ;;; ;;; There is a fair amount of argument checking. This is, strictly speaking, ;;; unnecessary -- the actual body of the procedures will blow up if, say, a ;;; START/END index is improper. However, the error message will not be as ;;; good as if the error were caught at the "higher level." Also, a very, very ;;; smart Scheme compiler may be able to exploit having the type checks done ;;; early, so that the actual body of the procedures can assume proper values. ;;; This isn't likely; this kind of compiler technology isn't common any ;;; longer. ;;; ;;; The overhead of optional-argument parsing is irritating. The optional ;;; arguments must be consed into a rest list on entry, and then parsed out. ;;; Function call should be a matter of a few register moves and a jump; it ;;; should not involve heap allocation! Your Scheme system may have a superior ;;; non-R5RS optional-argument system that can eliminate this overhead. If so, ;;; then this is a prime candidate for optimising these procedures, ;;; *especially* the many optional START/END index parameters. ;;; ;;; Note that optional arguments are also a barrier to procedure integration. ;;; If your Scheme system permits you to specify alternate entry points ;;; for a call when the number of optional arguments is known in a manner ;;; that enables inlining/integration, this can provide performance ;;; improvements. ;;; ;;; There is enough *explicit* error checking that *all* string-index ;;; operations should *never* produce a bounds error. Period. Feel like ;;; living dangerously? *Big* performance win to be had by replacing ;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops. ;;; Similarly, fixnum-specific operators can speed up the arithmetic done on ;;; the index values in the inner loops. The only arguments that are not ;;; completely error checked are ;;; - string lists (complete checking requires time proportional to the ;;; length of the list) ;;; - procedure arguments, such as char->char maps & predicates. ;;; There is no way to check the range & domain of procedures in Scheme. ;;; Procedures that take these parameters cannot fully check their ;;; arguments. But all other types to all other procedures are fully ;;; checked. ;;; ;;; This does open up the alternate possibility of simply *removing* these ;;; checks, and letting the safe primitives raise the errors. On a dumb ;;; Scheme system, this would provide speed (by eliminating the redundant ;;; error checks) at the cost of error-message clarity. ;;; ;;; See the comments preceding the hash function code for notes on tuning ;;; the default bound so that the code never overflows your implementation's ;;; fixnum size into bignum calculation. ;;; ;;; In an interpreted Scheme, some of these procedures, or the internal ;;; routines with % prefixes, are excellent candidates for being rewritten ;;; in C. Consider STRING-HASH, %STRING-COMPARE, the ;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX & ;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED, ;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!. ;;; ;;; It would also be nice to have the ability to mark some of these ;;; routines as candidates for inlining/integration. ;;; ;;; All the %-prefixed routines in this source code are written ;;; to be called internally to this library. They do *not* perform ;;; friendly error checks on the inputs; they assume everything is ;;; proper. They also do not take optional arguments. These two properties ;;; save calling overhead and enable procedure integration -- but they ;;; are not appropriate for exported routines. ;;; Copyright details ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; The prefix/suffix and comparison routines in this code had (extremely ;;; distant) origins in MIT Scheme's string lib, and was substantially ;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is ;;; covered by MIT Scheme's open source copyright. See below for details. ;;; ;;; The KMP string-search code was influenced by implementations written ;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this ;;; version was written from scratch by myself. ;;; ;;; The remainder of this code was written from scratch by myself for scsh. ;;; The scsh copyright is a BSD-style open source copyright. See below for ;;; details. ;;; -Olin Shivers ;;; MIT Scheme copyright terms ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This material was developed by the Scheme project at the Massachusetts ;;; Institute of Technology, Department of Electrical Engineering and ;;; Computer Science. Permission to copy and modify this software, to ;;; redistribute either the original software or a modified version, and ;;; to use this software for any purpose is granted, subject to the ;;; following restrictions and understandings. ;;; ;;; 1. Any copy made of this software must include this copyright notice ;;; in full. ;;; ;;; 2. Users of this software agree to make their best efforts (a) to ;;; return to the MIT Scheme project any improvements or extensions that ;;; they make, so that these may be included in future releases; and (b) ;;; to inform MIT of noteworthy uses of this software. ;;; ;;; 3. All materials developed as a consequence of the use of this ;;; software shall duly acknowledge such use, in accordance with the usual ;;; standards of acknowledging credit in academic research. ;;; ;;; 4. MIT has made no warrantee or representation that the operation of ;;; this software will be error-free, and MIT is under no obligation to ;;; provide any services, by way of maintenance, update, or otherwise. ;;; ;;; 5. In conjunction with products arising from the use of this material, ;;; there shall be no use of the name of the Massachusetts Institute of ;;; Technology nor of any adaptation thereof in any advertising, ;;; promotional, or sales literature without prior written consent from ;;; MIT in each case. ;;; Scsh copyright terms ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; All rights reserved. ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; 1. Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; 2. Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in the ;;; documentation and/or other materials provided with the distribution. ;;; 3. The name of the authors may not be used to endorse or promote products ;;; derived from this software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR ;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, ;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.