2001-03-21 17:28:31 -05:00
|
|
|
|
;;; SRFI 13 string library reference implementation -*- Scheme -*-
|
|
|
|
|
;;; Olin Shivers 7/2000
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology.
|
|
|
|
|
;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved.
|
|
|
|
|
;;; The details of the copyrights appear at the end of the file. Short
|
|
|
|
|
;;; summary: BSD-style open source.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;;; Exports:
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-map string-map!
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-fold string-unfold
|
|
|
|
|
;;; string-fold-right string-unfold-right
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-tabulate string-for-each string-for-each-index
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-every string-any
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-hash string-hash-ci
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-compare string-compare-ci
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string= string< string> string<= string>= string<>
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-downcase string-upcase string-titlecase
|
|
|
|
|
;;; string-downcase! string-upcase! string-titlecase!
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-take string-take-right
|
|
|
|
|
;;; string-drop string-drop-right
|
|
|
|
|
;;; string-pad string-pad-right
|
|
|
|
|
;;; string-trim string-trim-right string-trim-both
|
|
|
|
|
;;; string-filter string-delete
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-index string-index-right
|
|
|
|
|
;;; string-skip string-skip-right
|
|
|
|
|
;;; string-count
|
|
|
|
|
;;; string-prefix-length string-prefix-length-ci
|
|
|
|
|
;;; string-suffix-length string-suffix-length-ci
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-prefix? string-prefix-ci?
|
|
|
|
|
;;; string-suffix? string-suffix-ci?
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-contains string-contains-ci
|
|
|
|
|
;;; string-copy! substring/shared
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-reverse string-reverse! reverse-list->string
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-concatenate string-concatenate/shared string-concatenate-reverse
|
|
|
|
|
;;; string-append/shared
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; xsubstring string-xcopy!
|
|
|
|
|
;;; string-null?
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-join
|
|
|
|
|
;;; string-tokenize
|
|
|
|
|
;;; string-replace
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; R5RS extended:
|
|
|
|
|
;;; string->list string-copy string-fill!
|
|
|
|
|
;;;
|
|
|
|
|
;;; R5RS re-exports:
|
|
|
|
|
;;; string? make-string string-length string-ref string-set!
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; R5RS re-exports (also defined here but commented-out):
|
|
|
|
|
;;; string string-append list->string
|
|
|
|
|
;;;
|
|
|
|
|
;;; Low-level routines:
|
|
|
|
|
;;; make-kmp-restart-vector string-kmp-partial-search kmp-step
|
|
|
|
|
;;; string-parse-start+end
|
|
|
|
|
;;; string-parse-final-start+end
|
|
|
|
|
;;; let-string-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; check-substring-spec
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; substring-spec-ok?
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;;; Imports
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; This is a fairly large library. While it was written for portability, you
|
|
|
|
|
;;; must be aware of its dependencies in order to run it in a given scheme
|
|
|
|
|
;;; implementation. Here is a complete list of the dependencies it has and the
|
|
|
|
|
;;; assumptions it makes beyond stock R5RS Scheme:
|
|
|
|
|
;;;
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; This code has the following non-R5RS dependencies:
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro;
|
|
|
|
|
;;;
|
|
|
|
|
;;; - Various imports from the char-set library for the routines that can
|
|
|
|
|
;;; take char-set arguments;
|
|
|
|
|
;;;
|
|
|
|
|
;;; - An n-ary ERROR procedure;
|
|
|
|
|
;;;
|
|
|
|
|
;;; - BITWISE-AND for the hash functions;
|
|
|
|
|
;;;
|
|
|
|
|
;;; - A simple CHECK-ARG procedure for checking parameter values; it is
|
|
|
|
|
;;; (lambda (pred val proc)
|
|
|
|
|
;;; (if (pred val) val (error "Bad arg" val pred proc)))
|
|
|
|
|
;;;
|
|
|
|
|
;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting &
|
|
|
|
|
;;; type-checking optional parameters from a rest argument;
|
|
|
|
|
;;;
|
|
|
|
|
;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE &
|
|
|
|
|
;;; STRING-TITLECASE! procedures. The former returns true iff a character is
|
|
|
|
|
;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z.
|
|
|
|
|
;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII &
|
|
|
|
|
;;; Latin-1, it is the same as CHAR-UPCASE.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The code depends upon a small set of core string primitives from R5RS:
|
|
|
|
|
;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING
|
|
|
|
|
;;; (Actually, SUBSTRING is not a primitive, but we assume that an
|
|
|
|
|
;;; implementation's native version is probably faster than one we could
|
|
|
|
|
;;; define, so we import it from R5RS.)
|
|
|
|
|
;;;
|
|
|
|
|
;;; The code depends upon a small set of R5RS character primitives:
|
|
|
|
|
;;; char? char=? char-ci=? char<? char-ci<?
|
|
|
|
|
;;; char-upcase char-downcase
|
|
|
|
|
;;; char->integer (for the hash functions)
|
|
|
|
|
;;;
|
|
|
|
|
;;; We assume the following:
|
|
|
|
|
;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE
|
|
|
|
|
;;; - CHAR-CI=? is equivalent to
|
|
|
|
|
;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1))
|
|
|
|
|
;;; (char-downcase (char-upcase c2))))
|
|
|
|
|
;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive
|
|
|
|
|
;;; and consistent with Unicode's 1-1 char-mapping spec.
|
|
|
|
|
;;; These things are typically true, but if not, you would need to modify
|
|
|
|
|
;;; the case-mapping and case-insensitive routines.
|
|
|
|
|
|
|
|
|
|
;;; Enough introductory blather. On to the source code. (But see the end of
|
|
|
|
|
;;; the file for further notes on porting & performance tuning.)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; Support for START/END substring specs
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; This macro parses optional start/end arguments from arg lists, defaulting
|
|
|
|
|
;;; them to 0/(string-length s), and checks them for correctness.
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define-syntax let-string-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(syntax-rules ()
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((let-string-start+end (start end) proc s-exp args-exp body ...)
|
|
|
|
|
(receive (start end) (string-parse-final-start+end proc s-exp args-exp)
|
|
|
|
|
body ...))
|
|
|
|
|
((let-string-start+end (start end rest) proc s-exp args-exp body ...)
|
|
|
|
|
(receive (rest start end) (string-parse-start+end proc s-exp args-exp)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
body ...))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; This one parses out a *pair* of final start/end indices.
|
|
|
|
|
;;; Not exported; for internal use.
|
|
|
|
|
(define-syntax let-string-start+end2
|
|
|
|
|
(syntax-rules ()
|
|
|
|
|
((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...)
|
|
|
|
|
(let ((procv proc)) ; Make sure PROC is only evaluated once.
|
|
|
|
|
(let-string-start+end (start1 end1 rest) procv s1 args
|
|
|
|
|
(let-string-start+end (start2 end2) procv s2 rest
|
|
|
|
|
body ...))))))
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Returns three values: rest start end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-parse-start+end proc s args)
|
|
|
|
|
(if (not (string? s)) (error "Non-string value" proc s))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((slen (string-length s)))
|
|
|
|
|
(if (pair? args)
|
|
|
|
|
|
|
|
|
|
(let ((start (car args))
|
|
|
|
|
(args (cdr args)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (and (integer? start) (exact? start) (>= start 0))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(receive (end args)
|
|
|
|
|
(if (pair? args)
|
|
|
|
|
(let ((end (car args))
|
|
|
|
|
(args (cdr args)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (and (integer? end) (exact? end) (<= end slen))
|
|
|
|
|
(values end args)
|
|
|
|
|
(error "Illegal substring END spec" proc end s)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(values slen args))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (<= start end) (values args start end)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(error "Illegal substring START/END spec"
|
2001-03-21 17:28:31 -05:00
|
|
|
|
proc start end s)))
|
|
|
|
|
(error "Illegal substring START spec" proc start s)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(values '() 0 slen))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-parse-final-start+end proc s args)
|
|
|
|
|
(receive (rest start end) (string-parse-start+end proc s args)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(if (pair? rest) (error "Extra arguments to procedure" proc rest)
|
|
|
|
|
(values start end))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (substring-spec-ok? s start end)
|
|
|
|
|
(and (string? s)
|
|
|
|
|
(integer? start)
|
|
|
|
|
(exact? start)
|
|
|
|
|
(integer? end)
|
|
|
|
|
(exact? end)
|
|
|
|
|
(<= 0 start)
|
|
|
|
|
(<= start end)
|
|
|
|
|
(<= end (string-length s))))
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(define (check-substring-spec proc s start end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (not (substring-spec-ok? s start end))
|
|
|
|
|
(error "Illegal substring spec." proc s start end)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Defined by R5RS, so commented out here.
|
|
|
|
|
;(define (string . chars)
|
|
|
|
|
; (let* ((len (length chars))
|
|
|
|
|
; (ans (make-string len)))
|
|
|
|
|
; (do ((i 0 (+ i 1))
|
|
|
|
|
; (chars chars (cdr chars)))
|
|
|
|
|
; ((>= i len))
|
|
|
|
|
; (string-set! ans i (car chars)))
|
|
|
|
|
; ans))
|
|
|
|
|
;
|
|
|
|
|
;(define (string . chars) (string-unfold null? car cdr chars))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; substring/shared S START [END]
|
|
|
|
|
;;; string-copy S [START END]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; All this goop is just arg parsing & checking surrounding a call to the
|
|
|
|
|
;;; actual primitive, %SUBSTRING/SHARED.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (substring/shared s start . maybe-end)
|
|
|
|
|
(check-arg string? s substring/shared)
|
|
|
|
|
(let ((slen (string-length s)))
|
|
|
|
|
(check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start)))
|
|
|
|
|
start substring/shared)
|
|
|
|
|
(%substring/shared s start
|
|
|
|
|
(:optional maybe-end slen
|
|
|
|
|
(lambda (end) (and (integer? end)
|
|
|
|
|
(exact? end)
|
|
|
|
|
(<= start end)
|
|
|
|
|
(<= end slen)))))))
|
|
|
|
|
|
|
|
|
|
;;; Split out so that other routines in this library can avoid arg-parsing
|
|
|
|
|
;;; overhead for END parameter.
|
|
|
|
|
(define (%substring/shared s start end)
|
|
|
|
|
(if (and (zero? start) (= end (string-length s))) s
|
|
|
|
|
(substring s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-copy s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-copy s maybe-start+end
|
|
|
|
|
(substring s start end)))
|
|
|
|
|
|
|
|
|
|
;This library uses the R5RS SUBSTRING, but doesn't export it.
|
|
|
|
|
;Here is a definition, just for completeness.
|
|
|
|
|
;(define (substring s start end)
|
|
|
|
|
; (check-substring-spec substring s start end)
|
|
|
|
|
; (let* ((slen (- end start))
|
|
|
|
|
; (ans (make-string slen)))
|
|
|
|
|
; (do ((i 0 (+ i 1))
|
|
|
|
|
; (j start (+ j 1)))
|
|
|
|
|
; ((>= i slen) ans)
|
|
|
|
|
; (string-set! ans i (string-ref s j)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;;; Basic iterators and other higher-order abstractions
|
|
|
|
|
;;; (string-map proc s [start end])
|
|
|
|
|
;;; (string-map! proc s [start end])
|
|
|
|
|
;;; (string-fold kons knil s [start end])
|
|
|
|
|
;;; (string-fold-right kons knil s [start end])
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (string-unfold p f g seed [base make-final])
|
|
|
|
|
;;; (string-unfold-right p f g seed [base make-final])
|
|
|
|
|
;;; (string-for-each proc s [start end])
|
|
|
|
|
;;; (string-for-each-index proc s [start end])
|
|
|
|
|
;;; (string-every char-set/char/pred s [start end])
|
|
|
|
|
;;; (string-any char-set/char/pred s [start end])
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; (string-tabulate proc len)
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; You want compiler support for high-level transforms on fold and unfold ops.
|
|
|
|
|
;;; You'd at least like a lot of inlining for clients of these procedures.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Don't hold your breath.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-map proc s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? proc string-map)
|
|
|
|
|
(let-string-start+end (start end) string-map s maybe-start+end
|
|
|
|
|
(%string-map proc s start end)))
|
|
|
|
|
|
|
|
|
|
(define (%string-map proc s start end) ; Internal utility
|
|
|
|
|
(let* ((len (- end start))
|
|
|
|
|
(ans (make-string len)))
|
|
|
|
|
(do ((i (- end 1) (- i 1))
|
|
|
|
|
(j (- len 1) (- j 1)))
|
|
|
|
|
((< j 0))
|
|
|
|
|
(string-set! ans j (proc (string-ref s i))))
|
|
|
|
|
ans))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-map! proc s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? proc string-map!)
|
|
|
|
|
(let-string-start+end (start end) string-map! s maybe-start+end
|
|
|
|
|
(%string-map! proc s start end)))
|
|
|
|
|
|
|
|
|
|
(define (%string-map! proc s start end)
|
|
|
|
|
(do ((i (- end 1) (- i 1)))
|
|
|
|
|
((< i start))
|
|
|
|
|
(string-set! s i (proc (string-ref s i)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-fold kons knil s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? kons string-fold)
|
|
|
|
|
(let-string-start+end (start end) string-fold s maybe-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((v knil) (i start))
|
|
|
|
|
(if (< i end) (lp (kons (string-ref s i) v) (+ i 1))
|
|
|
|
|
v))))
|
|
|
|
|
|
|
|
|
|
(define (string-fold-right kons knil s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? kons string-fold-right)
|
|
|
|
|
(let-string-start+end (start end) string-fold-right s maybe-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((v knil) (i (- end 1)))
|
|
|
|
|
(if (>= i start) (lp (kons (string-ref s i) v) (- i 1))
|
|
|
|
|
v))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (string-unfold p f g seed [base make-final])
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; This is the fundamental constructor for strings.
|
|
|
|
|
;;; - G is used to generate a series of "seed" values from the initial seed:
|
|
|
|
|
;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
|
|
|
|
|
;;; - P tells us when to stop -- when it returns true when applied to one
|
|
|
|
|
;;; of these seed values.
|
|
|
|
|
;;; - F maps each seed value to the corresponding character
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; in the result string. These chars are assembled into the
|
|
|
|
|
;;; string in a left-to-right order.
|
|
|
|
|
;;; - BASE is the optional initial/leftmost portion of the constructed string;
|
|
|
|
|
;;; it defaults to the empty string "".
|
|
|
|
|
;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns
|
|
|
|
|
;;; true) to produce the final/rightmost portion of the constructed string.
|
|
|
|
|
;;; It defaults to (LAMBDA (X) "").
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
|
|
|
|
;;; In other words, the following (simple, inefficient) definition holds:
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (define (string-unfold p f g seed base make-final)
|
|
|
|
|
;;; (string-append base
|
|
|
|
|
;;; (let recur ((seed seed))
|
|
|
|
|
;;; (if (p seed) (make-final seed)
|
|
|
|
|
;;; (string-append (string (f seed))
|
|
|
|
|
;;; (recur (g seed)))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
|
|
|
|
;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to
|
|
|
|
|
;;; reverse a string, copy a string, convert a list to a string, read
|
|
|
|
|
;;; a port into a string, and so forth. Examples:
|
|
|
|
|
;;; (port->string port) =
|
|
|
|
|
;;; (string-unfold (compose eof-object? peek-char)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; read-char values port)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
|
|
|
|
;;; (list->string lis) = (string-unfold null? car cdr lis)
|
|
|
|
|
;;;
|
|
|
|
|
;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0)
|
|
|
|
|
|
|
|
|
|
;;; A problem with the following simple formulation is that it pushes one
|
|
|
|
|
;;; stack frame for every char in the result string -- an issue if you are
|
|
|
|
|
;;; using it to read a 100kchar string. So we don't use it -- but I include
|
|
|
|
|
;;; it to give a clear, straightforward description of what the function
|
|
|
|
|
;;; does.
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;(define (string-unfold p f g seed base make-final)
|
|
|
|
|
; (let ((ans (let recur ((seed seed) (i (string-length base)))
|
|
|
|
|
; (if (p seed)
|
|
|
|
|
; (let* ((final (make-final seed))
|
|
|
|
|
; (ans (make-string (+ i (string-length final)))))
|
|
|
|
|
; (string-copy! ans i final)
|
|
|
|
|
; ans)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
; (let* ((c (f seed))
|
|
|
|
|
; (s (recur (g seed) (+ i 1))))
|
|
|
|
|
; (string-set! s i c)
|
|
|
|
|
; s)))))
|
|
|
|
|
; (string-copy! ans 0 base)
|
|
|
|
|
; ans))
|
|
|
|
|
|
|
|
|
|
;;; The strategy is to allocate a series of chunks into which we stash the
|
|
|
|
|
;;; chars as we generate them. Chunk size goes up in powers of two starting
|
|
|
|
|
;;; with 40 and levelling out at 4k, i.e.
|
|
|
|
|
;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096...
|
|
|
|
|
;;; This should work pretty well for short strings, 1-line (80 char) strings,
|
|
|
|
|
;;; and longer ones. When done, we allocate an answer string and copy the
|
|
|
|
|
;;; chars over from the chunk buffers.
|
|
|
|
|
|
|
|
|
|
(define (string-unfold p f g seed . base+make-final)
|
|
|
|
|
(check-arg procedure? p string-unfold)
|
|
|
|
|
(check-arg procedure? f string-unfold)
|
|
|
|
|
(check-arg procedure? g string-unfold)
|
|
|
|
|
(let-optionals* base+make-final
|
|
|
|
|
((base "" (string? base))
|
|
|
|
|
(make-final (lambda (x) "") (procedure? make-final)))
|
|
|
|
|
(let lp ((chunks '()) ; Previously filled chunks
|
|
|
|
|
(nchars 0) ; Number of chars in CHUNKS
|
|
|
|
|
(chunk (make-string 40)) ; Current chunk into which we write
|
|
|
|
|
(chunk-len 40)
|
|
|
|
|
(i 0) ; Number of chars written into CHUNK
|
|
|
|
|
(seed seed))
|
|
|
|
|
(let lp2 ((i i) (seed seed))
|
|
|
|
|
(if (not (p seed))
|
|
|
|
|
(let ((c (f seed))
|
|
|
|
|
(seed (g seed)))
|
|
|
|
|
(if (< i chunk-len)
|
|
|
|
|
(begin (string-set! chunk i c)
|
|
|
|
|
(lp2 (+ i 1) seed))
|
|
|
|
|
|
|
|
|
|
(let* ((nchars2 (+ chunk-len nchars))
|
|
|
|
|
(chunk-len2 (min 4096 nchars2))
|
|
|
|
|
(new-chunk (make-string chunk-len2)))
|
|
|
|
|
(string-set! new-chunk 0 c)
|
|
|
|
|
(lp (cons chunk chunks) (+ nchars chunk-len)
|
|
|
|
|
new-chunk chunk-len2 1 seed))))
|
|
|
|
|
|
|
|
|
|
;; We're done. Make the answer string & install the bits.
|
|
|
|
|
(let* ((final (make-final seed))
|
|
|
|
|
(flen (string-length final))
|
|
|
|
|
(base-len (string-length base))
|
|
|
|
|
(j (+ base-len nchars i))
|
|
|
|
|
(ans (make-string (+ j flen))))
|
|
|
|
|
(%string-copy! ans j final 0 flen) ; Install FINAL.
|
|
|
|
|
(let ((j (- j i)))
|
|
|
|
|
(%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I).
|
|
|
|
|
(let lp ((j j) (chunks chunks)) ; Install CHUNKS.
|
|
|
|
|
(if (pair? chunks)
|
|
|
|
|
(let* ((chunk (car chunks))
|
|
|
|
|
(chunks (cdr chunks))
|
|
|
|
|
(chunk-len (string-length chunk))
|
|
|
|
|
(j (- j chunk-len)))
|
|
|
|
|
(%string-copy! ans j chunk 0 chunk-len)
|
|
|
|
|
(lp j chunks)))))
|
|
|
|
|
(%string-copy! ans 0 base 0 base-len) ; Install BASE.
|
|
|
|
|
ans))))))
|
|
|
|
|
|
|
|
|
|
(define (string-unfold-right p f g seed . base+make-final)
|
|
|
|
|
(let-optionals* base+make-final
|
|
|
|
|
((base "" (string? base))
|
|
|
|
|
(make-final (lambda (x) "") (procedure? make-final)))
|
|
|
|
|
(let lp ((chunks '()) ; Previously filled chunks
|
|
|
|
|
(nchars 0) ; Number of chars in CHUNKS
|
|
|
|
|
(chunk (make-string 40)) ; Current chunk into which we write
|
|
|
|
|
(chunk-len 40)
|
|
|
|
|
(i 40) ; Number of chars available in CHUNK
|
|
|
|
|
(seed seed))
|
|
|
|
|
(let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right
|
|
|
|
|
(if (not (p seed)) ; to left.
|
|
|
|
|
(let ((c (f seed))
|
|
|
|
|
(seed (g seed)))
|
|
|
|
|
(if (> i 0)
|
|
|
|
|
(let ((i (- i 1)))
|
|
|
|
|
(string-set! chunk i c)
|
|
|
|
|
(lp2 i seed))
|
|
|
|
|
|
|
|
|
|
(let* ((nchars2 (+ chunk-len nchars))
|
|
|
|
|
(chunk-len2 (min 4096 nchars2))
|
|
|
|
|
(new-chunk (make-string chunk-len2))
|
|
|
|
|
(i (- chunk-len2 1)))
|
|
|
|
|
(string-set! new-chunk i c)
|
|
|
|
|
(lp (cons chunk chunks) (+ nchars chunk-len)
|
|
|
|
|
new-chunk chunk-len2 i seed))))
|
|
|
|
|
|
|
|
|
|
;; We're done. Make the answer string & install the bits.
|
|
|
|
|
(let* ((final (make-final seed))
|
|
|
|
|
(flen (string-length final))
|
|
|
|
|
(base-len (string-length base))
|
|
|
|
|
(chunk-used (- chunk-len i))
|
|
|
|
|
(j (+ base-len nchars chunk-used))
|
|
|
|
|
(ans (make-string (+ j flen))))
|
|
|
|
|
(%string-copy! ans 0 final 0 flen) ; Install FINAL.
|
|
|
|
|
(%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,).
|
|
|
|
|
(let lp ((j (+ flen chunk-used)) ; Install CHUNKS.
|
|
|
|
|
(chunks chunks))
|
|
|
|
|
(if (pair? chunks)
|
|
|
|
|
(let* ((chunk (car chunks))
|
|
|
|
|
(chunks (cdr chunks))
|
|
|
|
|
(chunk-len (string-length chunk)))
|
|
|
|
|
(%string-copy! ans j chunk 0 chunk-len)
|
|
|
|
|
(lp (+ j chunk-len) chunks))
|
|
|
|
|
(%string-copy! ans j base 0 base-len))); Install BASE.
|
|
|
|
|
ans))))))
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-for-each proc s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? proc string-for-each)
|
|
|
|
|
(let-string-start+end (start end) string-for-each s maybe-start+end
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(if (< i end)
|
|
|
|
|
(begin (proc (string-ref s i))
|
|
|
|
|
(lp (+ i 1)))))))
|
|
|
|
|
|
|
|
|
|
(define (string-for-each-index proc s . maybe-start+end)
|
|
|
|
|
(check-arg procedure? proc string-for-each-index)
|
|
|
|
|
(let-string-start+end (start end) string-for-each-index s maybe-start+end
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(if (< i end) (begin (proc i) (lp (+ i 1)))))))
|
|
|
|
|
|
|
|
|
|
(define (string-every criterion s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-every s maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(or (>= i end)
|
|
|
|
|
(and (char=? criterion (string-ref s i))
|
|
|
|
|
(lp (+ i 1))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((char-set? criterion)
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(or (>= i end)
|
|
|
|
|
(and (char-set-contains? criterion (string-ref s i))
|
|
|
|
|
(lp (+ i 1))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((procedure? criterion) ; Slightly funky loop so that
|
|
|
|
|
(or (= start end) ; final (PRED S[END-1]) call
|
|
|
|
|
(let lp ((i start)) ; is a tail call.
|
|
|
|
|
(let ((c (string-ref s i))
|
|
|
|
|
(i1 (+ i 1)))
|
|
|
|
|
(if (= i1 end) (criterion c) ; Tail call.
|
|
|
|
|
(and (criterion c) (lp i1)))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(else (error "Second param is neither char-set, char, or predicate procedure."
|
|
|
|
|
string-every criterion)))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (string-any criterion s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-any s maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
|
|
|
|
(or (char=? criterion (string-ref s i))
|
|
|
|
|
(lp (+ i 1))))))
|
|
|
|
|
|
|
|
|
|
((char-set? criterion)
|
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
|
|
|
|
(or (char-set-contains? criterion (string-ref s i))
|
|
|
|
|
(lp (+ i 1))))))
|
|
|
|
|
|
|
|
|
|
((procedure? criterion) ; Slightly funky loop so that
|
|
|
|
|
(and (< start end) ; final (PRED S[END-1]) call
|
|
|
|
|
(let lp ((i start)) ; is a tail call.
|
|
|
|
|
(let ((c (string-ref s i))
|
|
|
|
|
(i1 (+ i 1)))
|
|
|
|
|
(if (= i1 end) (criterion c) ; Tail call
|
|
|
|
|
(or (criterion c) (lp i1)))))))
|
|
|
|
|
|
|
|
|
|
(else (error "Second param is neither char-set, char, or predicate procedure."
|
|
|
|
|
string-any criterion)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (string-tabulate proc len)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg procedure? proc string-tabulate)
|
|
|
|
|
(check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val)))
|
|
|
|
|
len string-tabulate)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((s (make-string len)))
|
|
|
|
|
(do ((i (- len 1) (- i 1)))
|
|
|
|
|
((< i 0))
|
|
|
|
|
(string-set! s i (proc i)))
|
|
|
|
|
s))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2]
|
|
|
|
|
;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Find the length of the common prefix/suffix.
|
|
|
|
|
;;; It is not required that the two substrings passed be of equal length.
|
|
|
|
|
;;; This was microcode in MIT Scheme -- a very tightly bummed primitive.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons,
|
|
|
|
|
;;; so should be as tense as possible.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-prefix-length s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((delta (min (- end1 start1) (- end2 start2)))
|
|
|
|
|
(end1 (+ start1 delta)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
|
|
|
|
|
delta
|
|
|
|
|
|
|
|
|
|
(let lp ((i start1) (j start2)) ; Regular path
|
|
|
|
|
(if (or (>= i end1)
|
|
|
|
|
(not (char=? (string-ref s1 i)
|
|
|
|
|
(string-ref s2 j))))
|
|
|
|
|
(- i start1)
|
|
|
|
|
(lp (+ i 1) (+ j 1)))))))
|
|
|
|
|
|
|
|
|
|
(define (%string-suffix-length s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((delta (min (- end1 start1) (- end2 start2)))
|
|
|
|
|
(start1 (- end1 delta)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
(if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
|
|
|
|
|
delta
|
|
|
|
|
|
|
|
|
|
(let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
|
|
|
|
|
(if (or (< i start1)
|
|
|
|
|
(not (char=? (string-ref s1 i)
|
|
|
|
|
(string-ref s2 j))))
|
|
|
|
|
(- (- end1 i) 1)
|
|
|
|
|
(lp (- i 1) (- j 1)))))))
|
|
|
|
|
|
|
|
|
|
(define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((delta (min (- end1 start1) (- end2 start2)))
|
|
|
|
|
(end1 (+ start1 delta)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
|
|
|
|
|
delta
|
|
|
|
|
|
|
|
|
|
(let lp ((i start1) (j start2)) ; Regular path
|
|
|
|
|
(if (or (>= i end1)
|
|
|
|
|
(not (char-ci=? (string-ref s1 i)
|
|
|
|
|
(string-ref s2 j))))
|
|
|
|
|
(- i start1)
|
|
|
|
|
(lp (+ i 1) (+ j 1)))))))
|
|
|
|
|
|
|
|
|
|
(define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((delta (min (- end1 start1) (- end2 start2)))
|
|
|
|
|
(start1 (- end1 delta)))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
|
|
|
|
|
delta
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
|
|
|
|
|
(if (or (< i start1)
|
|
|
|
|
(not (char-ci=? (string-ref s1 i)
|
|
|
|
|
(string-ref s2 j))))
|
|
|
|
|
(- (- end1 i) 1)
|
|
|
|
|
(lp (- i 1) (- j 1)))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-prefix-length s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-prefix-length s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-prefix-length s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-suffix-length s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-suffix-length s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-suffix-length s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-prefix-length-ci s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-prefix-length-ci s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-suffix-length-ci s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-suffix-length-ci s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-suffix-length-ci s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
;;; string-prefix? s1 s2 [start1 end1 start2 end2]
|
|
|
|
|
;;; string-suffix? s1 s2 [start1 end1 start2 end2]
|
|
|
|
|
;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2]
|
|
|
|
|
;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; These are all simple derivatives of the previous counting funs.
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-prefix? s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-prefix? s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-prefix? s1 start1 end1 s2 start2 end2)))
|
|
|
|
|
|
|
|
|
|
(define (string-suffix? s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-suffix? s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-suffix? s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-prefix-ci? s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-prefix-ci? s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-prefix-ci? s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-suffix-ci? s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-suffix-ci? s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-suffix-ci? s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Here are the internal routines that do the real work.
|
|
|
|
|
|
|
|
|
|
(define (%string-prefix? s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((len1 (- end1 start1)))
|
|
|
|
|
(and (<= len1 (- end2 start2)) ; Quick check
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(= (%string-prefix-length s1 start1 end1
|
|
|
|
|
s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
len1))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-suffix? s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((len1 (- end1 start1)))
|
|
|
|
|
(and (<= len1 (- end2 start2)) ; Quick check
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(= len1 (%string-suffix-length s1 start1 end1
|
|
|
|
|
s2 start2 end2)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-prefix-ci? s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((len1 (- end1 start1)))
|
|
|
|
|
(and (<= len1 (- end2 start2)) ; Quick check
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(= len1 (%string-prefix-length-ci s1 start1 end1
|
|
|
|
|
s2 start2 end2)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-suffix-ci? s1 start1 end1 s2 start2 end2)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((len1 (- end1 start1)))
|
|
|
|
|
(and (<= len1 (- end2 start2)) ; Quick check
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(= len1 (%string-suffix-length-ci s1 start1 end1
|
|
|
|
|
s2 start2 end2)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2]
|
|
|
|
|
;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Primitive string-comparison functions.
|
|
|
|
|
;;; Continuation order is different from MIT Scheme.
|
|
|
|
|
;;; Continuations are applied to s1's mismatch index;
|
|
|
|
|
;;; in the case of equality, this is END1.
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-compare s1 start1 end1 s2 start2 end2
|
1999-09-23 11:24:25 -04:00
|
|
|
|
proc< proc= proc>)
|
|
|
|
|
(let ((size1 (- end1 start1))
|
|
|
|
|
(size2 (- end2 start2)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(if (= match size1)
|
|
|
|
|
((if (= match size2) proc= proc<) end1)
|
|
|
|
|
((if (= match size2)
|
|
|
|
|
proc>
|
|
|
|
|
(if (char<? (string-ref s1 (+ start1 match))
|
|
|
|
|
(string-ref s2 (+ start2 match)))
|
|
|
|
|
proc< proc>))
|
|
|
|
|
(+ match start1))))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-compare-ci s1 start1 end1 s2 start2 end2
|
1999-09-23 11:24:25 -04:00
|
|
|
|
proc< proc= proc>)
|
|
|
|
|
(let ((size1 (- end1 start1))
|
|
|
|
|
(size2 (- end2 start2)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(if (= match size1)
|
|
|
|
|
((if (= match size2) proc= proc<) end1)
|
|
|
|
|
((if (= match size2) proc>
|
|
|
|
|
(if (char-ci<? (string-ref s1 (+ start1 match))
|
|
|
|
|
(string-ref s2 (+ start2 match)))
|
|
|
|
|
proc< proc>))
|
|
|
|
|
(+ start1 match))))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends)
|
|
|
|
|
(check-arg procedure? proc< string-compare)
|
|
|
|
|
(check-arg procedure? proc= string-compare)
|
|
|
|
|
(check-arg procedure? proc> string-compare)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-compare s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
|
|
|
|
|
|
|
|
|
|
(define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends)
|
|
|
|
|
(check-arg procedure? proc< string-compare-ci)
|
|
|
|
|
(check-arg procedure? proc= string-compare-ci)
|
|
|
|
|
(check-arg procedure? proc> string-compare-ci)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-compare-ci s1 s2 maybe-starts+ends
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; string= string<> string-ci= string-ci<>
|
|
|
|
|
;;; string< string> string-ci< string-ci>
|
|
|
|
|
;;; string<= string>= string-ci<= string-ci>=
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Simple definitions in terms of the previous comparison funs.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; I sure hope the %STRING-COMPARE calls get integrated.
|
|
|
|
|
|
|
|
|
|
(define (string= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string= s1 s2 maybe-starts+ends
|
|
|
|
|
(and (= (- end1 start1) (- end2 start2)) ; Quick filter
|
|
|
|
|
(or (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f))))))
|
|
|
|
|
|
|
|
|
|
(define (string<> s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string<> s1 s2 maybe-starts+ends
|
|
|
|
|
(or (not (= (- end1 start1) (- end2 start2))) ; Fast path
|
|
|
|
|
(and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values)))))
|
|
|
|
|
|
|
|
|
|
(define (string< s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string< s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(< end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
(lambda (i) #f)))))
|
|
|
|
|
|
|
|
|
|
(define (string> s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string> s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(> end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values))))
|
|
|
|
|
|
|
|
|
|
(define (string<= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string<= s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(<= end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)))))
|
|
|
|
|
|
|
|
|
|
(define (string>= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string>= s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(>= end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values
|
|
|
|
|
values))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci= s1 s2 maybe-starts+ends
|
|
|
|
|
(and (= (- end1 start1) (- end2 start2)) ; Quick filter
|
|
|
|
|
(or (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f))))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci<> s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci<> s1 s2 maybe-starts+ends
|
|
|
|
|
(or (not (= (- end1 start1) (- end2 start2))) ; Fast path
|
|
|
|
|
(and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values)))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci< s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci< s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(< end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
(lambda (i) #f)))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci> s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci> s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(> end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci<= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci<= s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(<= end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
values
|
|
|
|
|
values
|
|
|
|
|
(lambda (i) #f)))))
|
|
|
|
|
|
|
|
|
|
(define (string-ci>= s1 s2 . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
string-ci>= s1 s2 maybe-starts+ends
|
|
|
|
|
(if (and (eq? s1 s2) (= start1 start2)) ; Fast path
|
|
|
|
|
(>= end1 end2)
|
|
|
|
|
|
|
|
|
|
(%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
|
|
|
|
|
(lambda (i) #f)
|
|
|
|
|
values
|
|
|
|
|
values))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Hash
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
|
|
|
|
|
;;; to keep the intermediate values small. (We do the calculation with just
|
|
|
|
|
;;; enough bits to represent BOUND, masking off high bits at each step in
|
|
|
|
|
;;; calculation. If this screws up any important properties of the hash
|
|
|
|
|
;;; function I'd like to hear about it. -Olin)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; If you keep BOUND small enough, the intermediate calculations will
|
|
|
|
|
;;; always be fixnums. How small is dependent on the underlying Scheme system;
|
|
|
|
|
;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
|
|
|
|
|
;;; Schemes that give you at least 29 signed bits for fixnums. The core
|
|
|
|
|
;;; calculation that you don't want to overflow is, worst case,
|
|
|
|
|
;;; (+ 65535 (* 37 (- bound 1)))
|
|
|
|
|
;;; where 65535 is the max character code. Choose the default BOUND to be the
|
|
|
|
|
;;; biggest power of two that won't cause this expression to fixnum overflow,
|
|
|
|
|
;;; and everything will be copacetic.
|
|
|
|
|
|
|
|
|
|
(define (%string-hash s char->int bound start end)
|
|
|
|
|
(let ((iref (lambda (s i) (char->int (string-ref s i))))
|
|
|
|
|
;; Compute a 111...1 mask that will cover BOUND-1:
|
|
|
|
|
(mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
|
|
|
|
|
(if (>= i bound) (- i 1) (lp (+ i i))))))
|
|
|
|
|
(let lp ((i start) (ans 0))
|
|
|
|
|
(if (>= i end) (modulo ans bound)
|
|
|
|
|
(lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i))))))))
|
|
|
|
|
|
|
|
|
|
(define (string-hash s . maybe-bound+start+end)
|
|
|
|
|
(let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
|
|
|
|
|
(exact? bound)
|
|
|
|
|
(<= 0 bound)))
|
|
|
|
|
rest)
|
|
|
|
|
(let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
|
|
|
|
|
(let-string-start+end (start end) string-hash s rest
|
|
|
|
|
(%string-hash s char->integer bound start end)))))
|
|
|
|
|
|
|
|
|
|
(define (string-hash-ci s . maybe-bound+start+end)
|
|
|
|
|
(let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
|
|
|
|
|
(exact? bound)
|
|
|
|
|
(<= 0 bound)))
|
|
|
|
|
rest)
|
|
|
|
|
(let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
|
|
|
|
|
(let-string-start+end (start end) string-hash-ci s rest
|
|
|
|
|
(%string-hash s (lambda (c) (char->integer (char-downcase c)))
|
|
|
|
|
bound start end)))))
|
|
|
|
|
|
|
|
|
|
;;; Case hacking
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-upcase s [start end]
|
|
|
|
|
;;; string-upcase! s [start end]
|
|
|
|
|
;;; string-downcase s [start end]
|
|
|
|
|
;;; string-downcase! s [start end]
|
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-titlecase s [start end]
|
|
|
|
|
;;; string-titlecase! s [start end]
|
|
|
|
|
;;; Capitalize every contiguous alpha sequence: capitalise
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; first char, lowercase rest.
|
|
|
|
|
|
|
|
|
|
(define (string-upcase s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-upcase s maybe-start+end
|
|
|
|
|
(%string-map char-upcase s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-upcase! s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-upcase! s maybe-start+end
|
|
|
|
|
(%string-map! char-upcase s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-downcase s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-downcase s maybe-start+end
|
|
|
|
|
(%string-map char-downcase s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-downcase! s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-downcase! s maybe-start+end
|
|
|
|
|
(%string-map! char-downcase s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%string-titlecase! s start end)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(cond ((string-index s char-cased? i end) =>
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lambda (i)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(string-set! s i (char-titlecase (string-ref s i)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((i1 (+ i 1)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(cond ((string-skip s char-cased? i1 end) =>
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lambda (j)
|
|
|
|
|
(string-downcase! s i1 j)
|
|
|
|
|
(lp (+ j 1))))
|
|
|
|
|
(else (string-downcase! s i1 end)))))))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-titlecase! s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-titlecase! s maybe-start+end
|
|
|
|
|
(%string-titlecase! s start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-titlecase s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-titlecase! s maybe-start+end
|
|
|
|
|
(let ((ans (substring s start end)))
|
|
|
|
|
(%string-titlecase! ans 0 (- end start))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
ans)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Cutting & pasting strings
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; string-take string nchars
|
|
|
|
|
;;; string-drop string nchars
|
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-take-right string nchars
|
|
|
|
|
;;; string-drop-right string nchars
|
|
|
|
|
;;;
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-pad string k [char start end]
|
|
|
|
|
;;; string-pad-right string k [char start end]
|
|
|
|
|
;;;
|
|
|
|
|
;;; string-trim string [char/char-set/pred start end]
|
|
|
|
|
;;; string-trim-right string [char/char-set/pred start end]
|
|
|
|
|
;;; string-trim-both string [char/char-set/pred start end]
|
|
|
|
|
;;;
|
|
|
|
|
;;; These trimmers invert the char-set meaning from MIT Scheme -- you
|
|
|
|
|
;;; say what you want to trim.
|
|
|
|
|
|
|
|
|
|
(define (string-take s n)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg string? s string-take)
|
|
|
|
|
(check-arg (lambda (val) (and (integer? n) (exact? n)
|
|
|
|
|
(<= 0 n (string-length s))))
|
|
|
|
|
n string-take)
|
|
|
|
|
(%substring/shared s 0 n))
|
|
|
|
|
|
|
|
|
|
(define (string-take-right s n)
|
|
|
|
|
(check-arg string? s string-take-right)
|
|
|
|
|
(let ((len (string-length s)))
|
|
|
|
|
(check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
|
|
|
|
|
n string-take-right)
|
|
|
|
|
(%substring/shared s (- len n) len)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-drop s n)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg string? s string-drop)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((len (string-length s)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
|
|
|
|
|
n string-drop)
|
|
|
|
|
(%substring/shared s n len)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-drop-right s n)
|
|
|
|
|
(check-arg string? s string-drop-right)
|
|
|
|
|
(let ((len (string-length s)))
|
|
|
|
|
(check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
|
|
|
|
|
n string-drop-right)
|
|
|
|
|
(%substring/shared s 0 (- len n))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (string-trim s . criterion+start+end)
|
|
|
|
|
(let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
|
|
|
|
|
(let-string-start+end (start end) string-trim s rest
|
|
|
|
|
(cond ((string-skip s criterion start end) =>
|
|
|
|
|
(lambda (i) (%substring/shared s i end)))
|
|
|
|
|
(else "")))))
|
|
|
|
|
|
|
|
|
|
(define (string-trim-right s . criterion+start+end)
|
|
|
|
|
(let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
|
|
|
|
|
(let-string-start+end (start end) string-trim-right s rest
|
|
|
|
|
(cond ((string-skip-right s criterion start end) =>
|
|
|
|
|
(lambda (i) (%substring/shared s 0 (+ 1 i))))
|
|
|
|
|
(else "")))))
|
|
|
|
|
|
|
|
|
|
(define (string-trim-both s . criterion+start+end)
|
|
|
|
|
(let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
|
|
|
|
|
(let-string-start+end (start end) string-trim-both s rest
|
|
|
|
|
(cond ((string-skip s criterion start end) =>
|
|
|
|
|
(lambda (i)
|
|
|
|
|
(%substring/shared s i (+ 1 (string-skip-right s criterion i end)))))
|
|
|
|
|
(else "")))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (string-pad-right s n . char+start+end)
|
|
|
|
|
(let-optionals* char+start+end ((char #\space (char? char)) rest)
|
|
|
|
|
(let-string-start+end (start end) string-pad-right s rest
|
|
|
|
|
(check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
|
|
|
|
|
n string-pad-right)
|
|
|
|
|
(let ((len (- end start)))
|
|
|
|
|
(if (<= n len)
|
|
|
|
|
(%substring/shared s start (+ start n))
|
|
|
|
|
(let ((ans (make-string n char)))
|
|
|
|
|
(%string-copy! ans 0 s start end)
|
|
|
|
|
ans))))))
|
|
|
|
|
|
|
|
|
|
(define (string-pad s n . char+start+end)
|
|
|
|
|
(let-optionals* char+start+end ((char #\space (char? char)) rest)
|
|
|
|
|
(let-string-start+end (start end) string-pad s rest
|
|
|
|
|
(check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
|
|
|
|
|
n string-pad)
|
|
|
|
|
(let ((len (- end start)))
|
|
|
|
|
(if (<= n len)
|
|
|
|
|
(%substring/shared s (- end n) end)
|
|
|
|
|
(let ((ans (make-string n char)))
|
|
|
|
|
(%string-copy! ans (- n len) s start end)
|
|
|
|
|
ans))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Filtering strings
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; string-delete char/char-set/pred string [start end]
|
|
|
|
|
;;; string-filter char/char-set/pred string [start end]
|
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; If the criterion is a char or char-set, we scan the string twice with
|
|
|
|
|
;;; string-fold -- once to determine the length of the result string,
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; and once to do the filtered copy.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; If the criterion is a predicate, we don't do this double-scan strategy,
|
|
|
|
|
;;; because the predicate might have side-effects or be very expensive to
|
|
|
|
|
;;; compute. So we preallocate a temp buffer pessimistically, and only do
|
|
|
|
|
;;; one scan over S. This is likely to be faster and more space-efficient
|
|
|
|
|
;;; than consing a list.
|
|
|
|
|
|
|
|
|
|
(define (string-delete criterion s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-delete s maybe-start+end
|
|
|
|
|
(if (procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((slen (- end start))
|
|
|
|
|
(temp (make-string slen))
|
|
|
|
|
(ans-len (string-fold (lambda (c i)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion c) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(begin (string-set! temp i c)
|
|
|
|
|
(+ i 1))))
|
|
|
|
|
0 s start end)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (= ans-len slen) temp (substring temp 0 ans-len)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let* ((cset (cond ((char-set? criterion) criterion)
|
|
|
|
|
((char? criterion) (char-set criterion))
|
|
|
|
|
(else (error "string-delete criterion not predicate, char or char-set" criterion))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(len (string-fold (lambda (c i) (if (char-set-contains? cset c)
|
|
|
|
|
i
|
|
|
|
|
(+ i 1)))
|
|
|
|
|
0 s start end))
|
|
|
|
|
(ans (make-string len)))
|
|
|
|
|
(string-fold (lambda (c i) (if (char-set-contains? cset c)
|
|
|
|
|
i
|
|
|
|
|
(begin (string-set! ans i c)
|
|
|
|
|
(+ i 1))))
|
|
|
|
|
0 s start end)
|
|
|
|
|
ans))))
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-filter criterion s . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-filter s maybe-start+end
|
|
|
|
|
(if (procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((slen (- end start))
|
|
|
|
|
(temp (make-string slen))
|
|
|
|
|
(ans-len (string-fold (lambda (c i)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion c)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(begin (string-set! temp i c)
|
|
|
|
|
(+ i 1))
|
|
|
|
|
i))
|
|
|
|
|
0 s start end)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (= ans-len slen) temp (substring temp 0 ans-len)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let* ((cset (cond ((char-set? criterion) criterion)
|
|
|
|
|
((char? criterion) (char-set criterion))
|
|
|
|
|
(else (error "string-delete criterion not predicate, char or char-set" criterion))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(len (string-fold (lambda (c i) (if (char-set-contains? cset c)
|
|
|
|
|
(+ i 1)
|
|
|
|
|
i))
|
|
|
|
|
0 s start end))
|
|
|
|
|
(ans (make-string len)))
|
|
|
|
|
(string-fold (lambda (c i) (if (char-set-contains? cset c)
|
|
|
|
|
(begin (string-set! ans i c)
|
|
|
|
|
(+ i 1))
|
|
|
|
|
i))
|
|
|
|
|
0 s start end)
|
|
|
|
|
ans))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; String search
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; string-index string char/char-set/pred [start end]
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-index-right string char/char-set/pred [start end]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-skip string char/char-set/pred [start end]
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-skip-right string char/char-set/pred [start end]
|
|
|
|
|
;;; string-count string char/char-set/pred [start end]
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; There's a lot of replicated code here for efficiency.
|
|
|
|
|
;;; For example, the char/char-set/pred discrimination has
|
|
|
|
|
;;; been lifted above the inner loop of each proc.
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-index str criterion . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-index str maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char=? criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (+ i 1))))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((char-set? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char-set-contains? criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (+ i 1))))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (+ i 1))))))
|
|
|
|
|
(else (error "Second param is neither char-set, char, or predicate procedure."
|
2001-03-21 17:28:31 -05:00
|
|
|
|
string-index criterion)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-index-right str criterion . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-index-right str maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char=? criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (- i 1))))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((char-set? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char-set-contains? criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (- i 1))))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion (string-ref str i)) i
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (- i 1))))))
|
|
|
|
|
(else (error "Second param is neither char-set, char, or predicate procedure."
|
2001-03-21 17:28:31 -05:00
|
|
|
|
string-index-right criterion)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-skip str criterion . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-skip str maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char=? criterion (string-ref str i))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (+ i 1))
|
|
|
|
|
i))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((char-set? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char-set-contains? criterion (string-ref str i))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (+ i 1))
|
|
|
|
|
i))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i start))
|
|
|
|
|
(and (< i end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion (string-ref str i)) (lp (+ i 1))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
i))))
|
|
|
|
|
(else (error "Second param is neither char-set, char, or predicate procedure."
|
2001-03-21 17:28:31 -05:00
|
|
|
|
string-skip criterion)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-skip-right str criterion . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-skip-right str maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char=? criterion (string-ref str i))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (- i 1))
|
|
|
|
|
i))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((char-set? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (char-set-contains? criterion (string-ref str i))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(lp (- i 1))
|
|
|
|
|
i))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((procedure? criterion)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let lp ((i (- end 1)))
|
|
|
|
|
(and (>= i 0)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (criterion (string-ref str i)) (lp (- i 1))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
i))))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(else (error "CRITERION param is neither char-set or char."
|
|
|
|
|
string-skip-right criterion)))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (string-count s criterion . maybe-start+end)
|
|
|
|
|
(let-string-start+end (start end) string-count s maybe-start+end
|
|
|
|
|
(cond ((char? criterion)
|
|
|
|
|
(do ((i start (+ i 1))
|
|
|
|
|
(count 0 (if (char=? criterion (string-ref s i))
|
|
|
|
|
(+ count 1)
|
|
|
|
|
count)))
|
|
|
|
|
((>= i end) count)))
|
|
|
|
|
|
|
|
|
|
((char-set? criterion)
|
|
|
|
|
(do ((i start (+ i 1))
|
|
|
|
|
(count 0 (if (char-set-contains? criterion (string-ref s i))
|
|
|
|
|
(+ count 1)
|
|
|
|
|
count)))
|
|
|
|
|
((>= i end) count)))
|
|
|
|
|
|
|
|
|
|
((procedure? criterion)
|
|
|
|
|
(do ((i start (+ i 1))
|
|
|
|
|
(count 0 (if (criterion (string-ref s i)) (+ count 1) count)))
|
|
|
|
|
((>= i end) count)))
|
|
|
|
|
|
|
|
|
|
(else (error "CRITERION param is neither char-set or char."
|
|
|
|
|
string-count criterion)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; string-fill! string char [start end]
|
|
|
|
|
;;;
|
|
|
|
|
;;; string-copy! to tstart from [fstart fend]
|
|
|
|
|
;;; Guaranteed to work, even if s1 eq s2.
|
|
|
|
|
|
|
|
|
|
(define (string-fill! s char . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg char? char string-fill!)
|
|
|
|
|
(let-string-start+end (start end) string-fill! s maybe-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(do ((i (- end 1) (- i 1)))
|
|
|
|
|
((< i start))
|
|
|
|
|
(string-set! s i char))))
|
|
|
|
|
|
|
|
|
|
(define (string-copy! to tstart from . maybe-fstart+fend)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend
|
|
|
|
|
(check-arg integer? tstart string-copy!)
|
|
|
|
|
(check-substring-spec string-copy! to tstart (+ tstart (- fend fstart)))
|
|
|
|
|
(%string-copy! to tstart from fstart fend)))
|
|
|
|
|
|
|
|
|
|
;;; Library-internal routine
|
|
|
|
|
(define (%string-copy! to tstart from fstart fend)
|
|
|
|
|
(if (> fstart tstart)
|
|
|
|
|
(do ((i fstart (+ i 1))
|
|
|
|
|
(j tstart (+ j 1)))
|
|
|
|
|
((>= i fend))
|
|
|
|
|
(string-set! to j (string-ref from i)))
|
|
|
|
|
|
|
|
|
|
(do ((i (- fend 1) (- i 1))
|
|
|
|
|
(j (+ -1 tstart (- fend fstart)) (- j 1)))
|
|
|
|
|
((< i fstart))
|
|
|
|
|
(string-set! to j (string-ref from i)))))
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Returns starting-position in STRING or #f if not true.
|
|
|
|
|
;;; This implementation is slow & simple. It is useful as a "spec" or for
|
|
|
|
|
;;; comparison testing with fancier implementations.
|
|
|
|
|
;;; See below for fast KMP version.
|
|
|
|
|
|
|
|
|
|
;(define (string-contains string substring . maybe-starts+ends)
|
|
|
|
|
; (let-string-start+end2 (start1 end1 start2 end2)
|
|
|
|
|
; string-contains string substring maybe-starts+ends
|
|
|
|
|
; (let* ((len (- end2 start2))
|
|
|
|
|
; (i-bound (- end1 len)))
|
|
|
|
|
; (let lp ((i start1))
|
|
|
|
|
; (and (< i i-bound)
|
|
|
|
|
; (if (string= string substring i (+ i len) start2 end2)
|
|
|
|
|
; i
|
|
|
|
|
; (lp (+ i 1))))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Searching for an occurrence of a substring
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
(define (string-contains text pattern . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (t-start t-end p-start p-end)
|
|
|
|
|
string-contains text pattern maybe-starts+ends
|
|
|
|
|
(%kmp-search pattern text char=? p-start p-end t-start t-end)))
|
|
|
|
|
|
|
|
|
|
(define (string-contains-ci text pattern . maybe-starts+ends)
|
|
|
|
|
(let-string-start+end2 (t-start t-end p-start p-end)
|
|
|
|
|
string-contains-ci text pattern maybe-starts+ends
|
|
|
|
|
(%kmp-search pattern text char-ci=? p-start p-end t-start t-end)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Knuth-Morris-Pratt string searching
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; See
|
|
|
|
|
;;; "Fast pattern matching in strings"
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; SIAM J. Computing 6(2):323-350 1977
|
|
|
|
|
;;; D. E. Knuth, J. H. Morris and V. R. Pratt
|
|
|
|
|
;;; also described in
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; "Pattern matching in strings"
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; Alfred V. Aho
|
|
|
|
|
;;; Formal Language Theory - Perspectives and Open Problems
|
|
|
|
|
;;; Ronald V. Brook (editor)
|
|
|
|
|
;;; This algorithm is O(m + n) where m and n are the
|
|
|
|
|
;;; lengths of the pattern and string respectively
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; KMP search source[start,end) for PATTERN. Return starting index of
|
|
|
|
|
;;; leftmost match or #f.
|
|
|
|
|
|
|
|
|
|
(define (%kmp-search pattern text c= p-start p-end t-start t-end)
|
|
|
|
|
(let ((plen (- p-end p-start))
|
|
|
|
|
(rv (make-kmp-restart-vector pattern c= p-start p-end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;; The search loop. TJ & PJ are redundant state.
|
|
|
|
|
(let lp ((ti t-start) (pi 0)
|
|
|
|
|
(tj (- t-end t-start)) ; (- tlen ti) -- how many chars left.
|
|
|
|
|
(pj plen)) ; (- plen pi) -- how many chars left.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(if (= pi plen) (- ti plen) ; Win.
|
|
|
|
|
|
|
|
|
|
(and (<= pj tj) ; Lose.
|
|
|
|
|
|
|
|
|
|
(if (c= (string-ref text ti) ; Search.
|
|
|
|
|
(string-ref pattern (+ p-start pi)))
|
|
|
|
|
(lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance.
|
|
|
|
|
|
|
|
|
|
(let ((pi (vector-ref rv pi))) ; Retreat.
|
|
|
|
|
(if (= pi -1)
|
|
|
|
|
(lp (+ ti 1) 0 (- tj 1) plen) ; Punt.
|
|
|
|
|
(lp ti pi tj (- plen pi))))))))))
|
|
|
|
|
|
|
|
|
|
;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Compute the KMP restart vector RV for string PATTERN. If
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; we have matched chars 0..i-1 of PATTERN against a search string S, and
|
|
|
|
|
;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to
|
|
|
|
|
;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k].
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
|
|
|
|
;;; In other words, if you have matched the first i chars of PATTERN, but
|
|
|
|
|
;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest
|
|
|
|
|
;;; prefix of PATTERN is that you have matched.
|
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; - C= (default CHAR=?) is used to compare characters for equality.
|
|
|
|
|
;;; Pass in CHAR-CI=? for case-folded string search.
|
|
|
|
|
;;;
|
|
|
|
|
;;; - START & END restrict the pattern to the indicated substring; the
|
|
|
|
|
;;; returned vector will be of length END - START. The numbers stored
|
|
|
|
|
;;; in the vector will be values in the range [0,END-START) -- that is,
|
|
|
|
|
;;; they are valid indices into the restart vector; you have to add START
|
|
|
|
|
;;; to them to use them as indices into PATTERN.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;
|
|
|
|
|
;;; I've split this out as a separate function in case other constant-string
|
|
|
|
|
;;; searchers might want to use it.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;;
|
|
|
|
|
;;; E.g.:
|
|
|
|
|
;;; a b d a b x
|
|
|
|
|
;;; #(-1 0 0 -1 1 2)
|
|
|
|
|
|
|
|
|
|
(define (make-kmp-restart-vector pattern . maybe-c=+start+end)
|
|
|
|
|
(let-optionals* maybe-c=+start+end
|
|
|
|
|
((c= char=? (procedure? c=))
|
|
|
|
|
((start end) (lambda (args)
|
|
|
|
|
(string-parse-start+end make-kmp-restart-vector
|
|
|
|
|
pattern args))))
|
|
|
|
|
(let* ((rvlen (- end start))
|
|
|
|
|
(rv (make-vector rvlen -1)))
|
|
|
|
|
(if (> rvlen 0)
|
|
|
|
|
(let ((rvlen-1 (- rvlen 1))
|
|
|
|
|
(c0 (string-ref pattern start)))
|
|
|
|
|
|
|
|
|
|
;; Here's the main loop. We have set rv[0] ... rv[i].
|
|
|
|
|
;; K = I + START -- it is the corresponding index into PATTERN.
|
|
|
|
|
(let lp1 ((i 0) (j -1) (k start))
|
|
|
|
|
(if (< i rvlen-1)
|
|
|
|
|
|
|
|
|
|
(let ((ck (string-ref pattern k)))
|
|
|
|
|
;; lp2 invariant:
|
|
|
|
|
;; pat[(k-j) .. k-1] matches pat[start .. start+j-1]
|
|
|
|
|
;; or j = -1.
|
|
|
|
|
(let lp2 ((j j))
|
|
|
|
|
|
|
|
|
|
(cond ((= j -1)
|
|
|
|
|
(let ((i1 (+ i 1)))
|
|
|
|
|
(vector-set! rv i1 (if (c= ck c0) -1 0))
|
|
|
|
|
(lp1 i1 0 (+ k 1))))
|
|
|
|
|
|
|
|
|
|
;; pat[(k-j) .. k] matches pat[start..start+j].
|
|
|
|
|
((c= ck (string-ref pattern (+ j start)))
|
|
|
|
|
(let* ((i1 (+ 1 i))
|
|
|
|
|
(j1 (+ 1 j)))
|
|
|
|
|
(vector-set! rv i1 j1)
|
|
|
|
|
(lp1 i1 j1 (+ k 1))))
|
|
|
|
|
|
|
|
|
|
(else (lp2 (vector-ref rv j))))))))))
|
|
|
|
|
rv)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; We've matched I chars from PAT. C is the next char from the search string.
|
|
|
|
|
;;; Return the new I after handling C.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START
|
|
|
|
|
;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched
|
|
|
|
|
;;; are
|
|
|
|
|
;;; PAT[PAT-START .. PAT-START + I].
|
|
|
|
|
;;;
|
|
|
|
|
;;; It's *not* an oversight that there is no friendly error checking or
|
|
|
|
|
;;; defaulting of arguments. This is a low-level, inner-loop procedure
|
|
|
|
|
;;; that we want integrated/inlined into the point of call.
|
|
|
|
|
|
|
|
|
|
(define (kmp-step pat rv c i c= p-start)
|
|
|
|
|
(let lp ((i i))
|
|
|
|
|
(if (c= c (string-ref pat (+ i p-start))) ; Match =>
|
|
|
|
|
(+ i 1) ; Done.
|
|
|
|
|
(let ((i (vector-ref rv i))) ; Back up in PAT.
|
|
|
|
|
(if (= i -1) 0 ; Can't back up further.
|
|
|
|
|
(lp i)))))) ; Keep trying for match.
|
|
|
|
|
|
|
|
|
|
;;; Zip through S[start,end), looking for a match of PAT. Assume we've
|
|
|
|
|
;;; already matched the first I chars of PAT when we commence at S[start].
|
|
|
|
|
;;; - <0: If we find a match *ending* at index J, return -J.
|
|
|
|
|
;;; - >=0: If we get to the end of the S[start,end) span without finding
|
|
|
|
|
;;; a complete match, return the number of chars from PAT we'd matched
|
|
|
|
|
;;; when we ran off the end.
|
|
|
|
|
;;;
|
|
|
|
|
;;; This is useful for searching *across* buffers -- that is, when your
|
|
|
|
|
;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop
|
|
|
|
|
;;; for speed.
|
|
|
|
|
|
|
|
|
|
(define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end)
|
|
|
|
|
(check-arg vector? rv string-kmp-partial-search)
|
|
|
|
|
(let-optionals* c=+p-start+s-start+s-end
|
|
|
|
|
((c= char=? (procedure? c=))
|
|
|
|
|
(p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start)))
|
|
|
|
|
((s-start s-end) (lambda (args)
|
|
|
|
|
(string-parse-start+end string-kmp-partial-search
|
|
|
|
|
s args))))
|
|
|
|
|
(let ((patlen (vector-length rv)))
|
|
|
|
|
(check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen)))
|
|
|
|
|
i string-kmp-partial-search)
|
|
|
|
|
|
|
|
|
|
;; Enough prelude. Here's the actual code.
|
|
|
|
|
(let lp ((si s-start) ; An index into S.
|
|
|
|
|
(vi i)) ; An index into RV.
|
|
|
|
|
(cond ((= vi patlen) (- si)) ; Win.
|
|
|
|
|
((= si s-end) vi) ; Ran off the end.
|
|
|
|
|
(else ; Match s[si] & loop.
|
|
|
|
|
(let ((c (string-ref s si)))
|
|
|
|
|
(lp (+ si 1)
|
|
|
|
|
(let lp2 ((vi vi)) ; This is just KMP-STEP.
|
|
|
|
|
(if (c= c (string-ref pat (+ vi p-start)))
|
|
|
|
|
(+ vi 1)
|
|
|
|
|
(let ((vi (vector-ref rv vi)))
|
|
|
|
|
(if (= vi -1) 0
|
|
|
|
|
(lp2 vi)))))))))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Misc
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (string-null? s)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; (string-reverse s [start end])
|
|
|
|
|
;;; (string-reverse! s [start end])
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (reverse-list->string clist)
|
|
|
|
|
;;; (string->list s [start end])
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string-null? s) (zero? (string-length s)))
|
|
|
|
|
|
|
|
|
|
(define (string-reverse s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-reverse s maybe-start+end
|
|
|
|
|
(let* ((len (- end start))
|
|
|
|
|
(ans (make-string len)))
|
|
|
|
|
(do ((i start (+ i 1))
|
|
|
|
|
(j (- len 1) (- j 1)))
|
|
|
|
|
((< j 0))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(string-set! ans j (string-ref s i)))
|
|
|
|
|
ans)))
|
|
|
|
|
|
|
|
|
|
(define (string-reverse! s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-reverse! s maybe-start+end
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(do ((i (- end 1) (- i 1))
|
|
|
|
|
(j start (+ j 1)))
|
|
|
|
|
((<= i j))
|
|
|
|
|
(let ((ci (string-ref s i)))
|
|
|
|
|
(string-set! s i (string-ref s j))
|
|
|
|
|
(string-set! s j ci)))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (reverse-list->string clist)
|
|
|
|
|
(let* ((len (length clist))
|
|
|
|
|
(s (make-string len)))
|
|
|
|
|
(do ((i (- len 1) (- i 1)) (clist clist (cdr clist)))
|
|
|
|
|
((not (pair? clist)))
|
|
|
|
|
(string-set! s i (car clist)))
|
|
|
|
|
s))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;(define (string->list s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
; (apply string-fold-right cons '() s maybe-start+end))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(define (string->list s . maybe-start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string->list s maybe-start+end
|
|
|
|
|
(do ((i (- end 1) (- i 1))
|
|
|
|
|
(ans '() (cons (string-ref s i) ans)))
|
|
|
|
|
((< i start) ans))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Defined by R5RS, so commented out here.
|
|
|
|
|
;(define (list->string lis) (string-unfold null? car cdr lis))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; string-concatenate string-list -> string
|
|
|
|
|
;;; string-concatenate/shared string-list -> string
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; string-append/shared s ... -> string
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; STRING-APPEND/SHARED has license to return a string that shares storage
|
|
|
|
|
;;; with any of its arguments. In particular, if there is only one non-empty
|
|
|
|
|
;;; string amongst its parameters, it is permitted to return that string as
|
|
|
|
|
;;; its result. STRING-APPEND, by contrast, always allocates new storage.
|
|
|
|
|
;;;
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of
|
|
|
|
|
;;; strings, which they concatenate into a result string. STRING-CONCATENATE
|
|
|
|
|
;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may
|
|
|
|
|
;;; not) return a result that shares storage with any of its arguments. In
|
|
|
|
|
;;; particular, if it is applied to a singleton list, it is permitted to
|
|
|
|
|
;;; return the car of that list as its value.
|
|
|
|
|
|
|
|
|
|
(define (string-append/shared . strings) (string-concatenate/shared strings))
|
|
|
|
|
|
|
|
|
|
(define (string-concatenate/shared strings)
|
|
|
|
|
(let lp ((strings strings) (nchars 0) (first #f))
|
|
|
|
|
(cond ((pair? strings) ; Scan the args, add up total
|
|
|
|
|
(let* ((string (car strings)) ; length, remember 1st
|
|
|
|
|
(tail (cdr strings)) ; non-empty string.
|
|
|
|
|
(slen (string-length string)))
|
|
|
|
|
(if (zero? slen)
|
|
|
|
|
(lp tail nchars first)
|
|
|
|
|
(lp tail (+ nchars slen) (or first strings)))))
|
|
|
|
|
|
|
|
|
|
((zero? nchars) "")
|
|
|
|
|
|
|
|
|
|
;; Just one non-empty string! Return it.
|
|
|
|
|
((= nchars (string-length (car first))) (car first))
|
|
|
|
|
|
|
|
|
|
(else (let ((ans (make-string nchars)))
|
|
|
|
|
(let lp ((strings first) (i 0))
|
|
|
|
|
(if (pair? strings)
|
|
|
|
|
(let* ((s (car strings))
|
|
|
|
|
(slen (string-length s)))
|
|
|
|
|
(%string-copy! ans i s 0 slen)
|
|
|
|
|
(lp (cdr strings) (+ i slen)))))
|
|
|
|
|
ans)))))
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
; Alas, Scheme 48's APPLY blows up if you have many, many arguments.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;(define (string-concatenate strings) (apply string-append strings))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;;; Here it is written out. I avoid using REDUCE to add up string lengths
|
|
|
|
|
;;; to avoid non-R5RS dependencies.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (string-concatenate strings)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((total (do ((strings strings (cdr strings))
|
|
|
|
|
(i 0 (+ i (string-length (car strings)))))
|
|
|
|
|
((not (pair? strings)) i)))
|
|
|
|
|
(ans (make-string total)))
|
|
|
|
|
(let lp ((i 0) (strings strings))
|
|
|
|
|
(if (pair? strings)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let* ((s (car strings))
|
|
|
|
|
(slen (string-length s)))
|
|
|
|
|
(%string-copy! ans i s 0 slen)
|
|
|
|
|
(lp (+ i slen) (cdr strings)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
ans))
|
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Defined by R5RS, so commented out here.
|
|
|
|
|
;(define (string-append . strings) (string-concatenate strings))
|
|
|
|
|
|
|
|
|
|
;;; string-concatenate-reverse string-list [final-string end] -> string
|
|
|
|
|
;;; string-concatenate-reverse/shared string-list [final-string end] -> string
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Return
|
|
|
|
|
;;; (string-concatenate
|
|
|
|
|
;;; (reverse
|
|
|
|
|
;;; (cons (substring final-string 0 end) string-list)))
|
|
|
|
|
|
|
|
|
|
(define (string-concatenate-reverse string-list . maybe-final+end)
|
|
|
|
|
(let-optionals* maybe-final+end ((final "" (string? final))
|
|
|
|
|
(end (string-length final)
|
|
|
|
|
(and (integer? end)
|
|
|
|
|
(exact? end)
|
|
|
|
|
(<= 0 end (string-length final)))))
|
|
|
|
|
(let ((len (let lp ((sum 0) (lis string-list))
|
|
|
|
|
(if (pair? lis)
|
|
|
|
|
(lp (+ sum (string-length (car lis))) (cdr lis))
|
|
|
|
|
sum))))
|
|
|
|
|
|
|
|
|
|
(%finish-string-concatenate-reverse len string-list final end))))
|
|
|
|
|
|
|
|
|
|
(define (string-concatenate-reverse/shared string-list . maybe-final+end)
|
|
|
|
|
(let-optionals* maybe-final+end ((final "" (string? final))
|
|
|
|
|
(end (string-length final)
|
|
|
|
|
(and (integer? end)
|
|
|
|
|
(exact? end)
|
|
|
|
|
(<= 0 end (string-length final)))))
|
|
|
|
|
;; Add up the lengths of all the strings in STRING-LIST; also get a
|
|
|
|
|
;; pointer NZLIST into STRING-LIST showing where the first non-zero-length
|
|
|
|
|
;; string starts.
|
|
|
|
|
(let lp ((len 0) (nzlist #f) (lis string-list))
|
|
|
|
|
(if (pair? lis)
|
|
|
|
|
(let ((slen (string-length (car string-list))))
|
|
|
|
|
(lp (+ len slen)
|
|
|
|
|
(if (or nzlist (zero? slen)) nzlist lis)
|
|
|
|
|
(cdr lis)))
|
|
|
|
|
|
|
|
|
|
(cond ((zero? len) (substring/shared final 0 end))
|
|
|
|
|
|
|
|
|
|
;; LEN > 0, so NZLIST is non-empty.
|
|
|
|
|
|
|
|
|
|
((and (zero? end) (= len (string-length (car nzlist))))
|
|
|
|
|
(car nzlist))
|
|
|
|
|
|
|
|
|
|
(else (%finish-string-concatenate-reverse len nzlist final end)))))))
|
|
|
|
|
|
|
|
|
|
(define (%finish-string-concatenate-reverse len string-list final end)
|
|
|
|
|
(let ((ans (make-string (+ end len))))
|
|
|
|
|
(%string-copy! ans len final 0 end)
|
|
|
|
|
(let lp ((i len) (lis string-list))
|
|
|
|
|
(if (pair? lis)
|
|
|
|
|
(let* ((s (car lis))
|
|
|
|
|
(lis (cdr lis))
|
|
|
|
|
(slen (string-length s))
|
|
|
|
|
(i (- i slen)))
|
|
|
|
|
(%string-copy! ans i s 0 slen)
|
|
|
|
|
(lp i lis))))
|
|
|
|
|
ans))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; string-replace s1 s2 start1 end1 [start2 end2] -> string
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Replace S1[START1,END1) with S2[START2,END2).
|
|
|
|
|
|
|
|
|
|
(define (string-replace s1 s2 start1 end1 . maybe-start+end)
|
|
|
|
|
(check-substring-spec string-replace s1 start1 end1)
|
|
|
|
|
(let-string-start+end (start2 end2) string-replace s2 maybe-start+end
|
|
|
|
|
(let* ((slen1 (string-length s1))
|
|
|
|
|
(sublen2 (- end2 start2))
|
|
|
|
|
(alen (+ (- slen1 (- end1 start1)) sublen2))
|
|
|
|
|
(ans (make-string alen)))
|
|
|
|
|
(%string-copy! ans 0 s1 0 start1)
|
|
|
|
|
(%string-copy! ans start1 s2 start2 end2)
|
|
|
|
|
(%string-copy! ans (+ start1 sublen2) s1 end1 slen1)
|
|
|
|
|
ans)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; string-tokenize s [token-set start end] -> list
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Break S up into a list of token strings, where a token is a maximal
|
|
|
|
|
;;; non-empty contiguous sequence of chars belonging to TOKEN-SET.
|
|
|
|
|
;;; (string-tokenize "hello, world") => ("hello," "world")
|
|
|
|
|
|
|
|
|
|
(define (string-tokenize s . token-chars+start+end)
|
|
|
|
|
(let-optionals* token-chars+start+end
|
|
|
|
|
((token-chars char-set:graphic (char-set? token-chars)) rest)
|
|
|
|
|
(let-string-start+end (start end) string-tokenize s rest
|
|
|
|
|
(let lp ((i end) (ans '()))
|
|
|
|
|
(cond ((and (< start i) (string-index-right s token-chars start i)) =>
|
|
|
|
|
(lambda (tend-1)
|
|
|
|
|
(let ((tend (+ 1 tend-1)))
|
|
|
|
|
(cond ((string-skip-right s token-chars start tend-1) =>
|
|
|
|
|
(lambda (tstart-1)
|
|
|
|
|
(lp tstart-1
|
|
|
|
|
(cons (substring s (+ 1 tstart-1) tend)
|
|
|
|
|
ans))))
|
|
|
|
|
(else (cons (substring s start tend) ans))))))
|
|
|
|
|
(else ans))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; xsubstring s from [to start end] -> string
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; S is a string; START and END are optional arguments that demarcate
|
|
|
|
|
;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole
|
|
|
|
|
;;; string). Replicate this substring up and down index space, in both the
|
|
|
|
|
;; positive and negative directions. For example, if S = "abcdefg", START=3,
|
|
|
|
|
;;; and END=6, then we have the conceptual bidirectionally-infinite string
|
|
|
|
|
;;; ... d e f d e f d e f d e f d e f d e f d e f ...
|
|
|
|
|
;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ...
|
|
|
|
|
;;; XSUBSTRING returns the substring of this string beginning at index FROM,
|
|
|
|
|
;;; and ending at TO (which defaults to FROM+(END-START)).
|
|
|
|
|
;;;
|
|
|
|
|
;;; You can use XSUBSTRING in many ways:
|
|
|
|
|
;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab"
|
|
|
|
|
;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd"
|
|
|
|
|
;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca"
|
|
|
|
|
;;;
|
|
|
|
|
;;; Note that
|
|
|
|
|
;;; - The FROM/TO indices give a half-open range -- the characters from
|
|
|
|
|
;;; index FROM up to, but not including index TO.
|
|
|
|
|
;;; - The FROM/TO indices are not in terms of the index space for string S.
|
|
|
|
|
;;; They are in terms of the replicated index space of the substring
|
|
|
|
|
;;; defined by S, START, and END.
|
|
|
|
|
;;;
|
|
|
|
|
;;; It is an error if START=END -- although this is allowed by special
|
|
|
|
|
;;; dispensation when FROM=TO.
|
|
|
|
|
|
|
|
|
|
(define (xsubstring s from . maybe-to+start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg (lambda (val) (and (integer? val) (exact? val)))
|
|
|
|
|
from xsubstring)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(receive (to start end)
|
|
|
|
|
(if (pair? maybe-to+start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end)
|
|
|
|
|
(let ((to (car maybe-to+start+end)))
|
|
|
|
|
(check-arg (lambda (val) (and (integer? val)
|
|
|
|
|
(exact? val)
|
|
|
|
|
(<= from val)))
|
|
|
|
|
to xsubstring)
|
|
|
|
|
(values to start end)))
|
|
|
|
|
(let ((slen (string-length (check-arg string? s xsubstring))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(values (+ from slen) 0 slen)))
|
|
|
|
|
(let ((slen (- end start))
|
|
|
|
|
(anslen (- to from)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(cond ((zero? anslen) "")
|
|
|
|
|
((zero? slen) (error "Cannot replicate empty (sub)string"
|
|
|
|
|
xsubstring s from to start end))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
((= 1 slen) ; Fast path for 1-char replication.
|
|
|
|
|
(make-string anslen (string-ref s start)))
|
|
|
|
|
|
|
|
|
|
;; Selected text falls entirely within one span.
|
|
|
|
|
((= (floor (/ from slen)) (floor (/ to slen)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(substring s (+ start (modulo from slen))
|
|
|
|
|
(+ start (modulo to slen))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;; Selected text requires multiple spans.
|
|
|
|
|
(else (let ((ans (make-string anslen)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(%multispan-repcopy! ans 0 s from to start end)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
ans))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Exactly the same as xsubstring, but the extracted text is written
|
|
|
|
|
;;; into the string TARGET starting at index TSTART.
|
|
|
|
|
;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy
|
|
|
|
|
;;; a string on top of itself.
|
|
|
|
|
|
|
|
|
|
(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(check-arg (lambda (val) (and (integer? val) (exact? val)))
|
|
|
|
|
sfrom string-xcopy!)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(receive (sto start end)
|
|
|
|
|
(if (pair? maybe-sto+start+end)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end)
|
|
|
|
|
(let ((sto (car maybe-sto+start+end)))
|
|
|
|
|
(check-arg (lambda (val) (and (integer? val) (exact? val)))
|
|
|
|
|
sto string-xcopy!)
|
|
|
|
|
(values sto start end)))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let ((slen (string-length s)))
|
|
|
|
|
(values (+ sfrom slen) 0 slen)))
|
|
|
|
|
|
|
|
|
|
(let* ((tocopy (- sto sfrom))
|
|
|
|
|
(tend (+ tstart tocopy))
|
|
|
|
|
(slen (- end start)))
|
|
|
|
|
(check-substring-spec string-xcopy! target tstart tend)
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(cond ((zero? tocopy))
|
|
|
|
|
((zero? slen) (error "Cannot replicate empty (sub)string"
|
|
|
|
|
string-xcopy!
|
|
|
|
|
target tstart s sfrom sto start end))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
((= 1 slen) ; Fast path for 1-char replication.
|
|
|
|
|
(string-fill! target (string-ref s start) tstart tend))
|
|
|
|
|
|
|
|
|
|
;; Selected text falls entirely within one span.
|
|
|
|
|
((= (floor (/ sfrom slen)) (floor (/ sto slen)))
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(%string-copy! target tstart s
|
|
|
|
|
(+ start (modulo sfrom slen))
|
|
|
|
|
(+ start (modulo sto slen))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;; Multi-span copy.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(else (%multispan-repcopy! target tstart s sfrom sto start end))))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY!
|
|
|
|
|
;;; Internal -- not exported, no careful arg checking.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(define (%multispan-repcopy! target tstart s sfrom sto start end)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
(let* ((slen (- end start))
|
|
|
|
|
(i0 (+ start (modulo sfrom slen)))
|
|
|
|
|
(total-chars (- sto sfrom)))
|
|
|
|
|
|
|
|
|
|
;; Copy the partial span @ the beginning
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(%string-copy! target tstart s i0 end)
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
(let* ((ncopied (- end i0)) ; We've copied this many.
|
|
|
|
|
(nleft (- total-chars ncopied)) ; # chars left to copy.
|
|
|
|
|
(nspans (quotient nleft slen))) ; # whole spans to copy
|
|
|
|
|
|
|
|
|
|
;; Copy the whole spans in the middle.
|
|
|
|
|
(do ((i (+ tstart ncopied) (+ i slen)) ; Current target index.
|
|
|
|
|
(nspans nspans (- nspans 1))) ; # spans to copy
|
|
|
|
|
((zero? nspans)
|
|
|
|
|
;; Copy the partial-span @ the end & we're done.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(%string-copy! target i s start (+ start (- total-chars (- i tstart)))))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
(%string-copy! target i s start end))))); Copy a whole span.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; (string-join string-list [delimiter grammar]) => string
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; Paste strings together using the delimiter string.
|
|
|
|
|
;;;
|
|
|
|
|
;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
|
|
|
|
|
;;;
|
|
|
|
|
;;; DELIMITER defaults to a single space " "
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix}
|
|
|
|
|
;;; and defaults to 'infix.
|
|
|
|
|
;;;
|
|
|
|
|
;;; I could rewrite this more efficiently -- precompute the length of the
|
|
|
|
|
;;; answer string, then allocate & fill it in iteratively. Using
|
|
|
|
|
;;; STRING-CONCATENATE is less efficient.
|
|
|
|
|
|
|
|
|
|
(define (string-join strings . delim+grammar)
|
|
|
|
|
(let-optionals* delim+grammar ((delim " " (string? delim))
|
|
|
|
|
(grammar 'infix))
|
|
|
|
|
(let ((buildit (lambda (lis final)
|
|
|
|
|
(let recur ((lis lis))
|
|
|
|
|
(if (pair? lis)
|
|
|
|
|
(cons delim (cons (car lis) (recur (cdr lis))))
|
|
|
|
|
final)))))
|
|
|
|
|
|
|
|
|
|
(cond ((pair? strings)
|
|
|
|
|
(string-concatenate
|
|
|
|
|
(case grammar
|
|
|
|
|
|
|
|
|
|
((infix strict-infix)
|
|
|
|
|
(cons (car strings) (buildit (cdr strings) '())))
|
|
|
|
|
|
|
|
|
|
((prefix) (buildit strings '()))
|
|
|
|
|
|
|
|
|
|
((suffix)
|
|
|
|
|
(cons (car strings) (buildit (cdr strings) (list delim))))
|
|
|
|
|
|
|
|
|
|
(else (error "Illegal join grammar"
|
|
|
|
|
grammar string-join)))))
|
|
|
|
|
|
|
|
|
|
((not (null? strings))
|
|
|
|
|
(error "STRINGS parameter not list." strings string-join))
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;; STRINGS is ()
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
((eq? grammar 'strict-infix)
|
|
|
|
|
(error "Empty list cannot be joined with STRICT-INFIX grammar."
|
|
|
|
|
string-join))
|
|
|
|
|
|
|
|
|
|
(else ""))))) ; Special-cased for infix grammar.
|
1999-09-23 11:24:25 -04:00
|
|
|
|
|
|
|
|
|
|
2001-03-21 17:28:31 -05:00
|
|
|
|
;;; Porting & performance-tuning notes
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; See the section at the beginning of this file on external dependencies.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro.
|
|
|
|
|
;;; There are many, many optional arguments in this library; the complexity
|
|
|
|
|
;;; of parsing, defaulting & type-testing these parameters is handled with the
|
|
|
|
|
;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can
|
|
|
|
|
;;; rewrite the uses, port the hairy macro definition (which is implemented
|
|
|
|
|
;;; using a Clinger-Rees low-level explicit-renaming macro system), or port
|
|
|
|
|
;;; the simple, high-level definition, which is less efficient.
|
|
|
|
|
;;;
|
|
|
|
|
;;; There is a fair amount of argument checking. This is, strictly speaking,
|
|
|
|
|
;;; unnecessary -- the actual body of the procedures will blow up if, say, a
|
|
|
|
|
;;; START/END index is improper. However, the error message will not be as
|
|
|
|
|
;;; good as if the error were caught at the "higher level." Also, a very, very
|
|
|
|
|
;;; smart Scheme compiler may be able to exploit having the type checks done
|
|
|
|
|
;;; early, so that the actual body of the procedures can assume proper values.
|
|
|
|
|
;;; This isn't likely; this kind of compiler technology isn't common any
|
|
|
|
|
;;; longer.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The overhead of optional-argument parsing is irritating. The optional
|
|
|
|
|
;;; arguments must be consed into a rest list on entry, and then parsed out.
|
|
|
|
|
;;; Function call should be a matter of a few register moves and a jump; it
|
|
|
|
|
;;; should not involve heap allocation! Your Scheme system may have a superior
|
|
|
|
|
;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
|
|
|
|
|
;;; then this is a prime candidate for optimising these procedures,
|
|
|
|
|
;;; *especially* the many optional START/END index parameters.
|
|
|
|
|
;;;
|
|
|
|
|
;;; Note that optional arguments are also a barrier to procedure integration.
|
|
|
|
|
;;; If your Scheme system permits you to specify alternate entry points
|
|
|
|
|
;;; for a call when the number of optional arguments is known in a manner
|
|
|
|
|
;;; that enables inlining/integration, this can provide performance
|
|
|
|
|
;;; improvements.
|
|
|
|
|
;;;
|
|
|
|
|
;;; There is enough *explicit* error checking that *all* string-index
|
|
|
|
|
;;; operations should *never* produce a bounds error. Period. Feel like
|
|
|
|
|
;;; living dangerously? *Big* performance win to be had by replacing
|
|
|
|
|
;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops.
|
|
|
|
|
;;; Similarly, fixnum-specific operators can speed up the arithmetic done on
|
|
|
|
|
;;; the index values in the inner loops. The only arguments that are not
|
|
|
|
|
;;; completely error checked are
|
|
|
|
|
;;; - string lists (complete checking requires time proportional to the
|
|
|
|
|
;;; length of the list)
|
|
|
|
|
;;; - procedure arguments, such as char->char maps & predicates.
|
|
|
|
|
;;; There is no way to check the range & domain of procedures in Scheme.
|
|
|
|
|
;;; Procedures that take these parameters cannot fully check their
|
|
|
|
|
;;; arguments. But all other types to all other procedures are fully
|
|
|
|
|
;;; checked.
|
|
|
|
|
;;;
|
|
|
|
|
;;; This does open up the alternate possibility of simply *removing* these
|
|
|
|
|
;;; checks, and letting the safe primitives raise the errors. On a dumb
|
|
|
|
|
;;; Scheme system, this would provide speed (by eliminating the redundant
|
|
|
|
|
;;; error checks) at the cost of error-message clarity.
|
|
|
|
|
;;;
|
|
|
|
|
;;; See the comments preceding the hash function code for notes on tuning
|
|
|
|
|
;;; the default bound so that the code never overflows your implementation's
|
|
|
|
|
;;; fixnum size into bignum calculation.
|
|
|
|
|
;;;
|
|
|
|
|
;;; In an interpreted Scheme, some of these procedures, or the internal
|
|
|
|
|
;;; routines with % prefixes, are excellent candidates for being rewritten
|
|
|
|
|
;;; in C. Consider STRING-HASH, %STRING-COMPARE, the
|
|
|
|
|
;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX &
|
|
|
|
|
;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED,
|
|
|
|
|
;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!.
|
|
|
|
|
;;;
|
|
|
|
|
;;; It would also be nice to have the ability to mark some of these
|
|
|
|
|
;;; routines as candidates for inlining/integration.
|
|
|
|
|
;;;
|
|
|
|
|
;;; All the %-prefixed routines in this source code are written
|
|
|
|
|
;;; to be called internally to this library. They do *not* perform
|
|
|
|
|
;;; friendly error checks on the inputs; they assume everything is
|
|
|
|
|
;;; proper. They also do not take optional arguments. These two properties
|
|
|
|
|
;;; save calling overhead and enable procedure integration -- but they
|
|
|
|
|
;;; are not appropriate for exported routines.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Copyright details
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; The prefix/suffix and comparison routines in this code had (extremely
|
|
|
|
|
;;; distant) origins in MIT Scheme's string lib, and was substantially
|
|
|
|
|
;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is
|
|
|
|
|
;;; covered by MIT Scheme's open source copyright. See below for details.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The KMP string-search code was influenced by implementations written
|
|
|
|
|
;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this
|
|
|
|
|
;;; version was written from scratch by myself.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The remainder of this code was written from scratch by myself for scsh.
|
|
|
|
|
;;; The scsh copyright is a BSD-style open source copyright. See below for
|
|
|
|
|
;;; details.
|
|
|
|
|
;;; -Olin Shivers
|
|
|
|
|
|
1999-09-23 11:24:25 -04:00
|
|
|
|
;;; MIT Scheme copyright terms
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; This material was developed by the Scheme project at the Massachusetts
|
|
|
|
|
;;; Institute of Technology, Department of Electrical Engineering and
|
|
|
|
|
;;; Computer Science. Permission to copy and modify this software, to
|
|
|
|
|
;;; redistribute either the original software or a modified version, and
|
|
|
|
|
;;; to use this software for any purpose is granted, subject to the
|
|
|
|
|
;;; following restrictions and understandings.
|
|
|
|
|
;;;
|
|
|
|
|
;;; 1. Any copy made of this software must include this copyright notice
|
|
|
|
|
;;; in full.
|
|
|
|
|
;;;
|
|
|
|
|
;;; 2. Users of this software agree to make their best efforts (a) to
|
|
|
|
|
;;; return to the MIT Scheme project any improvements or extensions that
|
|
|
|
|
;;; they make, so that these may be included in future releases; and (b)
|
|
|
|
|
;;; to inform MIT of noteworthy uses of this software.
|
|
|
|
|
;;;
|
|
|
|
|
;;; 3. All materials developed as a consequence of the use of this
|
|
|
|
|
;;; software shall duly acknowledge such use, in accordance with the usual
|
|
|
|
|
;;; standards of acknowledging credit in academic research.
|
|
|
|
|
;;;
|
|
|
|
|
;;; 4. MIT has made no warrantee or representation that the operation of
|
|
|
|
|
;;; this software will be error-free, and MIT is under no obligation to
|
|
|
|
|
;;; provide any services, by way of maintenance, update, or otherwise.
|
|
|
|
|
;;;
|
|
|
|
|
;;; 5. In conjunction with products arising from the use of this material,
|
|
|
|
|
;;; there shall be no use of the name of the Massachusetts Institute of
|
|
|
|
|
;;; Technology nor of any adaptation thereof in any advertising,
|
|
|
|
|
;;; promotional, or sales literature without prior written consent from
|
|
|
|
|
;;; MIT in each case.
|
2001-03-21 17:28:31 -05:00
|
|
|
|
|
|
|
|
|
;;; Scsh copyright terms
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
;;; All rights reserved.
|
|
|
|
|
;;;
|
|
|
|
|
;;; Redistribution and use in source and binary forms, with or without
|
|
|
|
|
;;; modification, are permitted provided that the following conditions
|
|
|
|
|
;;; are met:
|
|
|
|
|
;;; 1. Redistributions of source code must retain the above copyright
|
|
|
|
|
;;; notice, this list of conditions and the following disclaimer.
|
|
|
|
|
;;; 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
;;; notice, this list of conditions and the following disclaimer in the
|
|
|
|
|
;;; documentation and/or other materials provided with the distribution.
|
|
|
|
|
;;; 3. The name of the authors may not be used to endorse or promote products
|
|
|
|
|
;;; derived from this software without specific prior written permission.
|
|
|
|
|
;;;
|
|
|
|
|
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
|
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
|
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
|
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
|
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
|
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
|
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
|
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
|
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
|
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|