From 7f1879b497c8d3ddf68eca16551672708dd1e050 Mon Sep 17 00:00:00 2001 From: sperber Date: Wed, 14 Aug 2002 14:23:41 +0000 Subject: [PATCH] Remove the old LIST-LIB, STRING-LIB, and CHAR-SET-LIB libraries, now that the corresponding SRFIs are in the Scheme 48 core. All this hopefully preserving backwards compatibility. --- scsh/fr.scm | 2 +- scsh/lib/ccp-pack.scm | 6 +- scsh/lib/char-package.scm | 2 +- scsh/lib/cset-lib.html | 2016 ---------------------- scsh/lib/cset-lib.scm | 804 --------- scsh/lib/cset-lib.txt | 1271 -------------- scsh/lib/cset-obsolete.scm | 56 - scsh/lib/cset-package.scm | 151 -- scsh/lib/cset-tests.scm | 200 --- scsh/lib/list-lib.scm | 1599 ----------------- scsh/lib/list-pack.scm | 249 --- scsh/lib/srfi-1.html | 3257 ----------------------------------- scsh/lib/srfi-1.txt | 2015 ---------------------- scsh/lib/string-lib.scm | 2023 ---------------------- scsh/lib/string-pack.scm | 315 ---- scsh/lib/string-package.scm | 350 ---- scsh/rdelim.scm | 32 +- scsh/rx/loadem.scm | 7 - scsh/rx/modules.scm | 26 - scsh/rx/packages.scm | 18 +- scsh/rx/regexp.scm | 4 +- scsh/scsh-package.scm | 34 +- 22 files changed, 57 insertions(+), 14380 deletions(-) delete mode 100644 scsh/lib/cset-lib.html delete mode 100644 scsh/lib/cset-lib.scm delete mode 100644 scsh/lib/cset-lib.txt delete mode 100644 scsh/lib/cset-obsolete.scm delete mode 100644 scsh/lib/cset-package.scm delete mode 100644 scsh/lib/cset-tests.scm delete mode 100644 scsh/lib/list-lib.scm delete mode 100644 scsh/lib/list-pack.scm delete mode 100644 scsh/lib/srfi-1.html delete mode 100644 scsh/lib/srfi-1.txt delete mode 100644 scsh/lib/string-lib.scm delete mode 100644 scsh/lib/string-pack.scm delete mode 100644 scsh/lib/string-package.scm delete mode 100644 scsh/rx/loadem.scm delete mode 100644 scsh/rx/modules.scm diff --git a/scsh/fr.scm b/scsh/fr.scm index a50782f..452fb74 100644 --- a/scsh/fr.scm +++ b/scsh/fr.scm @@ -307,7 +307,7 @@ (let-optionals args ((delims default-record-delims) (elide? #f) (handle-delim 'trim)) - (let ((delims (->char-set delims))) + (let ((delims (x->char-set delims))) (case handle-delim ((trim) ; TRIM-delimiter reader. diff --git a/scsh/lib/ccp-pack.scm b/scsh/lib/ccp-pack.scm index 50f4d1c..13e6be0 100644 --- a/scsh/lib/ccp-pack.scm +++ b/scsh/lib/ccp-pack.scm @@ -93,13 +93,13 @@ )) (define-structure ccp-lib ccp-lib-interface - (open char-set-lib + (open srfi-14 ascii defrec-package - string-lib + srfi-13 let-opt receiving - list-lib ; EVERY + (subset srfi-1 (every fold)) error-package scheme) (files ccp) diff --git a/scsh/lib/char-package.scm b/scsh/lib/char-package.scm index 318aafd..47722cb 100644 --- a/scsh/lib/char-package.scm +++ b/scsh/lib/char-package.scm @@ -27,7 +27,7 @@ (define-structure char-predicates-lib char-predicates-interface (open error-package ; ERROR scsh-utilities ; DEPRECATED-PROC - char-set-lib + srfi-14 scheme) (begin diff --git a/scsh/lib/cset-lib.html b/scsh/lib/cset-lib.html deleted file mode 100644 index b7eb80f..0000000 --- a/scsh/lib/cset-lib.html +++ /dev/null @@ -1,2016 +0,0 @@ - - - - - - - - - SRFI 14: Character-set Library - - - - - - - - - - -

Title

-
-Character-set Library -
- - -

Author

-
- Olin Shivers / - shivers@ai.mit.edu -
- - -

Table of contents

- - - - - -

Abstract

-

- -The ability to efficiently represent and manipulate sets of characters is an -unglamorous but very useful capability for text-processing code -- one that -tends to pop up in the definitions of other libraries. Hence it is useful to -specify a general substrate for this functionality early. This SRFI defines a -general library that provides this functionality. - -It is accompanied by a reference implementation for the spec. The reference -implementation is fairly efficient, straightforwardly portable, and has a -"free software" copyright. The implementation is tuned for "small" 7 or 8 -bit character types, such as ASCII or Latin-1; the data structures and -algorithms would have to be altered for larger 16 or 32 bit character types -such as Unicode -- however, the specs have been carefully designed with these -larger character types in mind. - -Several forthcoming SRFIs can be defined in terms of this one: -

- - - -

Variable Index

-

-Here is the complete set of bindings -- procedural and otherwise -- -exported by this library. In a Scheme system that has a module or package -system, these procedures should be contained in a module named "char-set-lib". - -

-
-
Predicates & comparison -
-
-char-set? char-set= char-set<= char-set-hash
-
- -
Iterating over character sets -
-
-char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? 
-char-set-fold char-set-unfold char-set-unfold!
-char-set-for-each char-set-map
-
- -
Creating character sets -
-
-char-set-copy char-set
-
-list->char-set  string->char-set
-list->char-set! string->char-set!
-    
-char-set-filter  ucs-range->char-set 
-char-set-filter! ucs-range->char-set!
-
-->char-set
-
- -
Querying character sets -
-
-char-set->list char-set->string
-char-set-size char-set-count char-set-contains?
-char-set-every char-set-any
-
- -
Character-set algebra -
-
-char-set-adjoin  char-set-delete
-char-set-adjoin! char-set-delete!
-
-char-set-complement  char-set-union  char-set-intersection
-char-set-complement! char-set-union! char-set-intersection!
-
-char-set-difference  char-set-xor  char-set-diff+intersection
-char-set-difference! char-set-xor! char-set-diff+intersection!
-
- -
Standard character sets -
-
-char-set:lower-case  char-set:upper-case  char-set:title-case
-char-set:letter      char-set:digit       char-set:letter+digit
-char-set:graphic     char-set:printing    char-set:whitespace
-char-set:iso-control char-set:punctuation char-set:symbol
-char-set:hex-digit   char-set:blank       char-set:ascii
-char-set:empty       char-set:full
-
- -
-
- - -

Rationale

- -

-The ability to efficiently manipulate sets of characters is quite -useful for text-processing code. Encapsulating this functionality in -a general, efficiently implemented library can assist all such code. -This library defines a new data structure to represent these sets, called -a "char-set." The char-set type is distinct from all other types. - -

-This library is designed to be portable across implementations that use -different character types and representations, especially ASCII, Latin-1 -and Unicode. Some effort has been made to preserve compatibility with Java -in the Unicode case (see the definition of char-set:whitespace for the -single real deviation). - - -

Linear-update operations

- -

-The procedures of this SRFI, by default, are "pure functional" -- they do not -alter their parameters. However, this SRFI defines a set of "linear-update" -procedures which have a hybrid pure-functional/side-effecting semantics: they -are allowed, but not required, to side-effect one of their parameters in order -to construct their result. An implementation may legally implement these -procedures as pure, side-effect-free functions, or it may implement them using -side effects, depending upon the details of what is the most efficient or -simple to implement in terms of the underlying representation. - -

-The linear-update routines all have names ending with "!". - -

-Clients of these procedures may not rely upon these procedures working by -side effect. For example, this is not guaranteed to work: -

-(let* ((cs1 (char-set #\a #\b #\c))      ; cs1 = {a,b,c}.
-       (cs2 (char-set-adjoin! cs1 #\d))) ; Add d to {a,b,c}.
-  cs1) ; Could be either {a,b,c} or {a,b,c,d}.
-
-

-However, this is well-defined: -

-(let ((cs (char-set #\a #\b #\c)))
-  (char-set-adjoin! cs #\d)) ; Add d to {a,b,c}.
-
- -

-So clients of these procedures write in a functional style, but must -additionally be sure that, when the procedure is called, there are no other -live pointers to the potentially-modified character set (hence the term -"linear update"). - -

-There are two benefits to this convention: -

- -

-Note that pure functional representations are the right thing for -ASCII- or Latin-1-based Scheme implementations, since a char-set can -be represented in an ASCII Scheme with 4 32-bit words. Pure set-algebra -operations on such a representation are very fast and efficient. Programmers -who code using linear-update operations are guaranteed the system will -provide the best implementation across multiple platforms. - -

-In practice, these procedures are most useful for efficiently constructing -character sets in a side-effecting manner, in some limited local context, -before passing the character set outside the local construction scope to be -used in a functional manner. - -

-Scheme provides no assistance in checking the linearity of the potentially -side-effected parameters passed to these functions --- there's no linear -type checker or run-time mechanism for detecting violations. (But -sophisticated programming environments, such as DrScheme, might help.) - - -

Extra-SRFI recommendations

-

-Users are cautioned that the R5RS predicates -

-char-alphabetic?
-char-numeric?
-char-whitespace?
-char-upper-case?
-char-lower-case?
-
-
-

-may or may not be in agreement with the SRFI 14 base character sets -

- -char-set:letter
-char-set:digit
-char-set:whitespace
-char-set:upper-case
-char-set:lower-case
-
-
-

-Implementors are strongly encouraged to bring these predicates into -agreement with the base character sets of this SRFI; not to do so risks -major confusion. - - - -

Specification

-

-In the following procedure specifications: -

- -

-Passing values to procedures with these parameters that do not satisfy these -types is an error. - -

-Unless otherwise noted in the specification of a procedure, procedures -always return character sets that are distinct (from the point of view -of the linear-update operations) from the parameter character sets. For -example, char-set-adjoin is guaranteed to provide a fresh character set, -even if it is not given any character parameters. - -

-Parameters given in square brackets are optional. Unless otherwise noted in the -text describing the procedure, any prefix of these optional parameters may -be supplied, from zero arguments to the full list. When a procedure returns -multiple values, this is shown by listing the return values in square -brackets, as well. So, for example, the procedure with signature -

-halts? f [x init-store] -> [boolean integer]
-
-would take one (f), two (f, x) -or three (f, x, init-store) input parameters, -and return two values, a boolean and an integer. - -

-A parameter followed by "..." means zero-or-more elements. -So the procedure with the signature -

-sum-squares x ...  -> number
-
-takes zero or more arguments (x ...), -while the procedure with signature -
-spell-check doc dict1 dict2 ... -> string-list
-
-takes two required parameters -(doc and dict1) -and zero or more optional parameters (dict2 ...). - - - -

General procedures

-
- - -
- -char-set? obj -> boolean -
- - Is the object obj a character set? - - -
- -char-set= cs1 ... -> boolean -
- Are the character sets equal? -

- Boundary cases: -

-(char-set=) => true
-(char-set= cs) => true
-
- -

- Rationale: transitive binary relations are generally extended to n-ary - relations in Scheme, which enables clearer, more concise code to be - written. While the zero-argument and one-argument cases will almost - certainly not arise in first-order uses of such relations, they may well - arise in higher-order cases or macro-generated code. - E.g., consider -

-(apply char-set= cset-list)
-
-

- This is well-defined if the list is empty or a singleton list. Hence - we extend these relations to any number of arguments. Implementors - have reported actual uses of n-ary relations in higher-order cases - allowing for fewer than two arguments. The way of Scheme is to handle the - general case; we provide the fully general extension. -

- A counter-argument to this extension is that - R5RS's - transitive binary arithmetic relations - (=, <, etc.) - require at least two arguments, hence - this decision is a break with the prior convention -- although it is - at least one that is backwards-compatible. - - -

- -char-set<= cs1 ... -> boolean -
- Returns true if every character set csi is - a subset of character set csi+1. - -

-Boundary cases: -

-(char-set<=) => true
-(char-set<= cs) => true
-
-

-Rationale: See char-set= for discussion of zero- and one-argument -applications. Consider testing a list of char-sets for monotonicity -with -

-(apply char-set<= cset-list)
-
- - -
- -char-set-hash cs [bound] -> integer -
- Compute a hash value for the character set cs. - Bound is a non-negative - exact integer specifying the range of the hash function. A positive - value restricts the return value to the range [0,bound). - -

- If bound is either zero or not given, the implementation may use - an implementation-specific default value, chosen to be as large as - is efficiently practical. For instance, the default range might be chosen - for a given implementation to map all strings into the range of - integers that can be represented with a single machine word. - - -

- Invariant: -

-(char-set= cs1 cs2) => (= (char-set-hash cs1 b) (char-set-hash cs2 b))
-
- -

- A legal but nonetheless discouraged implementation: -

-(define (char-set-hash cs . maybe-bound) 1)
-
- -

- Rationale: allowing the user to specify an explicit bound simplifies user - code by removing the mod operation that typically accompanies every hash - computation, and also may allow the implementation of the hash function to - exploit a reduced range to efficiently compute the hash value. - E.g., for - small bounds, the hash function may be computed in a fashion such that - intermediate values never overflow into bignum integers, allowing the - implementor to provide a fixnum-specific "fast path" for computing the - common cases very rapidly. - -

- - -

Iterating over character sets

- -
- -
- - - - -char-set-cursor cset -> cursor -
-char-set-ref cset cursor -> char -
-char-set-cursor-next cset cursor -> cursor -
-end-of-char-set? cursor -> boolean -
- Cursors are a low-level facility for iterating over the characters in a - set. A cursor is a value that indexes a character in a char set. - char-set-cursor produces a new cursor for a given char set. - The set element indexed by the cursor is fetched with - char-set-ref. - A cursor index is incremented with char-set-cursor-next; - in this way, code can step through every character in a char set. - Stepping a cursor "past the end" of a char set produces a cursor that - answers true to end-of-char-set?. - It is an error to pass such a cursor to char-set-ref or to - char-set-cursor-next. - -

- A cursor value may not be used in conjunction with a different character - set; if it is passed to char-set-ref or - char-set-cursor-next with - a character set other than the one used to create it, the results and - effects are undefined. - -

- Cursor values are not necessarily distinct from other types. - They may be - integers, linked lists, records, procedures or other values. This license - is granted to allow cursors to be very "lightweight" values suitable for - tight iteration, even in fairly simple implementations. - -

- Note that these primitives are necessary to export an iteration facility - for char sets to loop macros. - -

- Example: -

-(define cs (char-set #\G #\a #\T #\e #\c #\h))
-
-;; Collect elts of CS into a list.
-(let lp ((cur (char-set-cursor cs)) (ans '()))
-  (if (end-of-char-set? cur) ans
-      (lp (char-set-cursor-next cs cur)
-          (cons (char-set-ref cs cur) ans))))
-  => (#\G #\T #\a #\c #\e #\h)
-
-;; Equivalently, using a list unfold (from SRFI 1):
-(unfold-right end-of-char-set? 
-              (curry char-set-ref cs)
-	      (curry char-set-cursor-next cs)
-	      (char-set-cursor cs))
-  => (#\G #\T #\a #\c #\e #\h)
-
- -

- Rationale: Note that the cursor API's four functions "fit" the functional - protocol used by the unfolders provided by the list, string and char-set - SRFIs (see the example above). By way of contrast, here is a simpler, - two-function API that was rejected for failing this criterion. Besides - char-set-cursor, it provided a single - function that mapped a cursor and a character set to two values, the - indexed character and the next cursor. If the cursor had exhausted the - character set, then this function returned false instead of the character - value, and another end-of-char-set cursor. In this way, the other three - functions of the current API were combined together. - - -

- -char-set-fold kons knil cs -> object -
- This is the fundamental iterator for character sets. Applies the function - kons across the character set cs using initial state value knil. That is, - if cs is the empty set, the procedure returns knil. Otherwise, some - element c of cs is chosen; - let cs' be the remaining, unchosen characters. - The procedure returns -
-(char-set-fold kons (kons c knil) cs')
-
-

- Examples: -

-;; CHAR-SET-MEMBERS
-(lambda (cs) (char-set-fold cons '() cs))
-
-;; CHAR-SET-SIZE
-(lambda (cs) (char-set-fold (lambda (c i) (+ i 1)) 0 cs))
-
-;; How many vowels in the char set?
-(lambda (cs) 
-  (char-set-fold (lambda (c i) (if (vowel? c) (+ i 1) i))
-                 0 cs))
-
- - -
- - -char-set-unfold  f p g seed [base-cs] -> char-set -
char-set-unfold! f p g seed base-cs -> char-set -
- This is a fundamental constructor for char-sets. -
    -
  • G is used to generate a series of "seed" values from the initial seed: - seed, (g seed), (g2 seed), (g3 seed), ... -
  • P tells us when to stop -- when it returns true when applied to one - of these seed values. -
  • F maps each seed value to a character. These characters are added - to the base character set base-cs to form the result; base-cs defaults to - the empty set. char-set-unfold! adds the characters to base-cs in a - linear-update -- it is allowed, but not required, to side-effect - and use base-cs's storage to construct the result. -
- -

- More precisely, the following definitions hold, ignoring the - optional-argument issues: - -

-(define (char-set-unfold p f g seed base-cs) 
-  (char-set-unfold! p f g seed (char-set-copy base-cs)))
-
-(define (char-set-unfold! p f g seed base-cs)
-  (let lp ((seed seed) (cs base-cs))
-        (if (p seed) cs                                 ; P says we are done.
-            (lp (g seed)                                ; Loop on (G SEED).
-                (char-set-adjoin! cs (f seed))))))      ; Add (F SEED) to set.
-
- - (Note that the actual implementation may be more efficient.) - -

- Examples: -

                         
-(port->char-set p) = (char-set-unfold eof-object? values
-                                      (lambda (x) (read-char p))
-                                      (read-char p))
-
-(list->char-set lis) = (char-set-unfold null? car cdr lis)
-
- -
- -char-set-for-each proc cs -> unspecified -
- Apply procedure proc to each character in the character set cs. - Note that the order in which proc is applied to the characters in the - set is not specified, and may even change from one procedure application - to another. - -

- Nothing at all is specified about the value returned by this procedure; it - is not even required to be consistent from call to call. It is simply - required to be a value (or values) that may be passed to a command - continuation, e.g. as the value of an expression appearing as a - non-terminal subform of a begin expression. - Note that in - R5RS, - this restricts the procedure to returning a single value; - non-R5RS systems may not even provide this restriction. - - -

- -char-set-map proc cs -> char-set -
- proc is a char->char procedure. Apply it to all the characters in - the char-set cs, and collect the results into a new character set. - -

- Essentially lifts proc from a char->char procedure to a char-set -> - char-set procedure. - -

- Example: -

-(char-set-map char-downcase cset)
-
-
- - - -

Creating character sets

-
- - -
- -char-set-copy cs -> char-set -
- Returns a copy of the character set cs. "Copy" means that if either the - input parameter or the result value of this procedure is passed to one of - the linear-update procedures described below, the other character set is - guaranteed not to be altered. - -

- A system that provides pure-functional implementations of the - linear-operator suite could implement this procedure as the identity - function -- so copies are not guaranteed to be distinct by eq?. - - -

- -char-set char1 ... -> char-set -
- Return a character set containing the given characters. - - -
- - -list->char-set  char-list [base-cs] -> char-set -
list->char-set! char-list base-cs -> char-set -
- Return a character set containing the characters in the list of - characters char-list. - -

- If character set base-cs is provided, the characters from char-list - are added to it. list->char-set! is allowed, but not required, - to side-effect and reuse the storage in base-cs; - list->char-set produces a fresh character set. - - -

- - -string->char-set  s [base-cs] -> char-set -
string->char-set! s base-cs -> char-set -
- - Return a character set containing the characters in the string s. - -

- If character set base-cs is provided, the characters from s are added to - it. string->char-set! is allowed, but not required, to side-effect and - reuse the storage in base-cs; string->char-set produces a fresh character - set. - - -

- - -char-set-filter  pred cs [base-cs] -> char-set -
char-set-filter! pred cs base-cs -> char-set -
- - Returns a character set containing every character c - in cs such that (pred c) - returns true. - -

- If character set base-cs is provided, the characters specified - by pred are added to it. - char-set-filter! is allowed, but not required, - to side-effect and reuse the storage in base-cs; - char-set-filter produces a fresh character set. - -

- An implementation may not save away a reference to pred and - invoke it after char-set-filter or - char-set-filter! returns -- that is, "lazy," - on-demand implementations are not allowed, as pred may have - external dependencies on mutable data or have other side-effects. - -

- Rationale: This procedure provides a means of converting a character - predicate into its equivalent character set; the cs parameter - allows the programmer to bound the predicate's domain. Programmers should - be aware that filtering a character set such as char-set:full - could be a very expensive operation in an implementation that provided an - extremely large character type, such as 32-bit Unicode. An earlier draft - of this library provided a simple predicate->char-set - procedure, which was rejected in favor of char-set-filter for - this reason. - - - -

- - -ucs-range->char-set  lower upper [error? base-cs] -> char-set -
ucs-range->char-set! lower upper error? base-cs -> char-set -
- Lower and upper are exact non-negative integers; - lower <= upper. - -

- Returns a character set containing every character whose ISO/IEC 10646 - UCS-4 code lies in the half-open range [lower,upper). - -

    -
  • If the requested range includes unassigned UCS values, these are - silently ignored (the current UCS specification has "holes" in the - space of assigned codes). - -
  • If the requested range includes "private" or "user space" codes, these - are handled in an implementation-specific manner; however, a UCS- or - Unicode-based Scheme implementation should pass them through - transparently. - -
  • If any code from the requested range specifies a valid, assigned - UCS character that has no corresponding representative in the - implementation's character type, then (1) an error is raised if error? - is true, and (2) the code is ignored if error? is false (the default). - This might happen, for example, if the implementation uses ASCII - characters, and the requested range includes non-ASCII characters. -
- -

- If character set base-cs is provided, the characters specified by the - range are added to it. ucs-range->char-set! is allowed, but not required, - to side-effect and reuse the storage in base-cs; - ucs-range->char-set produces a fresh character set. - -

- Note that ASCII codes are a subset of the Latin-1 codes, which are in turn - a subset of the 16-bit Unicode codes, which are themselves a subset of the - 32-bit UCS-4 codes. We commit to a specific encoding in this routine, - regardless of the underlying representation of characters, so that client - code using this library will be portable. I.e., a conformant Scheme - implementation may use EBCDIC or SHIFT-JIS to encode characters; it must - simply map the UCS characters from the given range into the native - representation when possible, and report errors when not possible. - - -

- -->char-set x -> char-set -
- Coerces x into a char-set. - X may be a string, character or - char-set. A string is converted to the set of its constituent characters; - a character is converted to a singleton set; a char-set is returned - as-is. - This procedure is intended for use by other procedures that want to - provide "user-friendly," wide-spectrum interfaces to their clients. - -
- - -

Querying character sets

-
- - -
- -char-set-size cs -> integer -
- Returns the number of elements in character set cs. - - -
- -char-set-count pred cs -> integer -
- Apply pred to the chars of character set cs, and return the number - of chars that caused the predicate to return true. - - -
- -char-set->list cs -> character-list -
- This procedure returns a list of the members of character set cs. - The order in which cs's characters appear in the list is not defined, - and may be different from one call to another. - - -
- -char-set->string cs -> string -
- This procedure returns a string containing the members of character set cs. - The order in which cs's characters appear in the string is not defined, - and may be different from one call to another. - - -
- -char-set-contains? cs char -> boolean -
- This procedure tests char for membership in character set cs. - -

- The MIT Scheme character-set package called this procedure - char-set-member?, but the argument order isn't consistent with the name. - - -

- - -char-set-every pred cs -> boolean -
char-set-any   pred cs -> boolean -
- The char-set-every procedure returns true if predicate pred - returns true of every character in the character set cs. - Likewise, char-set-any applies pred to every character in - character set cs, and returns the first true value it finds. - If no character produces a true value, it returns false. - The order in which these procedures sequence through the elements of - cs is not specified. - -

- Note that if you need to determine the actual character on which a - predicate returns true, use char-set-any and arrange for the predicate - to return the character parameter as its true value, e.g. -

-(char-set-any (lambda (c) (and (char-upper-case? c) c)) 
-              cs)
-
-
- - -

Character-set algebra

-
- - -
- - -char-set-adjoin cs char1 ... -> char-set -
char-set-delete cs char1 ... -> char-set -
- Add/delete the chari characters to/from character set cs. - - -
- - -char-set-adjoin! cs char1 ... -> char-set -
char-set-delete! cs char1 ... -> char-set -
- - Linear-update variants. These procedures are allowed, but not - required, to side-effect their first parameter. - - -
- - - - - - -char-set-complement cs -> char-set -
char-set-union cs1 ... -> char-set -
char-set-intersection cs1 ... -> char-set -
char-set-difference cs1 cs2 ... -> char-set -
char-set-xor cs1 ... -> char-set -
char-set-diff+intersection cs1 cs2 ... -> [char-set char-set] -
- These procedures implement set complement, union, intersection, - difference, and exclusive-or for character sets. The union, intersection - and xor operations are n-ary. The difference function is also n-ary, - associates to the left (that is, it computes the difference between - its first argument and the union of all the other arguments), - and requires at least one argument. - -

- Boundary cases: -

-(char-set-union) => char-set:empty
-(char-set-intersection) => char-set:full
-(char-set-xor) => char-set:empty
-(char-set-difference cs) => cs
-
- -

- char-set-diff+intersection returns both the difference and the - intersection of the arguments -- it partitions its first parameter. - It is equivalent to -

-(values (char-set-difference cs1 cs2 ...)
-        (char-set-intersection cs1 (char-set-union cs2 ...)))
-
- but can be implemented more efficiently. - -

- Programmers should be aware that char-set-complement could potentially - be a very expensive operation in Scheme implementations that provide - a very large character type, such as 32-bit Unicode. If this is a - possibility, sets can be complimented with respect to a smaller - universe using char-set-difference. - - - -

- - - - - - -char-set-complement! cs -> char-set -
char-set-union! cs1 cs2 ... -> char-set -
char-set-intersection! cs1 cs2 ... -> char-set -
char-set-difference! cs1 cs2 ... -> char-set -
char-set-xor! cs1 cs2 ... -> char-set -
char-set-diff+intersection! cs1 cs2 cs3 ... -> [char-set char-set] -
- These are linear-update variants of the set-algebra functions. - They are allowed, but not required, to side-effect their first (required) - parameter. - -

- char-set-diff+intersection! is allowed to side-effect both - of its two required parameters, cs1 - and cs2. -

- - -

Standard character sets

-

-Several character sets are predefined for convenience: - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - - - - - -
char-set:lower-case Lower-case letters
char-set:upper-case Upper-case letters
char-set:title-case Title-case letters
char-set:letter Letters
char-set:digit Digits
char-set:letter+digit Letters and digits
char-set:graphic Printing characters except spaces
char-set:printing Printing characters including spaces
char-set:whitespace Whitespace characters
char-set:iso-control The ISO control characters
char-set:punctuation Punctuation characters
char-set:symbol Symbol characters
char-set:hex-digit A hexadecimal digit: 0-9, A-F, a-f
char-set:blank Blank characters -- horizontal whitespace
char-set:ascii All characters in the ASCII set.
char-set:empty Empty set
char-set:full All characters
-
- -

-Note that there may be characters in char-set:letter that are neither upper or -lower case---this might occur in implementations that use a character type -richer than ASCII, such as Unicode. A "graphic character" is one that would -put ink on your page. While the exact composition of these sets may vary -depending upon the character type provided by the underlying Scheme system, -here are the definitions for some of the sets in an ASCII implementation: -

- - - - - - - - - - - - - -
char-set:lower-case a-z
char-set:upper-case A-Z
char-set:letter A-Z and a-z
char-set:digit 0123456789
char-set:punctuation !"#%&'()*,-./:;?@[\]_{}
char-set:symbol $+<=>^`|~
char-set:whitespace Space, newline, tab, form feed,
vertical tab, carriage return
char-set:blank Space and tab
char-set:graphic letter + digit + punctuation + symbol
char-set:printing graphic + whitespace
char-set:iso-control ASCII 0-31 and 127
-
- -

-Note that the existence of the char-set:ascii set implies that the underlying -character set is required to be at least as rich as ASCII (including -ASCII's control characters). - -

-Rationale: The name choices reflect a shift from the older "alphabetic/numeric" -terms found in -R5RS -and Posix to newer, Unicode-influenced "letter/digit" lexemes. - - -

- Unicode, Latin-1 and ASCII definitions of the standard character sets -

-

-In Unicode Scheme implementations, the base character sets are compatible with -Java's Unicode specifications. For ASCII or Latin-1, we simply restrict the -Unicode set specifications to their first 128 or 256 codes, respectively. -Scheme implementations that are not based on ASCII, Latin-1 or Unicode should -attempt to preserve the sense or spirit of these definitions. - -

-The following descriptions frequently make reference to the "Unicode character -database." This is a file, available at URL -

- -ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt -
-

-Each line contains a description of a Unicode character. The first -semicolon-delimited field of the line gives the hex value of the character's -code; the second field gives the name of the character, and the third field -gives a two-letter category. Other fields give simple 1-1 case-mappings for -the character and other information; see -

- -ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html -
-

-for further description of the file's format. Note in particular the -two-letter category specified in the the third field, which is referenced -frequently in the descriptions below. - - -

char-set:lower-case

-

-For Unicode, we follow Java's specification: a character is lowercase if -

- -

-The lower-case ASCII characters are -

- abcdefghijklmnopqrstuvwxyz -
-

-Latin-1 adds another 33 lower-case characters to the ASCII set: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
00B5 MICRO SIGN
00DF LATIN SMALL LETTER SHARP S
00E0 LATIN SMALL LETTER A WITH GRAVE
00E1 LATIN SMALL LETTER A WITH ACUTE
00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
00E3 LATIN SMALL LETTER A WITH TILDE
00E4 LATIN SMALL LETTER A WITH DIAERESIS
00E5 LATIN SMALL LETTER A WITH RING ABOVE
00E6 LATIN SMALL LETTER AE
00E7 LATIN SMALL LETTER C WITH CEDILLA
00E8 LATIN SMALL LETTER E WITH GRAVE
00E9 LATIN SMALL LETTER E WITH ACUTE
00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
00EB LATIN SMALL LETTER E WITH DIAERESIS
00EC LATIN SMALL LETTER I WITH GRAVE
00ED LATIN SMALL LETTER I WITH ACUTE
00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
00EF LATIN SMALL LETTER I WITH DIAERESIS
00F0 LATIN SMALL LETTER ETH
00F1 LATIN SMALL LETTER N WITH TILDE
00F2 LATIN SMALL LETTER O WITH GRAVE
00F3 LATIN SMALL LETTER O WITH ACUTE
00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
00F5 LATIN SMALL LETTER O WITH TILDE
00F6 LATIN SMALL LETTER O WITH DIAERESIS
00F8 LATIN SMALL LETTER O WITH STROKE
00F9 LATIN SMALL LETTER U WITH GRAVE
00FA LATIN SMALL LETTER U WITH ACUTE
00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
00FC LATIN SMALL LETTER U WITH DIAERESIS
00FD LATIN SMALL LETTER Y WITH ACUTE
00FE LATIN SMALL LETTER THORN
00FF LATIN SMALL LETTER Y WITH DIAERESIS
-
-

-Note that three of these have no corresponding Latin-1 upper-case character: -

- - - - -
00B5 MICRO SIGN
00DF LATIN SMALL LETTER SHARP S
00FF LATIN SMALL LETTER Y WITH DIAERESIS
-
-

-(The compatibility micro character uppercases to the non-Latin-1 Greek capital -mu; the German sharp s character uppercases to the pair of characters "SS," -and the capital y-with-diaeresis is non-Latin-1.) - -

-(Note that the Java spec for lowercase characters given at -

- -http://java.sun.com/docs/books/jls/html/javalang.doc4.html#14345 -
-

-is inconsistent. U+00B5 MICRO SIGN fulfills the requirements for a lower-case -character (as of Unicode 3.0), but is not given in the numeric list of -lower-case character codes.) - -

-(Note that the Java spec for isLowerCase() given at -

- -http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html#isLowerCase(char) -
-

-gives three mutually inconsistent definitions of "lower case." The first is -the definition used in this SRFI. Following text says "A character is -considered to be lowercase if and only if it is specified to be lowercase by -the Unicode 2.0 standard (category Ll in the Unicode specification data -file)." The former spec excludes U+00AA FEMININE ORDINAL INDICATOR and -U+00BA MASCULINE ORDINAL INDICATOR; the later spec includes them. Finally, -the spec enumerates a list of characters in the Latin-1 subset; this list -excludes U+00B5 MICRO SIGN, which is included in both of the previous specs.) - - -

char-set:upper-case

-

-For Unicode, we follow Java's specification: a character is uppercase if -

- -

-The upper-case ASCII characters are -

-ABCDEFGHIJKLMNOPQRSTUVWXYZ -
-

-Latin-1 adds another 30 upper-case characters to the ASCII set: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
00C0 LATIN CAPITAL LETTER A WITH GRAVE
00C1 LATIN CAPITAL LETTER A WITH ACUTE
00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
00C3 LATIN CAPITAL LETTER A WITH TILDE
00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
00C6 LATIN CAPITAL LETTER AE
00C7 LATIN CAPITAL LETTER C WITH CEDILLA
00C8 LATIN CAPITAL LETTER E WITH GRAVE
00C9 LATIN CAPITAL LETTER E WITH ACUTE
00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB LATIN CAPITAL LETTER E WITH DIAERESIS
00CC LATIN CAPITAL LETTER I WITH GRAVE
00CD LATIN CAPITAL LETTER I WITH ACUTE
00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF LATIN CAPITAL LETTER I WITH DIAERESIS
00D0 LATIN CAPITAL LETTER ETH
00D1 LATIN CAPITAL LETTER N WITH TILDE
00D2 LATIN CAPITAL LETTER O WITH GRAVE
00D3 LATIN CAPITAL LETTER O WITH ACUTE
00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
00D5 LATIN CAPITAL LETTER O WITH TILDE
00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
00D8 LATIN CAPITAL LETTER O WITH STROKE
00D9 LATIN CAPITAL LETTER U WITH GRAVE
00DA LATIN CAPITAL LETTER U WITH ACUTE
00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
00DC LATIN CAPITAL LETTER U WITH DIAERESIS
00DD LATIN CAPITAL LETTER Y WITH ACUTE
00DE LATIN CAPITAL LETTER THORN
-
- -

char-set:title-case

-

-In Unicode, a character is titlecase if it has the category Lt in -the character attribute database. There are very few of these characters; -here is the entire 31-character list as of Unicode 3.0: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON -
01C8 LATIN CAPITAL LETTER L WITH SMALL LETTER J -
01CB LATIN CAPITAL LETTER N WITH SMALL LETTER J -
01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z -
1F88 GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI -
1F89 GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI -
1F8A GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI -
1F8B GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI -
1F8C GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI -
1F8D GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI -
1F8E GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -
1F8F GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -
1F98 GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI -
1F99 GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI -
1F9A GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI -
1F9B GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI -
1F9C GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI -
1F9D GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI -
1F9E GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -
1F9F GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -
1FA8 GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI -
1FA9 GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI -
1FAA GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI -
1FAB GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI -
1FAC GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI -
1FAD GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI -
1FAE GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -
1FAF GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -
1FBC GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI -
1FCC GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI -
1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -
-
-

-There are no ASCII or Latin-1 titlecase characters. - - - -

char-set:letter

-

-In Unicode, a letter is any character with one of the letter categories -(Lu, Ll, Lt, Lm, Lo) in the Unicode character database. - -

-There are 52 ASCII letters -

- abcdefghijklmnopqrstuvwxyz
- ABCDEFGHIJKLMNOPQRSTUVWXYZ
-
-

-There are 117 Latin-1 letters. These are the 115 characters that are -members of the Latin-1 char-set:lower-case and char-set:upper-case sets, -plus -

- - - -
00AA FEMININE ORDINAL INDICATOR
00BA MASCULINE ORDINAL INDICATOR
-
-

-(These two letters are considered lower-case by Unicode, but not by -Java or SRFI 14.) - - -

char-set:digit

- -

-In Unicode, a character is a digit if it has the category Nd in -the character attribute database. In Latin-1 and ASCII, the only -such characters are 0123456789. In Unicode, there are other digit -characters in other code blocks, such as Gujarati digits and Tibetan -digits. - - - -

char-set:hex-digit

-

-The only hex digits are 0123456789abcdefABCDEF. - - - -

char-set:letter+digit

-

-The union of char-set:letter and char-set:digit. - - -

char-set:graphic

-

-A graphic character is one that would put ink on paper. The ASCII and Latin-1 -graphic characters are the members of -

- - - - - -
char-set:letter
char-set:digit
char-set:punctuation
char-set:symbol
-
- - -

char-set:printing

-

-A printing character is one that would occupy space when printed, i.e., -a graphic character or a space character. char-set:printing is the union -of char-set:whitespace and char-set:graphic. - - -

char-set:whitespace

-

-In Unicode, a whitespace character is either -

- -

-There are 24 whitespace characters in Unicode 3.0: -

- - - - - - - - - - - - - - - - - - - - - - - - - -
0009 HORIZONTAL TABULATION \t control-I
000A LINE FEED \n control-J
000B VERTICAL TABULATION \v control-K
000C FORM FEED \f control-L
000D CARRIAGE RETURN \r control-M
0020 SPACE Zs
00A0 NO-BREAK SPACE Zs
1680 OGHAM SPACE MARK Zs
2000 EN QUAD Zs
2001 EM QUAD Zs
2002 EN SPACE Zs
2003 EM SPACE Zs
2004 THREE-PER-EM SPACE Zs
2005 FOUR-PER-EM SPACE Zs
2006 SIX-PER-EM SPACE Zs
2007 FIGURE SPACE Zs
2008 PUNCTUATION SPACE Zs
2009 THIN SPACE Zs
200A HAIR SPACE Zs
200B ZERO WIDTH SPACE Zs
2028 LINE SEPARATOR Zl
2029 PARAGRAPH SEPARATOR Zp
202F NARROW NO-BREAK SPACE Zs
3000 IDEOGRAPHIC SPACE Zs
-
-

-The ASCII whitespace characters are the first six characters in the above list --- line feed, horizontal tabulation, vertical tabulation, form feed, carriage -return, and space. These are also exactly the characters recognised by the -Posix isspace() procedure. Latin-1 adds the no-break space. - -

-Note: Java's isWhitespace() method is incompatible, including -

- - - - - - -
0009 HORIZONTAL TABULATION (\t control-I)
001C FILE SEPARATOR (control-\)
001D GROUP SEPARATOR (control-])
001E RECORD SEPARATOR (control-^)
001F UNIT SEPARATOR (control-_)
-
-

-and excluding -

- - -
00A0 NO-BREAK SPACE
-
-

-Java's excluding the no-break space means that tokenizers can simply break -character streams at "whitespace" boundaries. However, the exclusion introduces -exceptions in other places, e.g. char-set:printing is no longer simply the -union of char-set:graphic and char-set:whitespace. - - - -

char-set:iso-control

-

-The ISO control characters are the Unicode/Latin-1 characters in the ranges -[U+0000,U+001F] and [U+007F,U+009F]. - -

-ASCII restricts this set to the characters in the range [U+0000,U+001F] -plus the character U+007F. - -

-Note that Unicode defines other control characters which do not belong to this -set (hence the qualifying prefix "iso-" in the name). This restriction is -compatible with the Java IsISOControl() method. - - - -

char-set:punctuation

-

-In Unicode, a punctuation character is any character that has one of the -punctuation categories in the Unicode character database (Pc, Pd, Ps, -Pe, Pi, Pf, or Po.) - -

-ASCII has 23 punctuation characters: -

-!"#%&'()*,-./:;?@[\]_{}
-
-

-Latin-1 adds six more: -

- -
00A1 INVERTED EXCLAMATION MARK -
00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -
00AD SOFT HYPHEN -
00B7 MIDDLE DOT -
00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -
00BF INVERTED QUESTION MARK -
-
- -

-Note that the nine ASCII characters $+<=>^`|~ are not -punctuation. They are "symbols." - - - -

char-set:symbol

-

-In Unicode, a symbol is any character that has one of the symbol categories -in the Unicode character database (Sm, Sc, Sk, or So). There -are nine ASCII symbol characters: -

-$+<=>^`|~
-
-

-Latin-1 adds 18 more: -

- - - - - - - - - - - - - - - - - - - -
00A2 CENT SIGN
00A3 POUND SIGN
00A4 CURRENCY SIGN
00A5 YEN SIGN
00A6 BROKEN BAR
00A7 SECTION SIGN
00A8 DIAERESIS
00A9 COPYRIGHT SIGN
00AC NOT SIGN
00AE REGISTERED SIGN
00AF MACRON
00B0 DEGREE SIGN
00B1 PLUS-MINUS SIGN
00B4 ACUTE ACCENT
00B6 PILCROW SIGN
00B8 CEDILLA
00D7 MULTIPLICATION SIGN
00F7 DIVISION SIGN
-
- - -

char-set:blank

- -

-Blank chars are horizontal whitespace. In Unicode, a blank character is either -

- -

-There are eighteen blank characters in Unicode 3.0: -

- - - - - - - - - - - - - - - - - - - -
0009 HORIZONTAL TABULATION \t control-I
0020 SPACE Zs
00A0 NO-BREAK SPACE Zs
1680 OGHAM SPACE MARK Zs
2000 EN QUAD Zs
2001 EM QUAD Zs
2002 EN SPACE Zs
2003 EM SPACE Zs
2004 THREE-PER-EM SPACE Zs
2005 FOUR-PER-EM SPACE Zs
2006 SIX-PER-EM SPACE Zs
2007 FIGURE SPACE Zs
2008 PUNCTUATION SPACE Zs
2009 THIN SPACE Zs
200A HAIR SPACE Zs
200B ZERO WIDTH SPACE Zs
202F NARROW NO-BREAK SPACE Zs
3000 IDEOGRAPHIC SPACE Zs
-
-

-The ASCII blank characters are the first two characters above -- -horizontal tab and space. Latin-1 adds the no-break space. - -

-Java doesn't have the concept of "blank" characters, so there are no -compatibility issues. - - - -

Reference implementation

-

-This SRFI comes with a reference implementation. It resides at: -

- -http://srfi.schemers.org/srfi-14/srfi-14.scm -
-

-I have placed this source on the Net with an unencumbered, "open" copyright. -Some of the code in the reference implementation bears a distant family -relation to the MIT Scheme implementation, and being derived from that code, -is covered by the MIT Scheme copyright (which is a generic BSD-style -open-source copyright -- see the source file for details). The remainder of -the code was written by myself for scsh or for this SRFI; I have placed this -code under the scsh copyright, which is also a generic BSD-style open-source -copyright. - -

-The code is written for portability and should be simple to port to -any Scheme. It has only the following deviations from R4RS, clearly -discussed in the comments: -

- -

-The library is written for clarity and well-commented; the current source is -about 375 lines of source code and 375 lines of comments and white space. -It is also written for efficiency. Fast paths are provided for common cases. - -

-This is not to say that the implementation can't be tuned up for -a specific Scheme implementation. There are notes in comments addressing -ways implementors can tune the reference implementation for performance. - -

-In short, I've written the reference implementation to make it as painless -as possible for an implementor -- or a regular programmer -- to adopt this -library and get good results with it. - -

-The code uses a rather simple-minded, inefficient representation for -ASCII/Latin-1 char-sets -- a 256-character string. The character whose code is -i is in the set if s[i] = ASCII 1 (soh, or ^a); -not in the set if s[i] = ASCII 0 (nul). -A much faster and denser representation would be 16 or 32 bytes worth -of bit string. A portable implementation using bit sets awaits standards for -bitwise logical-ops and byte vectors. - -

-"Large" character types, such as Unicode, should use a sparse representation, -taking care that the Latin-1 subset continues to be represented with a -dense 32-byte bit set. - - - -

Acknowledgements

-

-The design of this library benefited greatly from the feedback provided during -the SRFI discussion phase. Among those contributing thoughtful commentary and -suggestions, both on the mailing list and by private discussion, were Paolo -Amoroso, Lars Arvestad, Alan Bawden, Jim Bender, Dan Bornstein, Per Bothner, -Will Clinger, Brian Denheyer, Kent Dybvig, Sergei Egorov, Marc Feeley, -Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Arthur A. Gleckler, Ben -Goetter, Sven Hartrumpf, Erik Hilsdale, Shiro Kawai, Richard Kelsey, Oleg -Kiselyov, Bengt Kleberg, Donovan Kolbly, Bruce Korb, Shriram Krishnamurthi, -Bruce Lewis, Tom Lord, Brad Lucier, Dave Mason, David Rush, Klaus Schilling, -Jonathan Sobel, Mike Sperber, Mikael Staldal, Vladimir Tsyshevsky, Donald -Welsh, and Mike Wilson. I am grateful to them for their assistance. - -

-I am also grateful the authors, implementors and documentors of all the -systems mentioned in the introduction. Aubrey Jaffer should be noted for his -work in producing Web-accessible versions of the R5RS spec, which was a -tremendous aid. - -

-This is not to imply that these individuals necessarily endorse the final -results, of course. - -

-During this document's long development period, great patience was exhibited -by Mike Sperber, who is the editor for the SRFI, and by Hillary Sullivan, -who is not. - - -

References & links

- -
-
[Java] -
- The following URLs provide documentation on relevant Java classes.
- - http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html -
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/String.html -
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/StringBuffer.html -
- http://java.sun.com/products/jdk/1.2/docs/api/java/text/Collator.html -
- http://java.sun.com/products/jdk/1.2/docs/api/java/text/package-summary.html - -
[MIT-Scheme] -
- http://www.swiss.ai.mit.edu/projects/scheme/ - -
[R5RS]
-
Revised5 report on the algorithmic language Scheme.
- R. Kelsey, W. Clinger, J. Rees (editors).
- Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998.
- and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998.
- Available at - http://www.schemers.org/Documents/Standards/. - -
[SRFI]
-
- The SRFI web site.
- http://srfi.schemers.org/ - -
[SRFI-14]
-
- SRFI-14: String libraries.
- http://srfi.schemers.org/srfi-14/ - -
-
- This document, in HTML: -
- http://srfi.schemers.org/srfi-14/srfi-14.html - -
- This document, in plain text format: -
- http://srfi.schemers.org/srfi-14/srfi-14.txt - -
Source code for the reference implementation: -
- - http://srfi.schemers.org/srfi-14/srfi-14.scm - -
Scheme 48 module specification, with typings: -
- - http://srfi.schemers.org/srfi-14/srfi-14-s48-module.scm - -
Regression-test suite: -
- http://srfi.schemers.org/srfi-14/srfi-14-tests.scm - -
-
- -
[Unicode] -
- http://www.unicode.org/ - -
[UnicodeData] -
- The Unicode character database.
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt -
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html -
- - -

Copyright

- -

-Certain portions of this document -- the specific, marked segments of text -describing the R5RS procedures -- were adapted with permission from the R5RS -report. - -

-All other text is copyright (C) Olin Shivers (1998, 1999, 2000). -All Rights Reserved. - -

-This document and translations of it may be copied and furnished to others, -and derivative works that comment on or otherwise explain it or assist in its -implementation may be prepared, copied, published and distributed, in whole or -in part, without restriction of any kind, provided that the above copyright -notice and this paragraph are included on all such copies and derivative -works. However, this document itself may not be modified in any way, such as -by removing the copyright notice or references to the Scheme Request For -Implementation process or editors, except as needed for the purpose of -developing SRFIs in which case the procedures for copyrights defined in the -SRFI process must be followed, or as required to translate it into languages -other than English. - -

-The limited permissions granted above are perpetual and will not be revoked by -the authors or their successors or assigns. - -

-This document and the information contained herein is provided on an -"as is" basis and the authors and the SRFI editors -disclaim all warranties, express or implied, including but not limited to any -warranty that the use of the information herein will not infringe any rights -or any implied warranties of merchantability or fitness for a particular -purpose. - - - - diff --git a/scsh/lib/cset-lib.scm b/scsh/lib/cset-lib.scm deleted file mode 100644 index 2effd4b..0000000 --- a/scsh/lib/cset-lib.scm +++ /dev/null @@ -1,804 +0,0 @@ -;;; SRFI-14 character-sets library -*- Scheme -*- -;;; -;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom. -;;; - Massively rehacked & extended by Olin Shivers 6/98. -;;; - Massively redesigned and rehacked 5/2000 during SRFI process. -;;; At this point, the code bears the following relationship to the -;;; MIT Scheme code: "This is my grandfather's axe. My father replaced -;;; the head, and I have replaced the handle." Nonetheless, we preserve -;;; the MIT Scheme copyright: -;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology -;;; The MIT Scheme license is a "free software" license. See the end of -;;; this file for the tedious details. - -;;; Exports: -;;; char-set? char-set= char-set<= -;;; char-set-hash -;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? -;;; char-set-fold char-set-unfold char-set-unfold! -;;; char-set-for-each char-set-map -;;; char-set-copy char-set -;;; -;;; list->char-set string->char-set -;;; list->char-set! string->char-set! -;;; -;;; filterchar-set ucs-range->char-set ->char-set -;;; filterchar-set! ucs-range->char-set! -;;; -;;; char-set->list char-set->string -;;; -;;; char-set-size char-set-count char-set-contains? -;;; char-set-every char-set-any -;;; -;;; char-set-adjoin char-set-delete -;;; char-set-adjoin! char-set-delete! -;;; - -;;; char-set-complement char-set-union char-set-intersection -;;; char-set-complement! char-set-union! char-set-intersection! -;;; -;;; char-set-difference char-set-xor char-set-diff+intersection -;;; char-set-difference! char-set-xor! char-set-diff+intersection! -;;; -;;; char-set:lower-case char-set:upper-case char-set:title-case -;;; char-set:letter char-set:digit char-set:letter+digit -;;; char-set:graphic char-set:printing char-set:whitespace -;;; char-set:iso-control char-set:punctuation char-set:symbol -;;; char-set:hex-digit char-set:blank char-set:ascii -;;; char-set:empty char-set:full - -;;; Imports -;;; This code has the following non-R5RS dependencies: -;;; - ERROR -;;; - %LATIN1->CHAR %CHAR->LATIN1 -;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting -;;; optional arguments from rest lists. -;;; - BITWISE-AND for CHAR-SET-HASH -;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro -;;; - A simple CHECK-ARG procedure: -;;; (lambda (pred val caller) (if (not (pred val)) (error val caller))) - -;;; This is simple code, not great code. Char sets are represented as 256-char -;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I -;;; is ASCII/Latin-1 1, then it is in the set. -;;; - Should be rewritten to use bit strings or byte vecs. -;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode. - -;;; See the end of the file for porting and performance-tuning notes. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-record-type :char-set - (make-char-set s) - char-set? - (s char-set:s)) - - -(define (%string-copy s) (substring s 0 (string-length s))) - -;;; Parse, type-check & default a final optional BASE-CS parameter from -;;; a rest argument. Return a *fresh copy* of the underlying string. -;;; The default is the empty set. The PROC argument is to help us -;;; generate informative error exceptions. - -(define (%default-base maybe-base proc) - (if (pair? maybe-base) - (let ((bcs (car maybe-base)) - (tail (cdr maybe-base))) - (if (null? tail) - (if (char-set? bcs) (%string-copy (char-set:s bcs)) - (error "BASE-CS parameter not a char-set" proc bcs)) - (error "Expected final base char set -- too many parameters" - proc maybe-base))) - (make-string 256 (%latin1->char 0)))) - -;;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on -;;; behalf of our caller, PROC. This procedure exists basically to provide -;;; explicit error-checking & reporting. - -(define (%char-set:s/check cs proc) - (let lp ((cs cs)) - (if (char-set? cs) (char-set:s cs) - (lp (error "Not a char-set" cs proc))))) - - - -;;; These internal functions hide a lot of the dependency on the -;;; underlying string representation of char sets. They should be -;;; inlined if possible. - -(define (si=0? s i) (zero? (%char->latin1 (string-ref s i)))) -(define (si=1? s i) (not (si=0? s i))) -(define c0 (%latin1->char 0)) -(define c1 (%latin1->char 1)) -(define (si s i) (%char->latin1 (string-ref s i))) -(define (%set0! s i) (string-set! s i c0)) -(define (%set1! s i) (string-set! s i c1)) - -;;; These do various "s[i] := s[i] op val" operations -- see -;;; %CHAR-SET-ALGEBRA. They are used to implement the various -;;; set-algebra procedures. -(define (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value. -(define (%not! s i v) (setv! s i (- 1 v))) -(define (%and! s i v) (if (zero? v) (%set0! s i))) -(define (%or! s i v) (if (not (zero? v)) (%set1! s i))) -(define (%minus! s i v) (if (not (zero? v)) (%set0! s i))) -(define (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i))))) - - -(define (char-set-copy cs) - (make-char-set (%string-copy (%char-set:s/check cs char-set-copy)))) - -(define (char-set= . rest) - (or (null? rest) - (let* ((cs1 (car rest)) - (rest (cdr rest)) - (s1 (%char-set:s/check cs1 char-set=))) - (let lp ((rest rest)) - (or (not (pair? rest)) - (and (string=? s1 (%char-set:s/check (car rest) char-set=)) - (lp (cdr rest)))))))) - -(define (char-set<= . rest) - (or (null? rest) - (let ((cs1 (car rest)) - (rest (cdr rest))) - (let lp ((s1 (%char-set:s/check cs1 char-set<=)) (rest rest)) - (or (not (pair? rest)) - (let ((s2 (%char-set:s/check (car rest) char-set<=)) - (rest (cdr rest))) - (if (eq? s1 s2) (lp s2 rest) ; Fast path - (let lp2 ((i 255)) ; Real test - (if (< i 0) (lp s2 rest) - (and (<= (si s1 i) (si s2 i)) - (lp2 (- i 1)))))))))))) - -;;; Hash -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in -;;; to keep the intermediate values small. (We do the calculation with just -;;; enough bits to represent BOUND, masking off high bits at each step in -;;; calculation. If this screws up any important properties of the hash -;;; function I'd like to hear about it. -Olin) -;;; -;;; If you keep BOUND small enough, the intermediate calculations will -;;; always be fixnums. How small is dependent on the underlying Scheme system; -;;; we use a default BOUND of 2^22 = 4194304, which should hack it in -;;; Schemes that give you at least 29 signed bits for fixnums. The core -;;; calculation that you don't want to overflow is, worst case, -;;; (+ 65535 (* 37 (- bound 1))) -;;; where 65535 is the max character code. Choose the default BOUND to be the -;;; biggest power of two that won't cause this expression to fixnum overflow, -;;; and everything will be copacetic. - -(define (char-set-hash cs . maybe-bound) - (let* ((bound (:optional maybe-bound 4194304 (lambda (n) (and (integer? n) - (exact? n) - (<= 0 n))))) - (bound (if (zero? bound) 4194304 bound)) ; 0 means default. - (s (%char-set:s/check cs char-set-hash)) - ;; Compute a 111...1 mask that will cover BOUND-1: - (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? - (if (>= i bound) (- i 1) (lp (+ i i)))))) - - (let lp ((i 255) (ans 0)) - (if (< i 0) (modulo ans bound) - (lp (- i 1) - (if (si=0? s i) ans - (bitwise-and mask (+ (* 37 ans) i)))))))) - - -(define (char-set-contains? cs char) - (si=1? (%char-set:s/check cs char-set-contains?) - (%char->latin1 (check-arg char? char char-set-contains?)))) - -(define (char-set-size cs) - (let ((s (%char-set:s/check cs char-set-size))) - (let lp ((i 255) (size 0)) - (if (< i 0) size - (lp (- i 1) (+ size (si s i))))))) - -(define (char-set-count pred cset) - (check-arg procedure? pred char-set-count) - (let ((s (%char-set:s/check cset char-set-count))) - (let lp ((i 255) (count 0)) - (if (< i 0) count - (lp (- i 1) - (if (and (si=1? s i) (pred (%latin1->char i))) - (+ count 1) - count)))))) - - -;;; -- Adjoin & delete - -(define (%set-char-set set proc cs chars) - (let ((s (%string-copy (%char-set:s/check cs proc)))) - (for-each (lambda (c) (set s (%char->latin1 c))) - chars) - (make-char-set s))) - -(define (%set-char-set! set proc cs chars) - (let ((s (%char-set:s/check cs proc))) - (for-each (lambda (c) (set s (%char->latin1 c))) - chars)) - cs) - -(define (char-set-adjoin cs . chars) - (%set-char-set %set1! char-set-adjoin cs chars)) -(define (char-set-adjoin! cs . chars) - (%set-char-set! %set1! char-set-adjoin! cs chars)) -(define (char-set-delete cs . chars) - (%set-char-set %set0! char-set-delete cs chars)) -(define (char-set-delete! cs . chars) - (%set-char-set! %set0! char-set-delete! cs chars)) - - -;;; Cursors -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Simple implementation. A cursors is an integer index into the -;;; mark vector, and -1 for the end-of-char-set cursor. -;;; -;;; If we represented char sets as a bit set, we could do the following -;;; trick to pick the lowest bit out of the set: -;;; (count-bits (xor (- cset 1) cset)) -;;; (But first mask out the bits already scanned by the cursor first.) - -(define (char-set-cursor cset) - (%char-set-cursor-next cset 256 char-set-cursor)) - -(define (end-of-char-set? cursor) (< cursor 0)) - -(define (char-set-ref cset cursor) (%latin1->char cursor)) - -(define (char-set-cursor-next cset cursor) - (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i 255))) cursor - char-set-cursor-next) - (%char-set-cursor-next cset cursor char-set-cursor-next)) - -(define (%char-set-cursor-next cset cursor proc) ; Internal - (let ((s (%char-set:s/check cset proc))) - (let lp ((cur cursor)) - (let ((cur (- cur 1))) - (if (or (< cur 0) (si=1? s cur)) cur - (lp cur)))))) - - -;;; -- for-each map fold unfold every any - -(define (char-set-for-each proc cs) - (check-arg procedure? proc char-set-for-each) - (let ((s (%char-set:s/check cs char-set-for-each))) - (let lp ((i 255)) - (cond ((>= i 0) - (if (si=1? s i) (proc (%latin1->char i))) - (lp (- i 1))))))) - -(define (char-set-map proc cs) - (check-arg procedure? proc char-set-map) - (let ((s (%char-set:s/check cs char-set-map)) - (ans (make-string 256 c0))) - (let lp ((i 255)) - (cond ((>= i 0) - (if (si=1? s i) - (%set1! ans (%char->latin1 (proc (%latin1->char i))))) - (lp (- i 1))))) - (make-char-set ans))) - -(define (char-set-fold kons knil cs) - (check-arg procedure? kons char-set-fold) - (let ((s (%char-set:s/check cs char-set-fold))) - (let lp ((i 255) (ans knil)) - (if (< i 0) ans - (lp (- i 1) - (if (si=0? s i) ans - (kons (%latin1->char i) ans))))))) - -(define (char-set-every pred cs) - (check-arg procedure? pred char-set-every) - (let ((s (%char-set:s/check cs char-set-every))) - (let lp ((i 255)) - (or (< i 0) - (and (or (si=0? s i) (pred (%latin1->char i))) - (lp (- i 1))))))) - -(define (char-set-any pred cs) - (check-arg procedure? pred char-set-any) - (let ((s (%char-set:s/check cs char-set-any))) - (let lp ((i 255)) - (and (>= i 0) - (or (and (si=1? s i) (pred (%latin1->char i))) - (lp (- i 1))))))) - - -(define (%char-set-unfold! proc p f g s seed) - (check-arg procedure? p proc) - (check-arg procedure? f proc) - (check-arg procedure? g proc) - (let lp ((seed seed)) - (cond ((not (p seed)) ; P says we are done. - (%set1! s (%char->latin1 (f seed))) ; Add (F SEED) to set. - (lp (g seed)))))) ; Loop on (G SEED). - -(define (char-set-unfold p f g seed . maybe-base) - (let ((bs (%default-base maybe-base char-set-unfold))) - (%char-set-unfold! char-set-unfold p f g bs seed) - (make-char-set bs))) - -(define (char-set-unfold! p f g seed base-cset) - (%char-set-unfold! char-set-unfold! p f g - (%char-set:s/check base-cset char-set-unfold!) - seed) - base-cset) - - - -;;; list <--> char-set - -(define (%list->char-set! chars s) - (for-each (lambda (char) (%set1! s (%char->latin1 char))) - chars)) - -(define (char-set . chars) - (let ((s (make-string 256 c0))) - (%list->char-set! chars s) - (make-char-set s))) - -(define (list->char-set chars . maybe-base) - (let ((bs (%default-base maybe-base list->char-set))) - (%list->char-set! chars bs) - (make-char-set bs))) - -(define (list->char-set! chars base-cs) - (%list->char-set! chars (%char-set:s/check base-cs list->char-set!)) - base-cs) - - -(define (char-set->list cs) - (let ((s (%char-set:s/check cs char-set->list))) - (let lp ((i 255) (ans '())) - (if (< i 0) ans - (lp (- i 1) - (if (si=0? s i) ans - (cons (%latin1->char i) ans))))))) - - - -;;; string <--> char-set - -(define (%string->char-set! str bs proc) - (check-arg string? str proc) - (do ((i (- (string-length str) 1) (- i 1))) - ((< i 0)) - (%set1! bs (%char->latin1 (string-ref str i))))) - -(define (string->char-set str . maybe-base) - (let ((bs (%default-base maybe-base string->char-set))) - (%string->char-set! str bs string->char-set) - (make-char-set bs))) - -(define (string->char-set! str base-cs) - (%string->char-set! str (%char-set:s/check base-cs string->char-set!) - string->char-set!) - base-cs) - - -(define (char-set->string cs) - (let* ((s (%char-set:s/check cs char-set->string)) - (ans (make-string (char-set-size cs)))) - (let lp ((i 255) (j 0)) - (if (< i 0) ans - (let ((j (if (si=0? s i) j - (begin (string-set! ans j (%latin1->char i)) - (+ j 1))))) - (lp (- i 1) j)))))) - - -;;; -- UCS-range -> char-set - -(define (%ucs-range->char-set! lower upper error? bs proc) - (check-arg (lambda (x) (and (integer? x) (exact? x) (<= 0 x))) lower proc) - (check-arg (lambda (x) (and (integer? x) (exact? x) (<= lower x))) upper proc) - - (if (and (< lower upper) (< 256 upper) error?) - (error "Requested UCS range contains unavailable characters -- this implementation only supports Latin-1" - proc lower upper)) - - (let lp ((i (- (min upper 256) 1))) - (cond ((<= lower i) (%set1! bs i) (lp (- i 1)))))) - -(define (ucs-range->char-set lower upper . rest) - (let-optionals* rest ((error? #f) rest) - (let ((bs (%default-base rest ucs-range->char-set))) - (%ucs-range->char-set! lower upper error? bs ucs-range->char-set) - (make-char-set bs)))) - -(define (ucs-range->char-set! lower upper error? base-cs) - (%ucs-range->char-set! lower upper error? - (%char-set:s/check base-cs ucs-range->char-set!) - ucs-range->char-set) - base-cs) - - -;;; -- predicate -> char-set - -(define (%char-set-filter! pred ds bs proc) - (check-arg procedure? pred proc) - (let lp ((i 255)) - (cond ((>= i 0) - (if (and (si=1? ds i) (pred (%latin1->char i))) - (%set1! bs i)) - (lp (- i 1)))))) - -(define (char-set-filter predicate domain . maybe-base) - (let ((bs (%default-base maybe-base char-set-filter))) - (%char-set-filter! predicate - (%char-set:s/check domain char-set-filter!) - bs - char-set-filter) - (make-char-set bs))) - -(define (char-set-filter! predicate domain base-cs) - (%char-set-filter! predicate - (%char-set:s/check domain char-set-filter!) - (%char-set:s/check base-cs char-set-filter!) - char-set-filter!) - base-cs) - - -;;; {string, char, char-set, char predicate} -> char-set - -(define (->char-set x) - (cond ((char-set? x) x) - ((string? x) (string->char-set x)) - ((char? x) (char-set x)) - (else (error "->char-set: Not a charset, string or char." x)))) - - - -;;; Set algebra -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; The exported ! procs are "linear update" -- allowed, but not required, to -;;; side-effect their first argument when computing their result. In other -;;; words, you must use them as if they were completely functional, just like -;;; their non-! counterparts, and you must additionally ensure that their -;;; first arguments are "dead" at the point of call. In return, we promise a -;;; more efficient result, plus allowing you to always assume char-sets are -;;; unchangeable values. - -;;; Apply P to each index and its char code in S: (P I VAL). -;;; Used by the set-algebra ops. - -(define (%string-iter p s) - (let lp ((i (- (string-length s) 1))) - (cond ((>= i 0) - (p i (%char->latin1 (string-ref s i))) - (lp (- i 1)))))) - -;;; String S represents some initial char-set. (OP s i val) does some -;;; kind of s[i] := s[i] op val update. Do -;;; S := S OP CSETi -;;; for all the char-sets in the list CSETS. The n-ary set-algebra ops -;;; all use this internal proc. - -(define (%char-set-algebra s csets op proc) - (for-each (lambda (cset) - (let ((s2 (%char-set:s/check cset proc))) - (let lp ((i 255)) - (cond ((>= i 0) - (op s i (si s2 i)) - (lp (- i 1))))))) - csets)) - - -;;; -- Complement - -(define (char-set-complement cs) - (let ((s (%char-set:s/check cs char-set-complement)) - (ans (make-string 256))) - (%string-iter (lambda (i v) (%not! ans i v)) s) - (make-char-set ans))) - -(define (char-set-complement! cset) - (let ((s (%char-set:s/check cset char-set-complement!))) - (%string-iter (lambda (i v) (%not! s i v)) s)) - cset) - - -;;; -- Union - -(define (char-set-union! cset1 . csets) - (%char-set-algebra (%char-set:s/check cset1 char-set-union!) - csets %or! char-set-union!) - cset1) - -(define (char-set-union . csets) - (if (pair? csets) - (let ((s (%string-copy (%char-set:s/check (car csets) char-set-union)))) - (%char-set-algebra s (cdr csets) %or! char-set-union) - (make-char-set s)) - (char-set-copy char-set:empty))) - - -;;; -- Intersection - -(define (char-set-intersection! cset1 . csets) - (%char-set-algebra (%char-set:s/check cset1 char-set-intersection!) - csets %and! char-set-intersection!) - cset1) - -(define (char-set-intersection . csets) - (if (pair? csets) - (let ((s (%string-copy (%char-set:s/check (car csets) char-set-intersection)))) - (%char-set-algebra s (cdr csets) %and! char-set-intersection) - (make-char-set s)) - (char-set-copy char-set:full))) - - -;;; -- Difference - -(define (char-set-difference! cset1 . csets) - (%char-set-algebra (%char-set:s/check cset1 char-set-difference!) - csets %minus! char-set-difference!) - cset1) - -(define (char-set-difference cs1 . csets) - (if (pair? csets) - (let ((s (%string-copy (%char-set:s/check cs1 char-set-difference)))) - (%char-set-algebra s csets %minus! char-set-difference) - (make-char-set s)) - (char-set-copy cs1))) - - -;;; -- Xor - -(define (char-set-xor! cset1 . csets) - (%char-set-algebra (%char-set:s/check cset1 char-set-xor!) - csets %xor! char-set-xor!) - cset1) - -(define (char-set-xor . csets) - (if (pair? csets) - (let ((s (%string-copy (%char-set:s/check (car csets) char-set-xor)))) - (%char-set-algebra s (cdr csets) %xor! char-set-xor) - (make-char-set s)) - (char-set-copy char-set:empty))) - - -;;; -- Difference & intersection - -(define (%char-set-diff+intersection! diff int csets proc) - (for-each (lambda (cs) - (%string-iter (lambda (i v) - (if (not (zero? v)) - (cond ((si=1? diff i) - (%set0! diff i) - (%set1! int i))))) - (%char-set:s/check cs proc))) - csets)) - -(define (char-set-diff+intersection! cs1 cs2 . csets) - (let ((s1 (%char-set:s/check cs1 char-set-diff+intersection!)) - (s2 (%char-set:s/check cs2 char-set-diff+intersection!))) - (%string-iter (lambda (i v) (if (zero? v) - (%set0! s2 i) - (if (si=1? s2 i) (%set0! s1 i)))) - s1) - (%char-set-diff+intersection! s1 s2 csets char-set-diff+intersection!)) - (values cs1 cs2)) - -(define (char-set-diff+intersection cs1 . csets) - (let ((diff (string-copy (%char-set:s/check cs1 char-set-diff+intersection))) - (int (make-string 256 c0))) - (%char-set-diff+intersection! diff int csets char-set-diff+intersection) - (values (make-char-set diff) (make-char-set int)))) - - -;;;; System character sets -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; These definitions are for Latin-1. -;;; -;;; If your Scheme implementation allows you to mark the underlying strings -;;; as immutable, you should do so -- it would be very, very bad if a client's -;;; buggy code corrupted these constants. - -(define char-set:empty (char-set)) -(define char-set:full (char-set-complement char-set:empty)) - -(define char-set:lower-case - (let* ((a-z (ucs-range->char-set #x61 #x7B)) - (latin1 (ucs-range->char-set! #xdf #xf7 #t a-z)) - (latin2 (ucs-range->char-set! #xf8 #x100 #t latin1))) - (char-set-adjoin! latin2 (%latin1->char #xb5)))) - -(define char-set:upper-case - (let ((A-Z (ucs-range->char-set #x41 #x5B))) - ;; Add in the Latin-1 upper-case chars. - (ucs-range->char-set! #xd8 #xdf #t - (ucs-range->char-set! #xc0 #xd7 #t A-Z)))) - -(define char-set:title-case char-set:empty) - -(define char-set:letter - (let ((u/l (char-set-union char-set:upper-case char-set:lower-case))) - (char-set-adjoin! u/l - (%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR - (%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR - -(define char-set:digit (string->char-set "0123456789")) -(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF")) - -(define char-set:letter+digit - (char-set-union char-set:letter char-set:digit)) - -(define char-set:punctuation - (let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")) - (latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK - #xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - #xAD ; SOFT HYPHEN - #xB7 ; MIDDLE DOT - #xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - #xBF)))) ; INVERTED QUESTION MARK - (list->char-set! latin-1-chars ascii))) - -(define char-set:symbol - (let ((ascii (string->char-set "$+<=>^`|~")) - (latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN - #x00A3 ; POUND SIGN - #x00A4 ; CURRENCY SIGN - #x00A5 ; YEN SIGN - #x00A6 ; BROKEN BAR - #x00A7 ; SECTION SIGN - #x00A8 ; DIAERESIS - #x00A9 ; COPYRIGHT SIGN - #x00AC ; NOT SIGN - #x00AE ; REGISTERED SIGN - #x00AF ; MACRON - #x00B0 ; DEGREE SIGN - #x00B1 ; PLUS-MINUS SIGN - #x00B4 ; ACUTE ACCENT - #x00B6 ; PILCROW SIGN - #x00B8 ; CEDILLA - #x00D7 ; MULTIPLICATION SIGN - #x00F7)))) ; DIVISION SIGN - (list->char-set! latin-1-chars ascii))) - - -(define char-set:graphic - (char-set-union char-set:letter+digit char-set:punctuation char-set:symbol)) - -(define char-set:whitespace - (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION - #x0A ; LINE FEED - #x0B ; VERTICAL TABULATION - #x0C ; FORM FEED - #x0D ; CARRIAGE RETURN - #x20 ; SPACE - #xA0)))) - -(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; NO-BREAK SPACE - -(define char-set:blank - (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION - #x20 ; SPACE - #xA0)))) ; NO-BREAK SPACE - - -(define char-set:iso-control - (ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32))) - -(define char-set:ascii (ucs-range->char-set 0 128)) - - -;;; Porting & performance-tuning notes -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; See the section at the beginning of this file on external dependencies. -;;; -;;; First and foremost, rewrite this code to use bit vectors of some sort. -;;; This will give big speedup and memory savings. -;;; -;;; - LET-OPTIONALS* macro. -;;; This is only used once. You can rewrite the use, port the hairy macro -;;; definition (which is implemented using a Clinger-Rees low-level -;;; explicit-renaming macro system), or port the simple, high-level -;;; definition, which is less efficient. -;;; -;;; - :OPTIONAL macro -;;; Very simply defined using an R5RS high-level macro. -;;; -;;; Implementations that can arrange for the base char sets to be immutable -;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable, -;;; which can be used to protect the underlying strings.) It would be very, -;;; very bad if a client's buggy code corrupted these constants. -;;; -;;; There is a fair amount of argument checking. This is, strictly speaking, -;;; unnecessary -- the actual body of the procedures will blow up if an -;;; illegal value is passed in. However, the error message will not be as good -;;; as if the error were caught at the "higher level." Also, a very, very -;;; smart Scheme compiler may be able to exploit having the type checks done -;;; early, so that the actual body of the procedures can assume proper values. -;;; This isn't likely; this kind of compiler technology isn't common any -;;; longer. -;;; -;;; The overhead of optional-argument parsing is irritating. The optional -;;; arguments must be consed into a rest list on entry, and then parsed out. -;;; Function call should be a matter of a few register moves and a jump; it -;;; should not involve heap allocation! Your Scheme system may have a superior -;;; non-R5RS optional-argument system that can eliminate this overhead. If so, -;;; then this is a prime candidate for optimising these procedures, -;;; *especially* the many optional BASE-CS parameters. -;;; -;;; Note that optional arguments are also a barrier to procedure integration. -;;; If your Scheme system permits you to specify alternate entry points -;;; for a call when the number of optional arguments is known in a manner -;;; that enables inlining/integration, this can provide performance -;;; improvements. -;;; -;;; There is enough *explicit* error checking that *all* internal operations -;;; should *never* produce a type or index-range error. Period. Feel like -;;; living dangerously? *Big* performance win to be had by replacing string -;;; and record-field accessors and setters with unsafe equivalents in the -;;; code. Similarly, fixnum-specific operators can speed up the arithmetic -;;; done on the index values in the inner loops. The only arguments that are -;;; not completely error checked are -;;; - string lists (complete checking requires time proportional to the -;;; length of the list) -;;; - procedure arguments, such as char->char maps & predicates. -;;; There is no way to check the range & domain of procedures in Scheme. -;;; Procedures that take these parameters cannot fully check their -;;; arguments. But all other types to all other procedures are fully -;;; checked. -;;; -;;; This does open up the alternate possibility of simply *removing* these -;;; checks, and letting the safe primitives raise the errors. On a dumb -;;; Scheme system, this would provide speed (by eliminating the redundant -;;; error checks) at the cost of error-message clarity. -;;; -;;; In an interpreted Scheme, some of these procedures, or the internal -;;; routines with % prefixes, are excellent candidates for being rewritten -;;; in C. -;;; -;;; It would also be nice to have the ability to mark some of these -;;; routines as candidates for inlining/integration. -;;; -;;; See the comments preceding the hash function code for notes on tuning -;;; the default bound so that the code never overflows your implementation's -;;; fixnum size into bignum calculation. -;;; -;;; All the %-prefixed routines in this source code are written -;;; to be called internally to this library. They do *not* perform -;;; friendly error checks on the inputs; they assume everything is -;;; proper. They also do not take optional arguments. These two properties -;;; save calling overhead and enable procedure integration -- but they -;;; are not appropriate for exported routines. - -;;; Copyright notice -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology -;;; -;;; This material was developed by the Scheme project at the Massachusetts -;;; Institute of Technology, Department of Electrical Engineering and -;;; Computer Science. Permission to copy and modify this software, to -;;; redistribute either the original software or a modified version, and -;;; to use this software for any purpose is granted, subject to the -;;; following restrictions and understandings. -;;; -;;; 1. Any copy made of this software must include this copyright notice -;;; in full. -;;; -;;; 2. Users of this software agree to make their best efforts (a) to -;;; return to the MIT Scheme project any improvements or extensions that -;;; they make, so that these may be included in future releases; and (b) -;;; to inform MIT of noteworthy uses of this software. -;;; -;;; 3. All materials developed as a consequence of the use of this -;;; software shall duly acknowledge such use, in accordance with the usual -;;; standards of acknowledging credit in academic research. -;;; -;;; 4. MIT has made no warrantee or representation that the operation of -;;; this software will be error-free, and MIT is under no obligation to -;;; provide any services, by way of maintenance, update, or otherwise. -;;; -;;; 5. In conjunction with products arising from the use of this material, -;;; there shall be no use of the name of the Massachusetts Institute of -;;; Technology nor of any adaptation thereof in any advertising, -;;; promotional, or sales literature without prior written consent from -;;; MIT in each case. diff --git a/scsh/lib/cset-lib.txt b/scsh/lib/cset-lib.txt deleted file mode 100644 index 75a77d5..0000000 --- a/scsh/lib/cset-lib.txt +++ /dev/null @@ -1,1271 +0,0 @@ -The SRFI 14 character-set library -*- outline -*- -Olin Shivers -98/11/8 -Last Update: 2000/7/4 - -Emacs should display this document in outline mode. Say c-h m for -instructions on how to move through it by sections (e.g., c-c c-n, c-c c-p). - -* Table of contents -------------------- -Abstract -Variable index -Rationale - Linear-update operations - Extra-SRFI recommendations -Specification - General procedures - Iterating over character sets - Creating character sets - Querying character sets - Character-set algebra - Standard character sets -Unicode, Latin-1 and ASCII definitions of the standard character sets -Reference implementation -Acknowledgements -References & links -Copyright - - -------------------------------------------------------------------------------- -* Abstract ----------- - -The ability to efficiently represent and manipulate sets of characters is an -unglamorous but very useful capability for text-processing code -- one that -tends to pop up in the definitions of other libraries. Hence it is useful to -specify a general substrate for this functionality early. This SRFI defines a -general library that provides this functionality. - -It is accompanied by a reference implementation for the spec. The reference -implementation is fairly efficient, straightforwardly portable, and has a -"free software" copyright. The implementation is tuned for "small" 7 or 8 -bit character types, such as ASCII or Latin-1; the data structures and -algorithms would have to be altered for larger 16 or 32 bit character types -such as Unicode -- however, the specs have been carefully designed with these -larger character types in mind. - -Several forthcoming SRFIs can be defined in terms of this one: - - string library - - delimited input procedures (e.g., READ-LINE) - - regular expressions - - -------------------------------------------------------------------------------- -* Variable index ------------------ -Here is the complete set of bindings -- procedural and otherwise -- -exported by this library. In a Scheme system that has a module or package -system, these procedures should be contained in a module named "char-set-lib". - -char-set? char-set= char-set<= - -char-set-hash - -char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? -char-set-fold char-set-unfold char-set-unfold! -char-set-for-each char-set-map - -char-set-copy char-set - -list->char-set string->char-set -list->char-set! string->char-set! - -char-set-filter ucs-range->char-set -char-set-filter! ucs-range->char-set! - -->char-set - -char-set->list char-set->string - -char-set-size char-set-count char-set-contains? - -char-set-every char-set-any - -char-set-adjoin char-set-delete -char-set-adjoin! char-set-delete! - -char-set-complement char-set-union char-set-intersection -char-set-complement! char-set-union! char-set-intersection! - -char-set-difference char-set-xor char-set-diff+intersection -char-set-difference! char-set-xor! char-set-diff+intersection! - -char-set:lower-case char-set:upper-case char-set:title-case -char-set:letter char-set:digit char-set:letter+digit -char-set:graphic char-set:printing char-set:whitespace -char-set:iso-control char-set:punctuation char-set:symbol -char-set:hex-digit char-set:blank char-set:ascii -char-set:empty char-set:full - - -------------------------------------------------------------------------------- -* Rationale ------------ - -The ability to efficiently manipulate sets of characters is quite -useful for text-processing code. Encapsulating this functionality in -a general, efficiently implemented library can assist all such code. -This library defines a new data structure to represent these sets, called -a "char-set." The char-set type is distinct from all other types. - -This library is designed to be portable across implementations that use -different character types and representations, especially ASCII, Latin-1 -and Unicode. Some effort has been made to preserve compatibility with Java -in the Unicode case (see the definition of CHAR-SET:WHITESPACE for the -single real deviation). - - -** Linear-update operations -=========================== -The procedures of this SRFI, by default, are "pure functional" -- they do not -alter their parameters. However, this SRFI defines a set of "linear-update" -procedures which have a hybrid pure-functional/side-effecting semantics: they -are allowed, but not required, to side-effect one of their parameters in order -to construct their result. An implementation may legally implement these -procedures as pure, side-effect-free functions, or it may implement them using -side effects, depending upon the details of what is the most efficient or -simple to implement in terms of the underlying representation. - -The linear-update routines all have names ending with "!". - -Clients of these procedures *may not* rely upon these procedures working by -side effect. For example, this is not guaranteed to work: - - (let* ((cs1 (char-set #\a #\b #\c)) ; cs1 = {a,b,c}. - (cs2 (char-set-adjoin! cs1 #\d))) ; Add d to {a,b,c}. - cs1) ; Could be either {a,b,c} or {a,b,c,d}. - -However, this is well-defined: - - (let ((cs (char-set #\a #\b #\c))) - (char-set-adjoin! cs #\d)) ; Add d to {a,b,c}. - -So clients of these procedures write in a functional style, but must -additionally be sure that, when the procedure is called, there are no other -live pointers to the potentially-modified character set (hence the term -"linear update"). - -There are two benefits to this convention: - - Implementations are free to provide the most efficient possible - implementation, either functional or side-effecting. - - Programmers may nonetheless continue to assume that character sets - are purely functional data structures: they may be reliably shared - without needing to be copied, uniquified, and so forth. - -Note that pure functional representations are the right thing for -ASCII- or Latin-1-based Scheme implementations, since a char-set can -be represented in an ASCII Scheme with 4 32-bit words. Pure set-algebra -operations on such a representation are very fast and efficient. Programmers -who code using linear-update operations are guaranteed the system will -provide the best implementation across multiple platforms. - -In practice, these procedures are most useful for efficiently constructing -character sets in a side-effecting manner, in some limited local context, -before passing the character set outside the local construction scope to be -used in a functional manner. - -Scheme provides no assistance in checking the linearity of the potentially -side-effected parameters passed to these functions --- there's no linear -type checker or run-time mechanism for detecting violations. (But -sophisticated programming environments, such as DrScheme, might help.) - -** Extra-SRFI recommendations -============================= -Users are cautioned that the R5RS predicates - CHAR-ALPHABETIC? - CHAR-NUMERIC? - CHAR-WHITESPACE? - CHAR-UPPER-CASE? - CHAR-LOWER-CASE? -may or may not be in agreement with the SRFI 14 base character sets - CHAR-SET:LETTER - CHAR-SET:DIGIT - CHAR-SET:WHITESPACE - CHAR-SET:UPPER-CASE - CHAR-SET:LOWER-CASE -Implementors are strongly encouraged to bring these predicates into -agreement with the base character sets of this SRFI; not to do so risks -major confusion. - - -------------------------------------------------------------------------------- -* Specification ---------------- - -In the following procedure specifications: - - A CS parameter is a character set. - - - An S parameter is a string. - - - A CHAR parameter is a character. - - - A CHAR-LIST parameter is a list of characters. - - - A PRED parameter is a unary character predicate procedure, returning - a true/false value when applied to a character. - - - An OBJ parameter may be any value at all. - -Passing values to procedures with these parameters that do not satisfy these -types is an error. - -Unless otherwise noted in the specification of a procedure, procedures -always return character sets that are distinct (from the point of view -of the linear-update operations) from the parameter character sets. For -example, CHAR-SET-ADJOIN is guaranteed to provide a fresh character set, -even if it is not given any character parameters. - -Parameters given in square brackets are optional. Unless otherwise noted in -the text describing the procedure, any prefix of these optional parameters may -be supplied, from zero arguments to the full list. When a procedure returns -multiple values, this is shown by listing the return values in square -brackets, as well. So, for example, the procedure with signature - - halts? f [x init-store] -> [boolean integer] - -would take one (F), two (F, X) or three (F, X, INPUT-STORE) input parameters, -and return two values, a boolean and an integer. - -A parameter followed by "..." means zero-or-more elements. So the procedure -with the signature - sum-squares x ... -> number -takes zero or more arguments (X ...), while the procedure with signature - spell-check doc dict1 dict2 ... -> string-list -takes two required parameters (DOC and DICT1) and zero or more -optional parameters (DICT2 ...). - - -** General procedures -===================== -char-set? obj -> boolean - Is the object OBJ a character set? - -char-set= cs1 ... -> boolean - Are the character sets equal? - - Boundary cases: - (char-set=) => true - (char-set= cs) => true - - Rationale: transitive binary relations are generally extended to n-ary - relations in Scheme, which enables clearer, more concise code to be - written. While the zero-argument and one-argument cases will almost - certainly not arise in first-order uses of such relations, they may well - arise in higher-order cases or macro-generated code. E.g., consider - (apply char-set= cset-list) - This is well-defined if the list is empty or a singleton list. Hence - we extend these relations to any number of arguments. Implementors - have reported actual uses of n-ary relations in higher-order cases - allowing for fewer than two arguments. The way of Scheme is to handle the - general case; we provide the fully general extension. - - A counter-argument to this extension is that R5RS's transitive binary - arithmetic relations (=, <, etc.) require at least two arguments, hence - this decision is a break with the prior convention -- although it is - at least one that is backwards-compatible. - -char-set<= cs1 ... -> boolean - Returns true if every character set CSi is a subset of character set CSi+1. - - Boundary cases: - (char-set<=) => true - (char-set<= cs) => true - - Rationale: See CHAR-SET= for discussion of zero- and one-argument - applications. Consider testing a list of char-sets for monotonicity - with (APPLY CHAR-SET<= CSET-LIST). - -char-set-hash cs [bound] -> integer - Compute a hash value for the character set CS. BOUND is a non-negative - exact integer specifying the range of the hash function. A positive - value restricts the return value to the range [0,BOUND). - - If BOUND is either zero or not given, the implementation may use - an implementation-specific default value, chosen to be as large as - is efficiently practical. For instance, the default range might be chosen - for a given implementation to map all character sets into the range of - integers that can be represented with a single machine word. - - Invariant: - (char-set= cs1 cs2) => (= (char-set-hash cs1 b) (char-set-hash cs2 b)) - - A legal but nonetheless discouraged implementation: - (define (char-set-hash cs . maybe-bound) 1) - - Rationale: allowing the user to specify an explicit bound simplifies user - code by removing the mod operation that typically accompanies every hash - computation, and also may allow the implementation of the hash function to - exploit a reduced range to efficiently compute the hash value. E.g., for - small bounds, the hash function may be computed in a fashion such that - intermediate values never overflow into bignum integers, allowing the - implementor to provide a fixnum-specific "fast path" for computing the - common cases very rapidly. - -** Iterating over character sets -=================================== - -char-set-cursor cset -> cursor -char-set-ref cset cursor -> char -char-set-cursor-next cset cursor -> cursor -end-of-char-set? cursor -> boolean - Cursors are a low-level facility for iterating over the characters in a - set. A cursor is a value that indexes a character in a char set. - CHAR-SET-CURSOR produces a new cursor for a given char set. The set - element indexed by the cursor is fetched with CHAR-SET-REF. A cursor index - is incremented with CHAR-SET-CURSOR-NEXT; in this way, code can step - through every character in a char set. Stepping a cursor "past the end" of - a char set produces a cursor that answers true to END-OF-CHAR-SET?. It is - an error to pass such a cursor to CHAR-SET-REF or to CHAR-SET-CURSOR-NEXT. - - A cursor value may not be used in conjunction with a different character - set; if it is passed to CHAR-SET-REF or CHAR-SET-CURSOR-NEXT with - a character set other than the one used to create it, the results and - effects are undefined. - - Cursor values are *not* necessarily distinct from other types. They may be - integers, linked lists, records, procedures or other values. This license - is granted to allow cursors to be very "lightweight" values suitable for - tight iteration, even in fairly simple implementations. - - Note that these primitives are necessary to export an iteration facility - for char sets to loop macros. - - Example: - - (define cs (char-set #\G #\a #\T #\e #\c #\h)) - - ;; Collect elts of CS into a list. - (let lp ((cur (char-set-cursor cs)) (ans '())) - (if (end-of-char-set? cur) ans - (lp (char-set-cursor-next cs cur) - (cons (char-set-ref cs cur) ans)))) - => (#\G #\T #\a #\c #\e #\h) - - ;; Equivalently, using a list unfold (from SRFI 1): - (unfold-right end-of-char-set? - (curry char-set-ref cs) - (curry char-set-cursor-next cs) - (char-set-cursor cs)) - => (#\G #\T #\a #\c #\e #\h) - - Rationale: Note that the cursor API's four functions "fit" the functional - protocol used by the unfolders provided by the list, string and char-set - SRFIs (see the example above). By way of contrast, here is a simpler, - two-function API that was rejected for failing this criterion. Besides - CHAR-SET-CURSOR, it provided a single function that mapped a cursor and a - character set to two values, the indexed character and the next cursor. If - the cursor had exhausted the character set, then this function returned - false instead of the character value, and another end-of-char-set cursor. - In this way, the other three functions of the current API were combined - together. - -char-set-fold kons knil cs -> object - This is the fundamental iterator for character sets. Applies the function - KONS across the character set CS using initial state value KNIL. That is, - if CS is the empty set, the procedure returns KNIL. Otherwise, some - element c of CS is chosen; let cs' be the remaining, unchosen characters. - The procedure returns - (char-set-fold KONS (KONS c KNIL) cs') - - Examples: - ;; CHAR-SET-MEMBERS - (lambda (cs) (char-set-fold cons '() cs)) - - ;; CHAR-SET-SIZE - (lambda (cs) (char-set-fold (lambda (c i) (+ i 1)) 0 cs)) - - ;; How many vowels in the char set? - (lambda (cs) - (char-set-fold (lambda (c i) (if (vowel? c) (+ i 1) i)) - 0 cs)) - -char-set-unfold f p g seed [base-cs] -> char-set -char-set-unfold! f p g seed base-cs -> char-set - This is a fundamental constructor for char-sets. - - G is used to generate a series of "seed" values from the initial seed: - SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... - - P tells us when to stop -- when it returns true when applied to one - of these seed values. - - F maps each seed value to a character. These characters are added - to the base character set BASE-CS to form the result; BASE-CS defaults to - the empty set. CHAR-SET-UNFOLD! adds the characters to BASE-CS in a - linear-update -- it is allowed, but not required, to side-effect - and use BASE-CS's storage to construct the result. - - More precisely, the following definitions hold, ignoring the - optional-argument issues: - - (define (char-set-unfold p f g seed base-cs) - (char-set-unfold! p f g seed (char-set-copy base-cs))) - - (define (char-set-unfold! p f g seed base-cs) - (let lp ((seed seed) (cs base-cs)) - (if (p seed) cs ; P says we are done. - (lp (g seed) ; Loop on (G SEED). - (char-set-adjoin! cs (f seed)))))) ; Add (F SEED) to set. - - (Note that the actual implementation may be more efficient.) - - Examples: - - (port->char-set p) = (char-set-unfold eof-object? values - (lambda (x) (read-char p)) - (read-char p)) - - (list->char-set lis) = (char-set-unfold null? car cdr lis) - -char-set-for-each proc cs -> unspecified - Apply procedure PROC to each character in the character set CS. - Note that the order in which PROC is applied to the characters in the - set is not specified, and may even change from one procedure application - to another. - - Nothing at all is specified about the value returned by this procedure; it - is not even required to be consistent from call to call. It is simply - required to be a value (or values) that may be passed to a command - continuation, e.g. as the value of an expression appearing as a - non-terminal subform of a BEGIN expression. Note that in R5RS, this - restricts the procedure to returning a single value; non-R5RS systems may - not even provide this restriction. - -char-set-map proc cs -> char-set - PROC is a char->char procedure. Apply it to all the characters in - the char-set CS, and collect the results into a new character set. - - Essentially lifts PROC from a char->char procedure to a char-set -> - char-set procedure. - - Example: - (char-set-map char-downcase cset) - - -** Creating character sets -========================== -char-set-copy cs -> char-set - Returns a copy of the character set CS. "Copy" means that if either the - input parameter or the result value of this procedure is passed to one of - the linear-update procedures described below, the other character set is - guaranteed not to be altered. - - A system that provides pure-functional implementations of the - linear-operator suite could implement this procedure as the identity - function -- so copies are *not* guaranteed to be distinct by EQ?. - -char-set char1 ... -> char-set - Return a character set containing the given characters. - -list->char-set char-list [base-cs] -> char-set -list->char-set! char-list base-cs -> char-set - Return a character set containing the characters in the list of - characters CHAR-LIST. - - If character set BASE-CS is provided, the characters from CHAR-LIST - are added to it. LIST->CHAR-SET! is allowed, but not required, - to side-effect and reuse the storage in BASE-CS; LIST->CHAR-SET - produces a fresh character set. - -string->char-set s [base-cs] -> char-set -string->char-set! s base-cs -> char-set - Return a character set containing the characters in the string S. - - If character set BASE-CS is provided, the characters from S are added to - it. STRING->CHAR-SET! is allowed, but not required, to side-effect and - reuse the storage in BASE-CS; STRING->CHAR-SET produces a fresh character - set. - -char-set-filter pred cs [base-cs] -> char-set -char-set-filter! pred cs base-cs -> char-set - Returns a character set containing every character c in CS - such that (PRED c) returns true. - - If character set BASE-CS is provided, the characters specified by PRED - are added to it. CHAR-SET-FILTER! is allowed, but not required, - to side-effect and reuse the storage in BASE-CS; CHAR-SET-FILTER - produces a fresh character set. - - An implementation may not save away a reference to PRED and invoke it - after CHAR-SET-FILTER or CHAR-SET-FILTER! returns -- that is, "lazy," - on-demand implementations are not allowed, as PRED may have external - dependencies on mutable data or have other side-effects. - - Rationale: This procedure provides a means of converting a character - predicate into its equivalent character set; the CS parameter allows the - programmer to bound the predicate's domain. Programmers should be aware - that filtering a character set such as CHAR-SET:FULL could be a very - expensive operation in an implementation that provided an extremely large - character type, such as 32-bit Unicode. An earlier draft of this library - provided a simple PREDICATE->CHAR-SET procedure, which was rejected in - favor of CHAR-SET-FILTER for this reason. - -ucs-range->char-set lower upper [error? base-cs] -> char-set -ucs-range->char-set! lower upper error? base-cs -> char-set - LOWER and UPPER are exact non-negative integers; LOWER <= UPPER. - - Returns a character set containing every character whose ISO/IEC 10646 - UCS-4 code lies in the half-open range [LOWER,UPPER). - - - If the requested range includes unassigned UCS values, these are - silently ignored (the current UCS specification has "holes" in the - space of assigned codes). - - - If the requested range includes "private" or "user space" codes, these - are handled in an implementation-specific manner; however, a UCS- or - Unicode-based Scheme implementation should pass them through - transparently. - - - If any code from the requested range specifies a valid, assigned - UCS character that has no corresponding representative in the - implementation's character type, then (1) an error is raised if ERROR? - is true, and (2) the code is ignored if ERROR? is false (the default). - This might happen, for example, if the implementation uses ASCII - characters, and the requested range includes non-ASCII characters. - - If character set BASE-CS is provided, the characters specified by the - range are added to it. UCS-RANGE->CHAR-SET! is allowed, but not required, - to side-effect and reuse the storage in BASE-CS; UCS-RANGE->CHAR-SET - produces a fresh character set. - - Note that ASCII codes are a subset of the Latin-1 codes, which are in turn - a subset of the 16-bit Unicode codes, which are themselves a subset of the - 32-bit UCS-4 codes. We commit to a specific encoding in this routine, - regardless of the underlying representation of characters, so that client - code using this library will be portable. I.e., a conformant Scheme - implementation may use EBCDIC or SHIFT-JIS to encode characters; it must - simply map the UCS characters from the given range into the native - representation when possible, and report errors when not possible. - -->char-set x -> char-set - Coerces X into a char-set. X may be a string, character or char-set. A - string is converted to the set of its constituent characters; a character - is converted to a singleton set; a char-set is returned as-is. This - procedure is intended for use by other procedures that want to provide - "user-friendly," wide-spectrum interfaces to their clients. - - -** Querying character sets -========================== -char-set-size cs -> integer - Returns the number of elements in character set CS. - -char-set-count pred cs -> integer - Apply PRED to the chars of character set CS, and return the number - of chars that caused the predicate to return true. - -char-set->list cs -> character-list - This procedure returns a list of the members of character set CS. - The order in which CS's characters appear in the list is not defined, - and may be different from one call to another. - -char-set->string cs -> string - This procedure returns a string containing the members of character set CS. - The order in which CS's characters appear in the string is not defined, - and may be different from one call to another. - -char-set-contains? cs char -> boolean - This procedure tests CHAR for membership in character set CS. - - The MIT Scheme character-set package called this procedure - CHAR-SET-MEMBER?, but the argument order isn't consistent with the name. - -char-set-every pred cs -> boolean -char-set-any pred cs -> object - The CHAR-SET-EVERY procedure returns true if predicate PRED - returns true of every character in the character set CS. - - Likewise, CHAR-SET-ANY applies PRED to every character in - character set CS, and returns the first true value it finds. - If no character produces a true value, it returns false. - - The order in which these procedures sequence through the elements of - CS is not specified. - - Note that if you need to determine the actual character on which a - predicate returns true, use CHAR-SET-ANY and arrange for the predicate - to return the character parameter as its true value, e.g. - (char-set-any (lambda (c) (and (char-upper-case? c) c)) - cs) - - -** Character-set algebra -======================== -char-set-adjoin cs char1 ... -> char-set -char-set-delete cs char1 ... -> char-set - Add/delete the CHARi characters to/from character set CS. - -char-set-adjoin! cs char1 ... -> char-set -char-set-delete! cs char1 ... -> char-set - Linear-update variants. These procedures are allowed, but not - required, to side-effect their first parameter. - -char-set-complement cs -> char-set -char-set-union cs1 ... -> char-set -char-set-intersection cs1 ... -> char-set -char-set-difference cs1 cs2 ... -> char-set -char-set-xor cs1 ... -> char-set -char-set-diff+intersection cs1 cs2 ... -> [char-set char-set] - These procedures implement set complement, union, intersection, - difference, and exclusive-or for character sets. The union, intersection - and xor operations are n-ary. The difference function is also n-ary, - associates to the left (that is, it computes the difference between - its first argument and the union of all the other arguments), - and requires at least one argument. - - Boundary cases: - (char-set-union) => char-set:empty - (char-set-intersection) => char-set:all - (char-set-xor) => char-set:empty - (char-set-difference cs) => cs - - CHAR-SET-DIFF+INTERSECTION returns both the difference and the - intersection of the arguments -- it partitions its first parameter. - It is equivalent to - (values (char-set-difference cs1 cs2 ...) - (char-set-intersection cs1 (char-set-union cs2 ...))) - but can be implemented more efficiently. - - Programmers should be aware that CHAR-SET-COMPLEMENT could potentially - be a very expensive operation in Scheme implementations that provide - a very large character type, such as 32-bit Unicode. If this is a - possibility, sets can be complimented with respect to a smaller - universe using CHAR-SET-DIFFERENCE. - -char-set-complement! cs -> char-set -char-set-union! cs1 cs2 ... -> char-set -char-set-intersection! cs1 cs2 ... -> char-set -char-set-difference! cs1 cs2 ... -> char-set -char-set-xor! cs1 cs2 ... -> char-set -char-set-diff+intersection! cs1 cs2 cs3 ... -> [char-set char-set] - These are linear-update variants of the set-algebra functions. - They are allowed, but not required, to side-effect their first - (required) parameter. - - - CHAR-SET-DIFF+INTERSECTION! is allowed to side-effect both of - its two required parameters, CS1 and CS2. - -** Standard character sets -========================== -Several character sets are predefined for convenience: - char-set:lower-case Lower-case letters - char-set:upper-case Upper-case letters - char-set:title-case Title-case letters - char-set:letter Letters - char-set:digit Digits - char-set:letter+digit Letters and digits - char-set:graphic Printing characters except spaces - char-set:printing Printing characters including spaces - char-set:whitespace Whitespace characters - char-set:iso-control The ISO control characters - char-set:punctuation Punctuation characters - char-set:symbol Symbol characters - char-set:hex-digit A hexadecimal digit: 0-9, A-F, a-f - char-set:blank Blank characters -- horizontal whitespace - char-set:ascii All characters in the ASCII set. - char-set:empty Empty set - char-set:full All characters - -Note that there may be characters in CHAR-SET:LETTER that are neither upper or -lower case---this might occur in implementations that use a character type -richer than ASCII, such as Unicode. A "graphic character" is one that would -put ink on your page. While the exact composition of these sets may vary -depending upon the character type provided by the underlying Scheme system, -here are the definitions for some of the sets in an ASCII implementation: - - char-set:lower-case a-z - char-set:upper-case A-Z - char-set:letter A-Z and a-z - char-set:digit 0123456789 - char-set:punctuation !"#%&'()*,-./:;?@[\]_{} - char-set:symbol $+<=>^`|~ - char-set:whitespace Space, newline, tab, form feed, - vertical tab, carriage return - char-set:blank Space and tab - char-set:graphic letter + digit + punctuation + symbol - char-set:printing graphic + whitespace - char-set:iso-control ASCII 0-31 and 127 - -Note that the existence of the CHAR-SET:ASCII set implies that the underlying -character set is required to be at least as rich as ASCII (including -ASCII's control characters). - -Rationale: The name choices reflect a shift from the older -"alphabetic/numeric" terms found in R5RS and Posix to newer, -Unicode-influenced "letter/digit" lexemes. - -------------------------------------------------------------------------------- -* Unicode, Latin-1 and ASCII definitions of the standard character sets ------------------------------------------------------------------------ - -In Unicode Scheme implementations, the base character sets are compatible with -Java's Unicode specifications. For ASCII or Latin-1, we simply restrict the -Unicode set specifications to their first 128 or 256 codes, respectively. -Scheme implementations that are not based on ASCII, Latin-1 or Unicode should -attempt to preserve the sense or spirit of these definitions. - -The following descriptions frequently make reference to the "Unicode character -database." This is a file, available at URL - ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt -Each line contains a description of a Unicode character. The first -semicolon-delimited field of the line gives the hex value of the character's -code; the second field gives the name of the character, and the third field -gives a two-letter category. Other fields give simple 1-1 case-mappings for -the character and other information; see - ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html -for further description of the file's format. Note in particular the -two-letter category specified in the the third field, which is referenced -frequently in the descriptions below. - -** char-set:lower-case -====================== -For Unicode, we follow Java's specification: a character is lowercase if - + it is not in the range [U+2000,U+2FFF], and - + the Unicode attribute table does not give a lowercase mapping for it, and - + at least one of the following is true: - - the Unicode attribute table gives a mapping to uppercase - for the character, or - - the name for the character in the Unicode attribute table contains - the words "SMALL LETTER" or "SMALL LIGATURE". - -The lower-case ASCII characters are - abcdefghijklmnopqrstuvwxyz -Latin-1 adds another 33 lower-case characters to the ASCII set: - 00B5 MICRO SIGN - 00DF LATIN SMALL LETTER SHARP S - 00E0 LATIN SMALL LETTER A WITH GRAVE - 00E1 LATIN SMALL LETTER A WITH ACUTE - 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX - 00E3 LATIN SMALL LETTER A WITH TILDE - 00E4 LATIN SMALL LETTER A WITH DIAERESIS - 00E5 LATIN SMALL LETTER A WITH RING ABOVE - 00E6 LATIN SMALL LETTER AE - 00E7 LATIN SMALL LETTER C WITH CEDILLA - 00E8 LATIN SMALL LETTER E WITH GRAVE - 00E9 LATIN SMALL LETTER E WITH ACUTE - 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX - 00EB LATIN SMALL LETTER E WITH DIAERESIS - 00EC LATIN SMALL LETTER I WITH GRAVE - 00ED LATIN SMALL LETTER I WITH ACUTE - 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX - 00EF LATIN SMALL LETTER I WITH DIAERESIS - 00F0 LATIN SMALL LETTER ETH - 00F1 LATIN SMALL LETTER N WITH TILDE - 00F2 LATIN SMALL LETTER O WITH GRAVE - 00F3 LATIN SMALL LETTER O WITH ACUTE - 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX - 00F5 LATIN SMALL LETTER O WITH TILDE - 00F6 LATIN SMALL LETTER O WITH DIAERESIS - 00F8 LATIN SMALL LETTER O WITH STROKE - 00F9 LATIN SMALL LETTER U WITH GRAVE - 00FA LATIN SMALL LETTER U WITH ACUTE - 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX - 00FC LATIN SMALL LETTER U WITH DIAERESIS - 00FD LATIN SMALL LETTER Y WITH ACUTE - 00FE LATIN SMALL LETTER THORN - 00FF LATIN SMALL LETTER Y WITH DIAERESIS -Note that three of these have no corresponding Latin-1 upper-case character: - 00B5 MICRO SIGN - 00DF LATIN SMALL LETTER SHARP S - 00FF LATIN SMALL LETTER Y WITH DIAERESIS -(The compatibility micro character uppercases to the non-Latin-1 Greek capital -mu; the German sharp s character uppercases to the pair of characters "SS," -and the capital y-with-diaeresis is non-Latin-1.) - -(Note that the Java spec for lowercase characters given at - http://java.sun.com/docs/books/jls/html/javalang.doc4.html#14345 -is inconsistent. U+00B5 MICRO SIGN fulfills the requirements for a lower-case -character (as of Unicode 3.0), but is not given in the numeric list of -lower-case character codes.) - -(Note that the Java spec for isLowerCase() given at - http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html#isLowerCase(char) -gives three mutually inconsistent definitions of "lower case." The first is -the definition used in this SRFI. Following text says "A character is -considered to be lowercase if and only if it is specified to be lowercase by -the Unicode 2.0 standard (category Ll in the Unicode specification data -file)." The former spec excludes U+00AA FEMININE ORDINAL INDICATOR and -U+00BA MASCULINE ORDINAL INDICATOR; the later spec includes them. Finally, -the spec enumerates a list of characters in the Latin-1 subset; this list -excludes U+00B5 MICRO SIGN, which is included in both of the previous specs.) - - -** char-set:upper-case -====================== -For Unicode, we follow Java's specification: a character is uppercase if - + it is not in the range [U+2000,U+2FFF], and - + the Unicode attribute table does not give an uppercase mapping for it - (this excludes titlecase characters), and - + at least one of the following is true: - - the Unicode attribute table gives a mapping to lowercase - for the character, or - - the name for the character in the Unicode attribute table contains - the words "CAPITAL LETTER" or "CAPITAL LIGATURE". - -The upper-case ASCII characters are - ABCDEFGHIJKLMNOPQRSTUVWXYZ -Latin-1 adds another 30 upper-case characters to the ASCII set: - 00C0 LATIN CAPITAL LETTER A WITH GRAVE - 00C1 LATIN CAPITAL LETTER A WITH ACUTE - 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 00C3 LATIN CAPITAL LETTER A WITH TILDE - 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS - 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE - 00C6 LATIN CAPITAL LETTER AE - 00C7 LATIN CAPITAL LETTER C WITH CEDILLA - 00C8 LATIN CAPITAL LETTER E WITH GRAVE - 00C9 LATIN CAPITAL LETTER E WITH ACUTE - 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 00CB LATIN CAPITAL LETTER E WITH DIAERESIS - 00CC LATIN CAPITAL LETTER I WITH GRAVE - 00CD LATIN CAPITAL LETTER I WITH ACUTE - 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 00CF LATIN CAPITAL LETTER I WITH DIAERESIS - 00D0 LATIN CAPITAL LETTER ETH - 00D1 LATIN CAPITAL LETTER N WITH TILDE - 00D2 LATIN CAPITAL LETTER O WITH GRAVE - 00D3 LATIN CAPITAL LETTER O WITH ACUTE - 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 00D5 LATIN CAPITAL LETTER O WITH TILDE - 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS - 00D8 LATIN CAPITAL LETTER O WITH STROKE - 00D9 LATIN CAPITAL LETTER U WITH GRAVE - 00DA LATIN CAPITAL LETTER U WITH ACUTE - 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 00DC LATIN CAPITAL LETTER U WITH DIAERESIS - 00DD LATIN CAPITAL LETTER Y WITH ACUTE - 00DE LATIN CAPITAL LETTER THORN - - -** char-set:title-case -====================== -In Unicode, a character is titlecase if it has the category Lt in -the character attribute database. There are very few of these characters; -here is the entire 31-character list as of Unicode 3.0: - - 01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON - 01C8 LATIN CAPITAL LETTER L WITH SMALL LETTER J - 01CB LATIN CAPITAL LETTER N WITH SMALL LETTER J - 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z - 1F88 GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - 1F89 GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI - 1F8A GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI - 1F8B GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI - 1F8C GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI - 1F8D GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI - 1F8E GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - 1F8F GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - 1F98 GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI - 1F99 GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI - 1F9A GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI - 1F9B GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI - 1F9C GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI - 1F9D GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI - 1F9E GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - 1F9F GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - 1FA8 GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI - 1FA9 GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI - 1FAA GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI - 1FAB GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI - 1FAC GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI - 1FAD GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI - 1FAE GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - 1FAF GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - 1FBC GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI - 1FCC GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI - 1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI - -There are no ASCII or Latin-1 titlecase characters. - - -** char-set:letter -================== -In Unicode, a letter is any character with one of the letter categories -(Lu, Ll, Lt, Lm, Lo) in the Unicode character database. - -There are 52 ASCII letters - abcdefghijklmnopqrstuvwxyz - ABCDEFGHIJKLMNOPQRSTUVWXYZ - -There are 117 Latin-1 letters. These are the 115 characters that are -members of the Latin-1 CHAR-SET:LOWER-CASE and CHAR-SET:UPPER-CASE sets, -plus - 00AA FEMININE ORDINAL INDICATOR - 00BA MASCULINE ORDINAL INDICATOR -(These two letters are considered lower-case by Unicode, but not by -Java or SRFI 14.) - -** char-set:digit -================= -In Unicode, a character is a digit if it has the category Nd in -the character attribute database. In Latin-1 and ASCII, the only -such characters are 0123456789. In Unicode, there are other digit -characters in other code blocks, such as Gujarati digits and Tibetan -digits. - - -** char-set:hex-digit -===================== -The only hex digits are 0123456789abcdefABCDEF. - - -** char-set:letter+digit -======================== -The union of CHAR-SET:LETTER and CHAR-SET:DIGIT. - - -** char-set:graphic -=================== -A graphic character is one that would put ink on paper. The ASCII and Latin-1 -graphic characters are the members of - CHAR-SET:LETTER - CHAR-SET:DIGIT - CHAR-SET:PUNCTUATION - CHAR-SET:SYMBOL - - -** char-set:printing -==================== -A printing character is one that would occupy space when printed, i.e., -a graphic character or a space character. CHAR-SET:PRINTING is the union -of CHAR-SET:WHITESPACE and CHAR-SET:GRAPHIC. - - -** char-set:whitespace -====================== -In Unicode, a whitespace character is either - - a character with one of the space, line, or paragraph separator categories - (Zs, Zl or Zp) of the Unicode character database. - - U+0009 Horizontal tabulation (\t control-I) - - U+000A Line feed (\n control-J) - - U+000B Vertical tabulation (\v control-K) - - U+000C Form feed (\f control-L) - - U+000D Carriage return (\r control-M) - -There are 24 whitespace characters in Unicode 3.0: - 0009 HORIZONTAL TABULATION \t control-I - 000A LINE FEED \n control-J - 000B VERTICAL TABULATION \v control-K - 000C FORM FEED \f control-L - 000D CARRIAGE RETURN \r control-M - 0020 SPACE Zs - 00A0 NO-BREAK SPACE Zs - 1680 OGHAM SPACE MARK Zs - 2000 EN QUAD Zs - 2001 EM QUAD Zs - 2002 EN SPACE Zs - 2003 EM SPACE Zs - 2004 THREE-PER-EM SPACE Zs - 2005 FOUR-PER-EM SPACE Zs - 2006 SIX-PER-EM SPACE Zs - 2007 FIGURE SPACE Zs - 2008 PUNCTUATION SPACE Zs - 2009 THIN SPACE Zs - 200A HAIR SPACE Zs - 200B ZERO WIDTH SPACE Zs - 2028 LINE SEPARATOR Zl - 2029 PARAGRAPH SEPARATOR Zp - 202F NARROW NO-BREAK SPACE Zs - 3000 IDEOGRAPHIC SPACE Zs - -The ASCII whitespace characters are the first six characters in the above list --- line feed, horizontal tabulation, vertical tabulation, form feed, carriage -return, and space. These are also exactly the characters recognised by the -Posix isspace() procedure. Latin-1 adds the no-break space. - -Note: Java's isWhitespace() method is incompatible, including - 001C FILE SEPARATOR (control-\) - 001D GROUP SEPARATOR (control-]) - 001E RECORD SEPARATOR (control-^) - 001F UNIT SEPARATOR (control-_) -and excluding - 00A0 NO-BREAK SPACE - -Java's excluding the no-break space means that tokenizers can simply break -character streams at "whitespace" boundaries. However, the exclusion introduces -exceptions in other places, e.g. CHAR-SET:PRINTING is no longer simply the -union of CHAR-SET:GRAPHIC and CHAR-SET:WHITESPACE. - - -** char-set:iso-control -======================= -The ISO control characters are the Unicode/Latin-1 characters in the ranges -[U+0000,U+001F] and [U+007F,U+009F]. - -ASCII restricts this set to the characters in the range [U+0000,U+001F] -plus the character U+007F. - -Note that Unicode defines other control characters which do not belong to this -set (hence the qualifying prefix "iso-" in the name). This restriction is -compatible with the Java IsISOControl() method. - - -** char-set:punctuation -======================= -In Unicode, a punctuation character is any character that has one of the -punctuation categories in the Unicode character database (Pc, Pd, Ps, -Pe, Pi, Pf, or Po.) - -ASCII has 23 punctuation characters: - !"#%&'()*,-./:;?@[\]_{} - -Latin-1 adds six more: - 00A1 INVERTED EXCLAMATION MARK - 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 00AD SOFT HYPHEN - 00B7 MIDDLE DOT - 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 00BF INVERTED QUESTION MARK - -Note that the nine ASCII characters $+<=>^`|~ are *not* punctuation. -They are "symbols." - - -** char-set:symbol -================== -In Unicode, a symbol is any character that has one of the symbol categories -in the Unicode character database (Sm, Sc, Sk, or So). There are nine ASCII -symbol characters: - $+<=>^`|~ - -Latin-1 adds 18 more: - 00A2 CENT SIGN - 00A3 POUND SIGN - 00A4 CURRENCY SIGN - 00A5 YEN SIGN - 00A6 BROKEN BAR - 00A7 SECTION SIGN - 00A8 DIAERESIS - 00A9 COPYRIGHT SIGN - 00AC NOT SIGN - 00AE REGISTERED SIGN - 00AF MACRON - 00B0 DEGREE SIGN - 00B1 PLUS-MINUS SIGN - 00B4 ACUTE ACCENT - 00B6 PILCROW SIGN - 00B8 CEDILLA - 00D7 MULTIPLICATION SIGN - 00F7 DIVISION SIGN - - -** char-set:blank -================= -Blank chars are horizontal whitespace. In Unicode, a blank character is either - - a character with the space separator category (Zs) in the Unicode - character database. - - U+0009 Horizontal tabulation (\t control-I) - -There are eighteen blank characters in Unicode 3.0: - 0009 HORIZONTAL TABULATION \t control-I - 0020 SPACE Zs - 00A0 NO-BREAK SPACE Zs - 1680 OGHAM SPACE MARK Zs - 2000 EN QUAD Zs - 2001 EM QUAD Zs - 2002 EN SPACE Zs - 2003 EM SPACE Zs - 2004 THREE-PER-EM SPACE Zs - 2005 FOUR-PER-EM SPACE Zs - 2006 SIX-PER-EM SPACE Zs - 2007 FIGURE SPACE Zs - 2008 PUNCTUATION SPACE Zs - 2009 THIN SPACE Zs - 200A HAIR SPACE Zs - 200B ZERO WIDTH SPACE Zs - 202F NARROW NO-BREAK SPACE Zs - 3000 IDEOGRAPHIC SPACE Zs - -The ASCII blank characters are the first two characters above -- -horizontal tab and space. Latin-1 adds the no-break space. - -Java doesn't have the concept of "blank" characters, so there are no -compatibility issues. - - -------------------------------------------------------------------------------- -* Reference implementation --------------------------- - -This SRFI comes with a reference implementation. It resides at: - http://srfi.schemers.org/srfi-14/srfi-14.scm -I have placed this source on the Net with an unencumbered, "open" copyright. -Some of the code in the reference implementation bears a distant family -relation to the MIT Scheme implementation, and being derived from that code, -is covered by the MIT Scheme copyright (which is a generic BSD-style -open-source copyright -- see the source file for details). The remainder of -the code was written by myself for scsh or for this SRFI; I have placed this -code under the scsh copyright, which is also a generic BSD-style open-source -copyright. - -The code is written for portability and should be simple to port to -any Scheme. It has only the following deviations from R4RS, clearly -discussed in the comments: - - an ERROR procedure; - - the R5RS VALUES procedure for producing multiple return values; - - a simple CHECK-ARG procedure for argument checking; - - LET-OPTIONALS* and :OPTIONAL macros for for parsing, checking & defaulting - optional arguments from rest lists; - - The SRFI-19 DEFINE-RECORD-TYPE form; - - BITWISE-AND for the hash function; - - %LATIN1->CHAR & %CHAR->LATIN1. - -The library is written for clarity and well-commented; the current source is -about 375 lines of source code and 375 lines of comments and white space. -It is also written for efficiency. Fast paths are provided for common cases. - -This is not to say that the implementation can't be tuned up for -a specific Scheme implementation. There are notes in comments addressing -ways implementors can tune the reference implementation for performance. - -In short, I've written the reference implementation to make it as painless -as possible for an implementor -- or a regular programmer -- to adopt this -library and get good results with it. - -The code uses a rather simple-minded, inefficient representation for -ASCII/Latin-1 char-sets -- a 256-character string. The character whose code is -I is in the set if S[I] = ASCII 1 (soh, or ^a); not in the set if S[I] = ASCII -0 (nul). A much faster and denser representation would be 16 or 32 bytes worth -of bit string. A portable implementation using bit sets awaits standards for -bitwise logical-ops and byte vectors. - -"Large" character types, such as Unicode, should use a sparse representation, -taking care that the Latin-1 subset continues to be represented with a -dense 32-byte bit set. - - -------------------------------------------------------------------------------- -* Acknowledgements ------------------- - -The design of this library benefited greatly from the feedback provided during -the SRFI discussion phase. Among those contributing thoughtful commentary and -suggestions, both on the mailing list and by private discussion, were Paolo -Amoroso, Lars Arvestad, Alan Bawden, Jim Bender, Dan Bornstein, Per Bothner, -Will Clinger, Brian Denheyer, Kent Dybvig, Sergei Egorov, Marc Feeley, -Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Arthur A. Gleckler, Ben -Goetter, Sven Hartrumpf, Erik Hilsdale, Shiro Kawai, Richard Kelsey, Oleg -Kiselyov, Bengt Kleberg, Donovan Kolbly, Bruce Korb, Shriram Krishnamurthi, -Bruce Lewis, Tom Lord, Brad Lucier, Dave Mason, David Rush, Klaus Schilling, -Jonathan Sobel, Mike Sperber, Mikael Staldal, Vladimir Tsyshevsky, Donald -Welsh, and Mike Wilson. I am grateful to them for their assistance. - -I am also grateful the authors, implementors and documentors of all the systems -mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted -for their work in producing Web-accessible versions of the R5RS and Common -Lisp spec, which was a tremendous aid. - -This is not to imply that these individuals necessarily endorse the final -results, of course. - -During this document's long development period, great patience was exhibited -by Mike Sperber, who is the editor for the SRFI, and by Hillary Sullivan, -who is not. - -------------------------------------------------------------------------------- -* References & links --------------------- - -[Java] - The following URLs provide documentation on relevant Java classes. - - http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html - http://java.sun.com/products/jdk/1.2/docs/api/java/lang/String.html - http://java.sun.com/products/jdk/1.2/docs/api/java/lang/StringBuffer.html - http://java.sun.com/products/jdk/1.2/docs/api/java/text/Collator.html - http://java.sun.com/products/jdk/1.2/docs/api/java/text/package-summary.html - -[MIT-Scheme] - http://www.swiss.ai.mit.edu/projects/scheme/ - -[R5RS] - Revised^5 report on the algorithmic language Scheme. - R. Kelsey, W. Clinger, J. Rees (editors). - Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998. - and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998. - - Available at http://www.schemers.org/Documents/Standards/ - -[SRFI] - The SRFI web site. - http://srfi.schemers.org/ - -[SRFI-14] - SRFI-14: Character-set library. - http://srfi.schemers.org/srfi-14/ - - This document, in HTML: - http://srfi.schemers.org/srfi-14/srfi-14.html - This document, in plain text format: - http://srfi.schemers.org/srfi-14/srfi-14.txt - Source code for the reference implementation: - http://srfi.schemers.org/srfi-14/srfi-14.scm - Scheme 48 module specification, with typings: - http://srfi.schemers.org/srfi-14/srfi-14-s48-module.scm - Regression-test suite: - http://srfi.schemers.org/srfi-14/srfi-14-tests.scm - -[Unicode] - http://www.unicode.org/ - -[UnicodeData] - The Unicode character database. - ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html - ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt - - -------------------------------------------------------------------------------- -* Copyright ------------ - -Certain portions of this document -- the specific, marked segments of text -describing the R5RS procedures -- were adapted with permission from the R5RS -report. - -All other text is copyright (C) Olin Shivers (1998, 1999). -All Rights Reserved. - -This document and translations of it may be copied and furnished to others, -and derivative works that comment on or otherwise explain it or assist in its -implementation may be prepared, copied, published and distributed, in whole or -in part, without restriction of any kind, provided that the above copyright -notice and this paragraph are included on all such copies and derivative -works. However, this document itself may not be modified in any way, such as -by removing the copyright notice or references to the Scheme Request For -Implementation process or editors, except as needed for the purpose of -developing SRFIs in which case the procedures for copyrights defined in the -SRFI process must be followed, or as required to translate it into languages -other than English. - -The limited permissions granted above are perpetual and will not be revoked by -the authors or their successors or assigns. - -This document and the information contained herein is provided on an "AS IS" -basis and THE AUTHORS AND THE SRFI EDITORS DISCLAIM ALL WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE -INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF -MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - -------------------------------------------------------------------------------- -* Ispell "buffer local" dictionary ----------------------------------- - -Ispell dumps "buffer local" words here. Please ignore. - - LocalWords: SRFI Unicode API RS lib ARG ascii xor diff defs Generalise cs CSi - LocalWords: kons knil proc upcase cset lp eof lis cdr pred ary CHARi Posix op - LocalWords: uniquified DrScheme soh nul HTML srfi html txt scm Clinger Rees - LocalWords: SIGPLAN refs ucs iso CS's downcase IEC conformant JIS ASCII URL - LocalWords: FFF abcdefghijklmnopqrstuvwxyz DF DIAERESIS AE EA EB EC EE EF ETH - LocalWords: FA FB FC FD FF SS diaeresis isLowerCase Ll AA BA titlecase CA CB - LocalWords: CC CD CE CF DA DC DD Lt CARON PSILI PROSGEGRAMMENI DASIA VARIA Lu - LocalWords: OXIA PERISPOMENI FAA FAB FAC FAE FAF FBC FFC Lm Lo abcdefABCDEF - LocalWords: Zs Zl Zp OGHAM IDEOGRAPHIC recognised isspace isWhitespace Pc Pd - LocalWords: tokenizers IsISOControl Ps Pe Pf AB BB BF Sm Sc Sk AC AF MACRON - LocalWords: PILCROW obj EQ scsh ops UnicodeData Paolo Amoroso Arvestad Bawden - LocalWords: Bornstein Bothner Denheyer Dybvig Egorov Feeley Matthias Flatt eq - LocalWords: Felleisen Gleckler Goetter Sven Hartrumpf Hilsdale Shiro Kawai - LocalWords: Kiselyov Bengt Kleberg Kolbly Korb Shriram Krishnamurthi Lucier - LocalWords: Schilling Sobel Mikael Staldal Tsyshevsky documentors Jaffer ans - LocalWords: Sperber bignum fixnum ref init doc dict subform diff --git a/scsh/lib/cset-obsolete.scm b/scsh/lib/cset-obsolete.scm deleted file mode 100644 index cb041c4..0000000 --- a/scsh/lib/cset-obsolete.scm +++ /dev/null @@ -1,56 +0,0 @@ -;;; Support for obsolete, deprecated 0.5.2 char-set procedures. -;;; Will go away in a future release. - -(define-interface obsolete-char-set-interface - (export char-set-members ; char-set->list - chars->char-set ; list->char-set - ascii-range->char-set ; ucs-range->char-set (not exact) - predicate->char-set ; char-set-filter (not exact) - ;->char-set ; no longer handles a predicate - char-set-every? ; char-set-every - char-set-any? ; char-set-any - - char-set-invert ; char-set-complement - char-set-invert! ; char-set-complement! - - char-set:alphabetic ; char-set:letter - char-set:numeric ; char-set:digit - char-set:alphanumeric ; char-set:letter+digit - char-set:control)) ; char-set:iso-control - - -(define-structure obsolete-char-set-lib obsolete-char-set-interface - (open scsh-utilities char-set-lib scheme) - (begin - - (define char-set-members - (deprecated-proc char-set->list 'char-set-members - "Use CHAR-SET->LIST instead.")) - (define chars->char-set - (deprecated-proc list->char-set 'chars->char-set - "Use LIST->CHAR-SET instead.")) - (define ascii-range->char-set - (deprecated-proc (lambda (lower upper) (ucs-range->char-set lower upper #t)) - 'ascii-range->char-set - "Use UCS-RANGE->CHAR-SET instead.")) - (define predicate->char-set - (deprecated-proc (lambda (pred) (char-set-filter pred char-set:full)) - 'predicate->char-set - "Change code to use CHAR-SET-FILTER.")) - (define char-set-every? - (deprecated-proc char-set-every 'char-set-every? - "Use CHAR-SET-EVERY instead.")) - (define char-set-any? - (deprecated-proc char-set-every 'char-set-any? - "Use CHAR-SET-ANY instead.")) - (define char-set-invert - (deprecated-proc char-set-complement 'char-set-invert - "Use CHAR-SET-COMPLEMENT instead.")) - (define char-set-invert! - (deprecated-proc char-set-complement! 'char-set-invert! - "Use CHAR-SET-COMPLEMENT! instead.")) - - (define char-set:alphabetic char-set:letter) - (define char-set:numeric char-set:digit) - (define char-set:alphanumeric char-set:letter+digit) - (define char-set:control char-set:iso-control))) diff --git a/scsh/lib/cset-package.scm b/scsh/lib/cset-package.scm deleted file mode 100644 index 0b0dcd7..0000000 --- a/scsh/lib/cset-package.scm +++ /dev/null @@ -1,151 +0,0 @@ -;;; SRFI-14 interface for Scheme48 -*- Scheme -*- -;;; -;;; Complete interface spec for the SRFI-14 char-set-lib library in the -;;; Scheme48 interface and module language. The interface is fully typed, in -;;; the Scheme48 type notation. The structure definitions also provide a -;;; formal description of the external dependencies of the source code. - -(define-interface char-set-interface - (export (char-set? (proc (:value) :boolean)) - ((char-set= char-set<=) (proc (&rest :value) :boolean)) - - (char-set-hash (proc (:value &opt :exact-integer) :exact-integer)) - - ;; Cursors are exact integers in the reference implementation. - ;; These typings would be different with a different cursor - ;; implementation. - ;; Too bad Scheme doesn't have abstract data types. - (char-set-cursor (proc (:value) :exact-integer)) - (char-set-ref (proc (:value :exact-integer) :char)) - (char-set-cursor-next (proc (:value :exact-integer) :exact-integer)) - (end-of-char-set? (proc (:value) :boolean)) - - (char-set-fold (proc ((proc (:char :value) :value) :value :value) - :value)) - (char-set-unfold (proc ((proc (:value) :boolean) - (proc (:value) :value) - (proc (:value) :value) - :value - &opt :value) - :value)) - - (char-set-unfold! (proc ((proc (:value) :boolean) - (proc (:value) :value) - (proc (:value) :value) - :value :value) - :value)) - - (char-set-for-each (proc ((proc (:char) :values) :value) :unspecific)) - (char-set-map (proc ((proc (:char) :char) :value) :value)) - - (char-set-copy (proc (:value) :value)) - - (char-set (proc (&rest :char) :value)) - - (list->char-set (proc (:value &opt :value) :value)) - (list->char-set! (proc (:value :value) :value)) - - (string->char-set (proc (:value &opt :value) :value)) - (string->char-set! (proc (:value :value) :value)) - - (ucs-range->char-set (proc (:exact-integer :exact-integer &opt - :boolean :value) - :value)) - (ucs-range->char-set! (proc (:exact-integer :exact-integer - :boolean :value) - :value)) - - (char-set-filter (proc ((proc (:char) :boolean) :value &opt :value) :value)) - (char-set-filter! (proc ((proc (:char) :boolean) :value :value) :value)) - - (->char-set (proc (:value) :value)) - - (char-set-size (proc (:value) :exact-integer)) - (char-set-count (proc ((proc (:char) :boolean) :value) :exact-integer)) - (char-set-contains? (proc (:value :value) :boolean)) - - (char-set-every (proc ((proc (:char) :boolean) :value) :boolean)) - (char-set-any (proc ((proc (:char) :boolean) :value) :value)) - - ((char-set-adjoin char-set-delete - char-set-adjoin! char-set-delete!) - (proc (:value &rest :char) :value)) - - (char-set->list (proc (:value) :value)) - (char-set->string (proc (:value) :string)) - - (char-set-complement (proc (:value) :value)) - ((char-set-union char-set-intersection char-set-xor) - (proc (&rest :value) :value)) - - (char-set-difference (proc (:value &opt :value) :value)) - - (char-set-diff+intersection (proc (:value &rest :value) - (some-values :value :value))) - - (char-set-complement! (proc (:value) :value)) - - ((char-set-union! char-set-intersection! - char-set-xor! char-set-difference!) - (proc (:value &opt :value) :value)) - - (char-set-diff+intersection! (proc (:value :value &rest :value) - (some-values :value :value))) - - char-set:lower-case - char-set:upper-case - char-set:letter - char-set:digit - char-set:letter+digit - char-set:graphic - char-set:printing - char-set:whitespace - char-set:blank - char-set:iso-control - char-set:punctuation - char-set:symbol - char-set:hex-digit - char-set:ascii - char-set:empty - char-set:full - )) - -; rdelim.scm gets into the innards of char-sets. -(define-interface scsh-char-set-low-level-interface - (export (char-set:s (proc (:value) :string)))) - -(define-structures ((char-set-lib char-set-interface) - (scsh-char-set-low-level-lib scsh-char-set-low-level-interface)) - (open error-package ; ERROR procedure - let-opt ; LET-OPTIONALS* and :OPTIONAL - ascii ; CHAR->ASCII ASCII->CHAR - bitwise ; BITWISE-AND - jar-d-r-t-package ; DEFINE-RECORD-TYPE/JAR macro. - scheme) - - (begin (define (check-arg pred val caller) - (let lp ((val val)) - (if (pred val) val (lp (error "Bad argument" val pred caller))))) - - (define %latin1->char ascii->char) ; Works for S48 - (define %char->latin1 char->ascii) ; Works for S48 - - ;; Here's a SRFI-19 d-r-t defined in terms of jar's almost-identical - ;; d-r-t. - (define-syntax define-record-type - (syntax-rules () - ((define-record-type ?name ?stuff ...) - (define-record-type/jar ?name ?name ?stuff ...))))) - - (files cset-lib) - (optimize auto-integrate)) - -;;; Import jar's DEFINE-RECORD-TYPE macro, and export it under the -;;; name DEFINE-RECORD-TYPE/JAR. -(define-structure jar-d-r-t-package (export (define-record-type/jar :syntax)) - (open define-record-types ; JAR's record macro - scheme) - (begin (define-syntax define-record-type/jar - (syntax-rules () - ((define-record-type/jar ?stuff ...) - (define-record-type ?stuff ...)))))) diff --git a/scsh/lib/cset-tests.scm b/scsh/lib/cset-tests.scm deleted file mode 100644 index 0b96314..0000000 --- a/scsh/lib/cset-tests.scm +++ /dev/null @@ -1,200 +0,0 @@ -;;; This is a regression testing suite for the SRFI-14 char-set library. -;;; Olin Shivers - -(let-syntax ((test (syntax-rules () - ((test form ...) - (cond ((not form) (error "Test failed" 'form)) ... - (else 'OK)))))) - (let ((vowel (lambda (c) (member c '(#\a #\e #\i #\o #\u))))) - -(test - (not (char-set? 5)) - - (char-set? (char-set #\a #\e #\i #\o #\u)) - - (char-set=) - (char-set= (char-set)) - - (char-set= (char-set #\a #\e #\i #\o #\u) - (string->char-set "ioeauaiii")) - - (not (char-set= (char-set #\e #\i #\o #\u) - (string->char-set "ioeauaiii"))) - - (char-set<=) - (char-set<= (char-set)) - - (char-set<= (char-set #\a #\e #\i #\o #\u) - (string->char-set "ioeauaiii")) - - (char-set<= (char-set #\e #\i #\o #\u) - (string->char-set "ioeauaiii")) - - (<= 0 (char-set-hash char-set:graphic 100) 99) - - (= 4 (char-set-fold (lambda (c i) (+ i 1)) 0 - (char-set #\e #\i #\o #\u #\e #\e))) - - (char-set= (string->char-set "eiaou2468013579999") - (char-set-unfold null? car cdr '(#\a #\e #\i #\o #\u #\u #\u) - char-set:digit)) - - (char-set= (string->char-set "eiaou246801357999") - (char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u) - (string->char-set "0123456789"))) - - (not (char-set= (string->char-set "eiaou246801357") - (char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u) - (string->char-set "0123456789")))) - - (let ((cs (string->char-set "0123456789"))) - (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c))) - (string->char-set "02468000")) - (char-set= cs (string->char-set "97531"))) - - (not (let ((cs (string->char-set "0123456789"))) - (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c))) - (string->char-set "02468")) - (char-set= cs (string->char-set "7531")))) - - (char-set= (char-set-map char-upcase (string->char-set "aeiou")) - (string->char-set "IOUAEEEE")) - - (not (char-set= (char-set-map char-upcase (string->char-set "aeiou")) - (string->char-set "OUAEEEE"))) - - (char-set= (char-set-copy (string->char-set "aeiou")) - (string->char-set "aeiou")) - - (char-set= (char-set #\x #\y) (string->char-set "xy")) - (not (char-set= (char-set #\x #\y #\z) (string->char-set "xy"))) - - (char-set= (string->char-set "xy") (list->char-set '(#\x #\y))) - (not (char-set= (string->char-set "axy") (list->char-set '(#\x #\y)))) - - (char-set= (string->char-set "xy12345") - (list->char-set '(#\x #\y) (string->char-set "12345"))) - (not (char-set= (string->char-set "y12345") - (list->char-set '(#\x #\y) (string->char-set "12345")))) - - (char-set= (string->char-set "xy12345") - (list->char-set! '(#\x #\y) (string->char-set "12345"))) - (not (char-set= (string->char-set "y12345") - (list->char-set! '(#\x #\y) (string->char-set "12345")))) - - (char-set= (string->char-set "aeiou12345") - (char-set-filter vowel? char-set:ascii (string->char-set "12345"))) - (not (char-set= (string->char-set "aeou12345") - (char-set-filter vowel? char-set:ascii (string->char-set "12345")))) - - (char-set= (string->char-set "aeiou12345") - (char-set-filter! vowel? char-set:ascii (string->char-set "12345"))) - (not (char-set= (string->char-set "aeou12345") - (char-set-filter! vowel? char-set:ascii (string->char-set "12345")))) - - - (char-set= (string->char-set "abcdef12345") - (ucs-range->char-set 97 103 #t (string->char-set "12345"))) - (not (char-set= (string->char-set "abcef12345") - (ucs-range->char-set 97 103 #t (string->char-set "12345")))) - - (char-set= (string->char-set "abcdef12345") - (ucs-range->char-set! 97 103 #t (string->char-set "12345"))) - (not (char-set= (string->char-set "abcef12345") - (ucs-range->char-set! 97 103 #t (string->char-set "12345")))) - - - (char-set= (->char-set #\x) - (->char-set "x") - (->char-set (char-set #\x))) - - (not (char-set= (->char-set #\x) - (->char-set "y") - (->char-set (char-set #\x)))) - - (= 10 (char-set-size (char-set-intersection char-set:ascii char-set:digit))) - - (= 5 (char-set-count vowel? char-set:ascii)) - - (equal? '(#\x) (char-set->list (char-set #\x))) - (not (equal? '(#\X) (char-set->list (char-set #\x)))) - - (equal? "x" (char-set->string (char-set #\x))) - (not (equal? "X" (char-set->string (char-set #\x)))) - - (char-set-contains? (->char-set "xyz") #\x) - (not (char-set-contains? (->char-set "xyz") #\a)) - - (char-set-every char-lower-case? (->char-set "abcd")) - (not (char-set-every char-lower-case? (->char-set "abcD"))) - (char-set-any char-lower-case? (->char-set "abcd")) - (not (char-set-any char-lower-case? (->char-set "ABCD"))) - - (char-set= (->char-set "ABCD") - (let ((cs (->char-set "abcd"))) - (let lp ((cur (char-set-cursor cs)) (ans '())) - (if (end-of-char-set? cur) (list->char-set ans) - (lp (char-set-cursor-next cs cur) - (cons (char-upcase (char-set-ref cs cur)) ans)))))) - - - (char-set= (char-set-adjoin (->char-set "123") #\x #\a) - (->char-set "123xa")) - (not (char-set= (char-set-adjoin (->char-set "123") #\x #\a) - (->char-set "123x"))) - (char-set= (char-set-adjoin! (->char-set "123") #\x #\a) - (->char-set "123xa")) - (not (char-set= (char-set-adjoin! (->char-set "123") #\x #\a) - (->char-set "123x"))) - - (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2) - (->char-set "13")) - (not (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2) - (->char-set "13a"))) - (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2) - (->char-set "13")) - (not (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2) - (->char-set "13a"))) - - (char-set= (char-set-intersection char-set:hex-digit (char-set-complement char-set:digit)) - (->char-set "abcdefABCDEF")) - (char-set= (char-set-intersection! (char-set-complement! (->char-set "0123456789")) - char-set:hex-digit) - (->char-set "abcdefABCDEF")) - - (char-set= (char-set-union char-set:hex-digit - (->char-set "abcdefghijkl")) - (->char-set "abcdefABCDEFghijkl0123456789")) - (char-set= (char-set-union! (->char-set "abcdefghijkl") - char-set:hex-digit) - (->char-set "abcdefABCDEFghijkl0123456789")) - - (char-set= (char-set-difference (->char-set "abcdefghijklmn") - char-set:hex-digit) - (->char-set "ghijklmn")) - (char-set= (char-set-difference! (->char-set "abcdefghijklmn") - char-set:hex-digit) - (->char-set "ghijklmn")) - - (char-set= (char-set-xor (->char-set "0123456789") - char-set:hex-digit) - (->char-set "abcdefABCDEF")) - (char-set= (char-set-xor! (->char-set "0123456789") - char-set:hex-digit) - (->char-set "abcdefABCDEF")) - - (call-with-values (lambda () - (char-set-diff+intersection char-set:hex-digit - char-set:letter)) - (lambda (d i) - (and (char-set= d (->char-set "0123456789")) - (char-set= i (->char-set "abcdefABCDEF"))))) - - (call-with-values (lambda () - (char-set-diff+intersection! (char-set-copy char-set:hex-digit) - (char-set-copy char-set:letter))) - (lambda (d i) - (and (char-set= d (->char-set "0123456789")) - (char-set= i (->char-set "abcdefABCDEF")))))) - -)) diff --git a/scsh/lib/list-lib.scm b/scsh/lib/list-lib.scm deleted file mode 100644 index 7386882..0000000 --- a/scsh/lib/list-lib.scm +++ /dev/null @@ -1,1599 +0,0 @@ -;;; SRFI-1 list-processing library -*- Scheme -*- -;;; Reference implementation -;;; -;;; Copyright (c) 1998, 1999 by Olin Shivers. You may do as you please with -;;; this code as long as you do not remove this copyright notice or -;;; hold me liable for its use. Please send bug reports to shivers@ai.mit.edu. -;;; -Olin - -;;; This is a library of list- and pair-processing functions. I wrote it after -;;; carefully considering the functions provided by the libraries found in -;;; R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common -;;; Lisp, Bigloo, guile, T, APL and the SML standard basis. It is a pretty -;;; rich toolkit, providing a superset of the functionality found in any of -;;; the various Schemes I considered. - -;;; This implementation is intended as a portable reference implementation -;;; for SRFI-1. See the porting notes below for more information. - -;;; Revision history -;;;;;;;;;;;;;;;;;;;; -;;; This is version 1.1. 12/18/2000 -;;; Fixes a small bug in DELETE-DUPLICATES!. - -;;; Exported: -;;; xcons tree-copy make-list list-tabulate cons* list-copy -;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= -;;; circular-list length+ -;;; iota -;;; first second third fourth fifth sixth seventh eighth ninth tenth -;;; car+cdr -;;; take drop -;;; take-right drop-right -;;; take! drop-right! -;;; split-at split-at! -;;; last last-pair -;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 -;;; count -;;; append! append-reverse append-reverse! concatenate concatenate! -;;; unfold fold pair-fold reduce -;;; unfold-right fold-right pair-fold-right reduce-right -;;; append-map append-map! map! pair-for-each filter-map map-in-order -;;; filter partition remove -;;; filter! partition! remove! -;;; find find-tail any every list-index -;;; take-while drop-while take-while! -;;; span break span! break! -;;; delete delete! -;;; alist-cons alist-copy -;;; delete-duplicates delete-duplicates! -;;; alist-delete alist-delete! -;;; reverse! -;;; lset<= lset= lset-adjoin -;;; lset-union lset-intersection lset-difference lset-xor lset-diff+intersection -;;; lset-union! lset-intersection! lset-difference! lset-xor! lset-diff+intersection! -;;; -;;; In principle, the following R4RS list- and pair-processing procedures -;;; are also part of this package's exports, although they are not defined -;;; in this file: -;;; Primitives: cons pair? null? car cdr set-car! set-cdr! -;;; Non-primitives: list length append reverse cadr ... cddddr list-ref -;;; memq memv assq assv -;;; (The non-primitives are defined in this file, but commented out.) -;;; -;;; These R4RS procedures have extended definitions in SRFI-1 and are defined -;;; in this file: -;;; map for-each member assoc -;;; -;;; The remaining two R4RS list-processing procedures are not included: -;;; list-tail (use drop) -;;; list? (use proper-list?) - - -;;; A note on recursion and iteration/reversal: -;;; Many iterative list-processing algorithms naturally compute the elements -;;; of the answer list in the wrong order (left-to-right or head-to-tail) from -;;; the order needed to cons them into the proper answer (right-to-left, or -;;; tail-then-head). One style or idiom of programming these algorithms, then, -;;; loops, consing up the elements in reverse order, then destructively -;;; reverses the list at the end of the loop. I do not do this. The natural -;;; and efficient way to code these algorithms is recursively. This trades off -;;; intermediate temporary list structure for intermediate temporary stack -;;; structure. In a stack-based system, this improves cache locality and -;;; lightens the load on the GC system. Don't stand on your head to iterate! -;;; Recurse, where natural. Multiple-value returns make this even more -;;; convenient, when the recursion/iteration has multiple state values. - -;;; Porting: -;;; This is carefully tuned code; do not modify casually. -;;; - It is careful to share storage when possible; -;;; - Side-effecting code tries not to perform redundant writes. -;;; -;;; That said, a port of this library to a specific Scheme system might wish -;;; to tune this code to exploit particulars of the implementation. -;;; The single most important compiler-specific optimisation you could make -;;; to this library would be to add rewrite rules or transforms to: -;;; - transform applications of n-ary procedures (e.g. LIST=, CONS*, APPEND, -;;; LSET-UNION) into multiple applications of a primitive two-argument -;;; variant. -;;; - transform applications of the mapping functions (MAP, FOR-EACH, FOLD, -;;; ANY, EVERY) into open-coded loops. The killer here is that these -;;; functions are n-ary. Handling the general case is quite inefficient, -;;; requiring many intermediate data structures to be allocated and -;;; discarded. -;;; - transform applications of procedures that take optional arguments -;;; into calls to variants that do not take optional arguments. This -;;; eliminates unnecessary consing and parsing of the rest parameter. -;;; -;;; These transforms would provide BIG speedups. In particular, the n-ary -;;; mapping functions are particularly slow and cons-intensive, and are good -;;; candidates for tuning. I have coded fast paths for the single-list cases, -;;; but what you really want to do is exploit the fact that the compiler -;;; usually knows how many arguments are being passed to a particular -;;; application of these functions -- they are usually explicitly called, not -;;; passed around as higher-order values. If you can arrange to have your -;;; compiler produce custom code or custom linkages based on the number of -;;; arguments in the call, you can speed these functions up a *lot*. But this -;;; kind of compiler technology no longer exists in the Scheme world as far as -;;; I can see. -;;; -;;; Note that this code is, of course, dependent upon standard bindings for -;;; the R5RS procedures -- i.e., it assumes that the variable CAR is bound -;;; to the procedure that takes the car of a list. If your Scheme -;;; implementation allows user code to alter the bindings of these procedures -;;; in a manner that would be visible to these definitions, then there might -;;; be trouble. You could consider horrible kludgery along the lines of -;;; (define fact -;;; (let ((= =) (- -) (* *)) -;;; (letrec ((real-fact (lambda (n) -;;; (if (= n 0) 1 (* n (real-fact (- n 1))))))) -;;; real-fact))) -;;; Or you could consider shifting to a reasonable Scheme system that, say, -;;; has a module system protecting code from this kind of lossage. -;;; -;;; This code does a fair amount of run-time argument checking. If your -;;; Scheme system has a sophisticated compiler that can eliminate redundant -;;; error checks, this is no problem. However, if not, these checks incur -;;; some performance overhead -- and, in a safe Scheme implementation, they -;;; are in some sense redundant: if we don't check to see that the PROC -;;; parameter is a procedure, we'll find out anyway three lines later when -;;; we try to call the value. It's pretty easy to rip all this argument -;;; checking code out if it's inappropriate for your implementation -- just -;;; nuke every call to CHECK-ARG. -;;; -;;; On the other hand, if you *do* have a sophisticated compiler that will -;;; actually perform soft-typing and eliminate redundant checks (Rice's systems -;;; being the only possible candidate of which I'm aware), leaving these checks -;;; in can *help*, since their presence can be elided in redundant cases, -;;; and in cases where they are needed, performing the checks early, at -;;; procedure entry, can "lift" a check out of a loop. -;;; -;;; Finally, I have only checked the properties that can portably be checked -;;; with R5RS Scheme -- and this is not complete. You may wish to alter -;;; the CHECK-ARG parameter checks to perform extra, implementation-specific -;;; checks, such as procedure arity for higher-order values. -;;; -;;; The code has only these non-R4RS dependencies: -;;; A few calls to an ERROR procedure; -;;; Uses of the R5RS multiple-value procedure VALUES and the m-v binding -;;; RECEIVE macro (which isn't R5RS, but is a trivial macro). -;;; Many calls to a parameter-checking procedure check-arg: -;;; (define (check-arg pred val caller) -;;; (let lp ((val val)) -;;; (if (pred val) val (lp (error "Bad argument" val pred caller))))) -;;; A few uses of the LET-OPTIONAL and :OPTIONAL macros for parsing -;;; optional arguments. -;;; -;;; Most of these procedures use the NULL-LIST? test to trigger the -;;; base case in the inner loop or recursion. The NULL-LIST? function -;;; is defined to be a careful one -- it raises an error if passed a -;;; non-nil, non-pair value. The spec allows an implementation to use -;;; a less-careful implementation that simply defines NULL-LIST? to -;;; be NOT-PAIR?. This would speed up the inner loops of these procedures -;;; at the expense of having them silently accept dotted lists. - -;;; A note on dotted lists: -;;; I, personally, take the view that the only consistent view of lists -;;; in Scheme is the view that *everything* is a list -- values such as -;;; 3 or "foo" or 'bar are simply empty dotted lists. This is due to the -;;; fact that Scheme actually has no true list type. It has a pair type, -;;; and there is an *interpretation* of the trees built using this type -;;; as lists. -;;; -;;; I lobbied to have these list-processing procedures hew to this -;;; view, and accept any value as a list argument. I was overwhelmingly -;;; overruled during the SRFI discussion phase. So I am inserting this -;;; text in the reference lib and the SRFI spec as a sort of "minority -;;; opinion" dissent. -;;; -;;; Many of the procedures in this library can be trivially redefined -;;; to handle dotted lists, just by changing the NULL-LIST? base-case -;;; check to NOT-PAIR?, meaning that any non-pair value is taken to be -;;; an empty list. For most of these procedures, that's all that is -;;; required. -;;; -;;; However, we have to do a little more work for some procedures that -;;; *produce* lists from other lists. Were we to extend these procedures to -;;; accept dotted lists, we would have to define how they terminate the lists -;;; produced as results when passed a dotted list. I designed a coherent set -;;; of termination rules for these cases; this was posted to the SRFI-1 -;;; discussion list. I additionally wrote an earlier version of this library -;;; that implemented that spec. It has been discarded during later phases of -;;; the definition and implementation of this library. -;;; -;;; The argument *against* defining these procedures to work on dotted -;;; lists is that dotted lists are the rare, odd case, and that by -;;; arranging for the procedures to handle them, we lose error checking -;;; in the cases where a dotted list is passed by accident -- e.g., when -;;; the programmer swaps a two arguments to a list-processing function, -;;; one being a scalar and one being a list. For example, -;;; (member '(1 3 5 7 9) 7) -;;; This would quietly return #f if we extended MEMBER to accept dotted -;;; lists. -;;; -;;; The SRFI discussion record contains more discussion on this topic. - - -;;; Constructors -;;;;;;;;;;;;;;;; - -;;; Occasionally useful as a value to be passed to a fold or other -;;; higher-order procedure. -(define (xcons d a) (cons a d)) - -;;;; Recursively copy every cons. -;(define (tree-copy x) -; (let recur ((x x)) -; (if (not (pair? x)) x -; (cons (recur (car x)) (recur (cdr x)))))) - -;;; Make a list of length LEN. - -(define (make-list len . maybe-elt) - (check-arg (lambda (n) (and (integer? n) (>= n 0))) len make-list) - (let ((elt (cond ((null? maybe-elt) #f) ; Default value - ((null? (cdr maybe-elt)) (car maybe-elt)) - (else (error "Too many arguments to MAKE-LIST" - (cons len maybe-elt)))))) - (do ((i len (- i 1)) - (ans '() (cons elt ans))) - ((<= i 0) ans)))) - - -;(define (list . ans) ans) ; R4RS - - -;;; Make a list of length LEN. Elt i is (PROC i) for 0 <= i < LEN. - -(define (list-tabulate len proc) - (check-arg (lambda (n) (and (integer? n) (>= n 0))) len list-tabulate) - (check-arg procedure? proc list-tabulate) - (do ((i (- len 1) (- i 1)) - (ans '() (cons (proc i) ans))) - ((< i 0) ans))) - -;;; (cons* a1 a2 ... an) = (cons a1 (cons a2 (cons ... an))) -;;; (cons* a1) = a1 (cons* a1 a2 ...) = (cons a1 (cons* a2 ...)) -;;; -;;; (cons first (unfold not-pair? car cdr rest values)) - -(define (cons* first . rest) - (let recur ((x first) (rest rest)) - (if (pair? rest) - (cons x (recur (car rest) (cdr rest))) - x))) - -;;; (unfold not-pair? car cdr lis values) - -(define (list-copy lis) - (let recur ((lis lis)) - (if (pair? lis) - (cons (car lis) (recur (cdr lis))) - lis))) - -;;; IOTA count [start step] (start start+step ... start+(count-1)*step) - -(define (iota count . maybe-start+step) - (check-arg integer? count iota) - (if (< count 0) (error "Negative step count" iota count)) - (let-optionals maybe-start+step ((start 0) (step 1)) - (check-arg number? start iota) - (check-arg number? step iota) - (let ((last-val (+ start (* (- count 1) step)))) - (do ((count count (- count 1)) - (val last-val (- val step)) - (ans '() (cons val ans))) - ((<= count 0) ans))))) - -;;; I thought these were lovely, but the public at large did not share my -;;; enthusiasm... -;;; :IOTA to (0 ... to-1) -;;; :IOTA from to (from ... to-1) -;;; :IOTA from to step (from from+step ...) - -;;; IOTA: to (1 ... to) -;;; IOTA: from to (from+1 ... to) -;;; IOTA: from to step (from+step from+2step ...) - -;(define (%parse-iota-args arg1 rest-args proc) -; (let ((check (lambda (n) (check-arg integer? n proc)))) -; (check arg1) -; (if (pair? rest-args) -; (let ((arg2 (check (car rest-args))) -; (rest (cdr rest-args))) -; (if (pair? rest) -; (let ((arg3 (check (car rest))) -; (rest (cdr rest))) -; (if (pair? rest) (error "Too many parameters" proc arg1 rest-args) -; (values arg1 arg2 arg3))) -; (values arg1 arg2 1))) -; (values 0 arg1 1)))) -; -;(define (iota: arg1 . rest-args) -; (receive (from to step) (%parse-iota-args arg1 rest-args iota:) -; (let* ((numsteps (floor (/ (- to from) step))) -; (last-val (+ from (* step numsteps)))) -; (if (< numsteps 0) (error "Negative step count" iota: from to step)) -; (do ((steps-left numsteps (- steps-left 1)) -; (val last-val (- val step)) -; (ans '() (cons val ans))) -; ((<= steps-left 0) ans))))) -; -; -;(define (:iota arg1 . rest-args) -; (receive (from to step) (%parse-iota-args arg1 rest-args :iota) -; (let* ((numsteps (ceiling (/ (- to from) step))) -; (last-val (+ from (* step (- numsteps 1))))) -; (if (< numsteps 0) (error "Negative step count" :iota from to step)) -; (do ((steps-left numsteps (- steps-left 1)) -; (val last-val (- val step)) -; (ans '() (cons val ans))) -; ((<= steps-left 0) ans))))) - - - -(define (circular-list val1 . vals) - (let ((ans (cons val1 vals))) - (set-cdr! (last-pair ans) ans) - ans)) - -;;; ::= () ; Empty proper list -;;; | (cons ) ; Proper-list pair -;;; Note that this definition rules out circular lists -- and this -;;; function is required to detect this case and return false. - -(define (proper-list? x) - (let lp ((x x) (lag x)) - (if (pair? x) - (let ((x (cdr x))) - (if (pair? x) - (let ((x (cdr x)) - (lag (cdr lag))) - (and (not (eq? x lag)) (lp x lag))) - (null? x))) - (null? x)))) - - -;;; A dotted list is a finite list (possibly of length 0) terminated -;;; by a non-nil value. Any non-cons, non-nil value (e.g., "foo" or 5) -;;; is a dotted list of length 0. -;;; -;;; ::= ; Empty dotted list -;;; | (cons ) ; Proper-list pair - -(define (dotted-list? x) - (let lp ((x x) (lag x)) - (if (pair? x) - (let ((x (cdr x))) - (if (pair? x) - (let ((x (cdr x)) - (lag (cdr lag))) - (and (not (eq? x lag)) (lp x lag))) - (not (null? x)))) - (not (null? x))))) - -(define (circular-list? x) - (let lp ((x x) (lag x)) - (and (pair? x) - (let ((x (cdr x))) - (and (pair? x) - (let ((x (cdr x)) - (lag (cdr lag))) - (or (eq? x lag) (lp x lag)))))))) - -(define (not-pair? x) (not (pair? x))) ; Inline me. - -;;; This is a legal definition which is fast and sloppy: -;;; (define null-list? not-pair?) -;;; but we'll provide a more careful one: -(define (null-list? l) - (cond ((pair? l) #f) - ((null? l) #t) - (else (error "null-list?: argument out of domain" l)))) - - -(define (list= = . lists) - (or (null? lists) ; special case - - (let lp1 ((list-a (car lists)) (others (cdr lists))) - (or (null? others) - (let ((list-b (car others)) - (others (cdr others))) - (if (eq? list-a list-b) ; EQ? => LIST= - (lp1 list-b others) - (let lp2 ((list-a list-a) (list-b list-b)) - (if (null-list? list-a) - (and (null-list? list-b) - (lp1 list-b others)) - (and (not (null-list? list-b)) - (= (car list-a) (car list-b)) - (lp2 (cdr list-a) (cdr list-b))))))))))) - - - -;;; R4RS, so commented out. -;(define (length x) ; LENGTH may diverge or -; (let lp ((x x) (len 0)) ; raise an error if X is -; (if (pair? x) ; a circular list. This version -; (lp (cdr x) (+ len 1)) ; diverges. -; len))) - -(define (length+ x) ; Returns #f if X is circular. - (let lp ((x x) (lag x) (len 0)) - (if (pair? x) - (let ((x (cdr x)) - (len (+ len 1))) - (if (pair? x) - (let ((x (cdr x)) - (lag (cdr lag)) - (len (+ len 1))) - (and (not (eq? x lag)) (lp x lag len))) - len)) - len))) - -(define (zip list1 . more-lists) (apply map list list1 more-lists)) - - -;;; Selectors -;;;;;;;;;;;;; - -;;; R4RS non-primitives: -;(define (caar x) (car (car x))) -;(define (cadr x) (car (cdr x))) -;(define (cdar x) (cdr (car x))) -;(define (cddr x) (cdr (cdr x))) -; -;(define (caaar x) (caar (car x))) -;(define (caadr x) (caar (cdr x))) -;(define (cadar x) (cadr (car x))) -;(define (caddr x) (cadr (cdr x))) -;(define (cdaar x) (cdar (car x))) -;(define (cdadr x) (cdar (cdr x))) -;(define (cddar x) (cddr (car x))) -;(define (cdddr x) (cddr (cdr x))) -; -;(define (caaaar x) (caaar (car x))) -;(define (caaadr x) (caaar (cdr x))) -;(define (caadar x) (caadr (car x))) -;(define (caaddr x) (caadr (cdr x))) -;(define (cadaar x) (cadar (car x))) -;(define (cadadr x) (cadar (cdr x))) -;(define (caddar x) (caddr (car x))) -;(define (cadddr x) (caddr (cdr x))) -;(define (cdaaar x) (cdaar (car x))) -;(define (cdaadr x) (cdaar (cdr x))) -;(define (cdadar x) (cdadr (car x))) -;(define (cdaddr x) (cdadr (cdr x))) -;(define (cddaar x) (cddar (car x))) -;(define (cddadr x) (cddar (cdr x))) -;(define (cdddar x) (cdddr (car x))) -;(define (cddddr x) (cdddr (cdr x))) - - -(define first car) -(define second cadr) -(define third caddr) -(define fourth cadddr) -(define (fifth x) (car (cddddr x))) -(define (sixth x) (cadr (cddddr x))) -(define (seventh x) (caddr (cddddr x))) -(define (eighth x) (cadddr (cddddr x))) -(define (ninth x) (car (cddddr (cddddr x)))) -(define (tenth x) (cadr (cddddr (cddddr x)))) - -(define (car+cdr pair) (values (car pair) (cdr pair))) - -;;; take & drop - -(define (take lis k) - (check-arg integer? k take) - (let recur ((lis lis) (k k)) - (if (zero? k) '() - (cons (car lis) - (recur (cdr lis) (- k 1)))))) - -(define (drop lis k) - (check-arg integer? k drop) - (let iter ((lis lis) (k k)) - (if (zero? k) lis (iter (cdr lis) (- k 1))))) - -(define (take! lis k) - (check-arg integer? k take!) - (if (zero? k) '() - (begin (set-cdr! (drop lis (- k 1)) '()) - lis))) - -;;; TAKE-RIGHT and DROP-RIGHT work by getting two pointers into the list, -;;; off by K, then chasing down the list until the lead pointer falls off -;;; the end. - -(define (take-right lis k) - (check-arg integer? k take-right) - (let lp ((lag lis) (lead (drop lis k))) - (if (pair? lead) - (lp (cdr lag) (cdr lead)) - lag))) - -(define (drop-right lis k) - (check-arg integer? k drop-right) - (let recur ((lag lis) (lead (drop lis k))) - (if (pair? lead) - (cons (car lag) (recur (cdr lag) (cdr lead))) - '()))) - -;;; In this function, LEAD is actually K+1 ahead of LAG. This lets -;;; us stop LAG one step early, in time to smash its cdr to (). -(define (drop-right! lis k) - (check-arg integer? k drop-right!) - (let ((lead (drop lis k))) - (if (pair? lead) - - (let lp ((lag lis) (lead (cdr lead))) ; Standard case - (if (pair? lead) - (lp (cdr lag) (cdr lead)) - (begin (set-cdr! lag '()) - lis))) - - '()))) ; Special case dropping everything -- no cons to side-effect. - -;(define (list-ref lis i) (car (drop lis i))) ; R4RS - -;;; These use the APL convention, whereby negative indices mean -;;; "from the right." I liked them, but they didn't win over the -;;; SRFI reviewers. -;;; K >= 0: Take and drop K elts from the front of the list. -;;; K <= 0: Take and drop -K elts from the end of the list. - -;(define (take lis k) -; (check-arg integer? k take) -; (if (negative? k) -; (list-tail lis (+ k (length lis))) -; (let recur ((lis lis) (k k)) -; (if (zero? k) '() -; (cons (car lis) -; (recur (cdr lis) (- k 1))))))) -; -;(define (drop lis k) -; (check-arg integer? k drop) -; (if (negative? k) -; (let recur ((lis lis) (nelts (+ k (length lis)))) -; (if (zero? nelts) '() -; (cons (car lis) -; (recur (cdr lis) (- nelts 1))))) -; (list-tail lis k))) -; -; -;(define (take! lis k) -; (check-arg integer? k take!) -; (cond ((zero? k) '()) -; ((positive? k) -; (set-cdr! (list-tail lis (- k 1)) '()) -; lis) -; (else (list-tail lis (+ k (length lis)))))) -; -;(define (drop! lis k) -; (check-arg integer? k drop!) -; (if (negative? k) -; (let ((nelts (+ k (length lis)))) -; (if (zero? nelts) '() -; (begin (set-cdr! (list-tail lis (- nelts 1)) '()) -; lis))) -; (list-tail lis k))) - -(define (split-at x k) - (check-arg integer? k split-at) - (let recur ((lis x) (k k)) - (if (zero? k) (values '() lis) - (receive (prefix suffix) (recur (cdr lis) (- k 1)) - (values (cons (car lis) prefix) suffix))))) - -(define (split-at! x k) - (check-arg integer? k split-at!) - (if (zero? k) (values '() x) - (let* ((prev (drop x (- k 1))) - (suffix (cdr prev))) - (set-cdr! prev '()) - (values x suffix)))) - - -(define (last lis) (car (last-pair lis))) - -(define (last-pair lis) - (check-arg pair? lis last-pair) - (let lp ((lis lis)) - (let ((tail (cdr lis))) - (if (pair? tail) (lp tail) lis)))) - - -;;; Unzippers -- 1 through 5 -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (unzip1 lis) (map car lis)) - -(define (unzip2 lis) - (let recur ((lis lis)) - (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle - (let ((elt (car lis))) ; dotted lists. - (receive (a b) (recur (cdr lis)) - (values (cons (car elt) a) - (cons (cadr elt) b))))))) - -(define (unzip3 lis) - (let recur ((lis lis)) - (if (null-list? lis) (values lis lis lis) - (let ((elt (car lis))) - (receive (a b c) (recur (cdr lis)) - (values (cons (car elt) a) - (cons (cadr elt) b) - (cons (caddr elt) c))))))) - -(define (unzip4 lis) - (let recur ((lis lis)) - (if (null-list? lis) (values lis lis lis lis) - (let ((elt (car lis))) - (receive (a b c d) (recur (cdr lis)) - (values (cons (car elt) a) - (cons (cadr elt) b) - (cons (caddr elt) c) - (cons (cadddr elt) d))))))) - -(define (unzip5 lis) - (let recur ((lis lis)) - (if (null-list? lis) (values lis lis lis lis lis) - (let ((elt (car lis))) - (receive (a b c d e) (recur (cdr lis)) - (values (cons (car elt) a) - (cons (cadr elt) b) - (cons (caddr elt) c) - (cons (cadddr elt) d) - (cons (car (cddddr elt)) e))))))) - - -;;; append! append-reverse append-reverse! concatenate concatenate! -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (append! . lists) - ;; First, scan through lists looking for a non-empty one. - (let lp ((lists lists) (prev '())) - (if (not (pair? lists)) prev - (let ((first (car lists)) - (rest (cdr lists))) - (if (not (pair? first)) (lp rest first) - - ;; Now, do the splicing. - (let lp2 ((tail-cons (last-pair first)) - (rest rest)) - (if (pair? rest) - (let ((next (car rest)) - (rest (cdr rest))) - (set-cdr! tail-cons next) - (lp2 (if (pair? next) (last-pair next) tail-cons) - rest)) - first))))))) - -;;; APPEND is R4RS. -;(define (append . lists) -; (if (pair? lists) -; (let recur ((list1 (car lists)) (lists (cdr lists))) -; (if (pair? lists) -; (let ((tail (recur (car lists) (cdr lists)))) -; (fold-right cons tail list1)) ; Append LIST1 & TAIL. -; list1)) -; '())) - -;(define (append-reverse rev-head tail) (fold cons tail rev-head)) - -;(define (append-reverse! rev-head tail) -; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) -; tail -; rev-head)) - -;;; Hand-inline the FOLD and PAIR-FOLD ops for speed. - -(define (append-reverse rev-head tail) - (let lp ((rev-head rev-head) (tail tail)) - (if (null-list? rev-head) tail - (lp (cdr rev-head) (cons (car rev-head) tail))))) - -(define (append-reverse! rev-head tail) - (let lp ((rev-head rev-head) (tail tail)) - (if (null-list? rev-head) tail - (let ((next-rev (cdr rev-head))) - (set-cdr! rev-head tail) - (lp next-rev rev-head))))) - - -(define (concatenate lists) (reduce-right append '() lists)) -(define (concatenate! lists) (reduce-right append! '() lists)) - -;;; Fold/map internal utilities -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; These little internal utilities are used by the general -;;; fold & mapper funs for the n-ary cases . It'd be nice if they got inlined. -;;; One the other hand, the n-ary cases are painfully inefficient as it is. -;;; An aggressive implementation should simply re-write these functions -;;; for raw efficiency; I have written them for as much clarity, portability, -;;; and simplicity as can be achieved. -;;; -;;; I use the dreaded call/cc to do local aborts. A good compiler could -;;; handle this with extreme efficiency. An implementation that provides -;;; a one-shot, non-persistent continuation grabber could help the compiler -;;; out by using that in place of the call/cc's in these routines. -;;; -;;; These functions have funky definitions that are precisely tuned to -;;; the needs of the fold/map procs -- for example, to minimize the number -;;; of times the argument lists need to be examined. - -;;; Return (map cdr lists). -;;; However, if any element of LISTS is empty, just abort and return '(). -(define (%cdrs lists) - (call-with-current-continuation - (lambda (abort) - (let recur ((lists lists)) - (if (pair? lists) - (let ((lis (car lists))) - (if (null-list? lis) (abort '()) - (cons (cdr lis) (recur (cdr lists))))) - '()))))) - -(define (%cars+ lists last-elt) ; (append! (map car lists) (list last-elt)) - (let recur ((lists lists)) - (if (pair? lists) (cons (caar lists) (recur (cdr lists))) (list last-elt)))) - -;;; LISTS is a (not very long) non-empty list of lists. -;;; Return two lists: the cars & the cdrs of the lists. -;;; However, if any of the lists is empty, just abort and return [() ()]. - -(define (%cars+cdrs lists) - (call-with-current-continuation - (lambda (abort) - (let recur ((lists lists)) - (if (pair? lists) - (receive (list other-lists) (car+cdr lists) - (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out - (receive (a d) (car+cdr list) - (receive (cars cdrs) (recur other-lists) - (values (cons a cars) (cons d cdrs)))))) - (values '() '())))))) - -;;; Like %CARS+CDRS, but we pass in a final elt tacked onto the end of the -;;; cars list. What a hack. -(define (%cars+cdrs+ lists cars-final) - (call-with-current-continuation - (lambda (abort) - (let recur ((lists lists)) - (if (pair? lists) - (receive (list other-lists) (car+cdr lists) - (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out - (receive (a d) (car+cdr list) - (receive (cars cdrs) (recur other-lists) - (values (cons a cars) (cons d cdrs)))))) - (values (list cars-final) '())))))) - -;;; Like %CARS+CDRS, but blow up if any list is empty. -(define (%cars+cdrs/no-test lists) - (let recur ((lists lists)) - (if (pair? lists) - (receive (list other-lists) (car+cdr lists) - (receive (a d) (car+cdr list) - (receive (cars cdrs) (recur other-lists) - (values (cons a cars) (cons d cdrs))))) - (values '() '())))) - - -;;; count -;;;;;;;;; -(define (count pred list1 . lists) - (check-arg procedure? pred count) - (if (pair? lists) - - ;; N-ary case - (let lp ((list1 list1) (lists lists) (i 0)) - (if (null-list? list1) i - (receive (as ds) (%cars+cdrs lists) - (if (null? as) i - (lp (cdr list1) ds - (if (apply pred (car list1) as) (+ i 1) i)))))) - - ;; Fast path - (let lp ((lis list1) (i 0)) - (if (null-list? lis) i - (lp (cdr lis) (if (pred (car lis)) (+ i 1) i)))))) - - -;;; fold/unfold -;;;;;;;;;;;;;;; - -(define (unfold-right p f g seed . maybe-tail) - (check-arg procedure? p unfold-right) - (check-arg procedure? f unfold-right) - (check-arg procedure? g unfold-right) - (let lp ((seed seed) (ans (:optional maybe-tail '()))) - (if (p seed) ans - (lp (g seed) - (cons (f seed) ans))))) - - -(define (unfold p f g seed . maybe-tail-gen) - (check-arg procedure? p unfold) - (check-arg procedure? f unfold) - (check-arg procedure? g unfold) - (if (pair? maybe-tail-gen) - - (let ((tail-gen (car maybe-tail-gen))) - (if (pair? (cdr maybe-tail-gen)) - (apply error "Too many arguments" unfold p f g seed maybe-tail-gen) - - (let recur ((seed seed)) - (if (p seed) (tail-gen seed) - (cons (f seed) (recur (g seed))))))) - - (let recur ((seed seed)) - (if (p seed) '() - (cons (f seed) (recur (g seed))))))) - - -(define (fold kons knil lis1 . lists) - (check-arg procedure? kons fold) - (if (pair? lists) - (let lp ((lists (cons lis1 lists)) (ans knil)) ; N-ary case - (receive (cars+ans cdrs) (%cars+cdrs+ lists ans) - (if (null? cars+ans) ans ; Done. - (lp cdrs (apply kons cars+ans))))) - - (let lp ((lis lis1) (ans knil)) ; Fast path - (if (null-list? lis) ans - (lp (cdr lis) (kons (car lis) ans)))))) - - -(define (fold-right kons knil lis1 . lists) - (check-arg procedure? kons fold-right) - (if (pair? lists) - (let recur ((lists (cons lis1 lists))) ; N-ary case - (let ((cdrs (%cdrs lists))) - (if (null? cdrs) knil - (apply kons (%cars+ lists (recur cdrs)))))) - - (let recur ((lis lis1)) ; Fast path - (if (null-list? lis) knil - (let ((head (car lis))) - (kons head (recur (cdr lis)))))))) - - -(define (pair-fold-right f zero lis1 . lists) - (check-arg procedure? f pair-fold-right) - (if (pair? lists) - (let recur ((lists (cons lis1 lists))) ; N-ary case - (let ((cdrs (%cdrs lists))) - (if (null? cdrs) zero - (apply f (append! lists (list (recur cdrs))))))) - - (let recur ((lis lis1)) ; Fast path - (if (null-list? lis) zero (f lis (recur (cdr lis))))))) - -(define (pair-fold f zero lis1 . lists) - (check-arg procedure? f pair-fold) - (if (pair? lists) - (let lp ((lists (cons lis1 lists)) (ans zero)) ; N-ary case - (let ((tails (%cdrs lists))) - (if (null? tails) ans - (lp tails (apply f (append! lists (list ans))))))) - - (let lp ((lis lis1) (ans zero)) - (if (null-list? lis) ans - (let ((tail (cdr lis))) ; Grab the cdr now, - (lp tail (f lis ans))))))) ; in case F SET-CDR!s LIS. - - -;;; REDUCE and REDUCE-RIGHT only use RIDENTITY in the empty-list case. -;;; These cannot meaningfully be n-ary. - -(define (reduce f ridentity lis) - (check-arg procedure? f reduce) - (if (null-list? lis) ridentity - (fold f (car lis) (cdr lis)))) - -(define (reduce-right f ridentity lis) - (check-arg procedure? f reduce-right) - (if (null-list? lis) ridentity - (let recur ((head (car lis)) (lis (cdr lis))) - (if (pair? lis) - (f head (recur (car lis) (cdr lis))) - head)))) - - - -;;; Mappers: append-map append-map! pair-for-each map! filter-map map-in-order -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (append-map f lis1 . lists) - (really-append-map append-map append f lis1 lists)) -(define (append-map! f lis1 . lists) - (really-append-map append-map! append! f lis1 lists)) - -(define (really-append-map who appender f lis1 lists) - (check-arg procedure? f who) - (if (pair? lists) - (receive (cars cdrs) (%cars+cdrs (cons lis1 lists)) - (if (null? cars) '() - (let recur ((cars cars) (cdrs cdrs)) - (let ((vals (apply f cars))) - (receive (cars2 cdrs2) (%cars+cdrs cdrs) - (if (null? cars2) vals - (appender vals (recur cars2 cdrs2)))))))) - - ;; Fast path - (if (null-list? lis1) '() - (let recur ((elt (car lis1)) (rest (cdr lis1))) - (let ((vals (f elt))) - (if (null-list? rest) vals - (appender vals (recur (car rest) (cdr rest))))))))) - - -(define (pair-for-each proc lis1 . lists) - (check-arg procedure? proc pair-for-each) - (if (pair? lists) - - (let lp ((lists (cons lis1 lists))) - (let ((tails (%cdrs lists))) - (if (pair? tails) - (begin (apply proc lists) - (lp tails))))) - - ;; Fast path. - (let lp ((lis lis1)) - (if (not (null-list? lis)) - (let ((tail (cdr lis))) ; Grab the cdr now, - (proc lis) ; in case PROC SET-CDR!s LIS. - (lp tail)))))) - -;;; We stop when LIS1 runs out, not when any list runs out. -(define (map! f lis1 . lists) - (check-arg procedure? f map!) - (if (pair? lists) - (let lp ((lis1 lis1) (lists lists)) - (if (not (null-list? lis1)) - (receive (heads tails) (%cars+cdrs/no-test lists) - (set-car! lis1 (apply f (car lis1) heads)) - (lp (cdr lis1) tails)))) - - ;; Fast path. - (pair-for-each (lambda (pair) (set-car! pair (f (car pair)))) lis1)) - lis1) - - -;;; Map F across L, and save up all the non-false results. -(define (filter-map f lis1 . lists) - (check-arg procedure? f filter-map) - (if (pair? lists) - (let recur ((lists (cons lis1 lists))) - (receive (cars cdrs) (%cars+cdrs lists) - (if (pair? cars) - (cond ((apply f cars) => (lambda (x) (cons x (recur cdrs)))) - (else (recur cdrs))) ; Tail call in this arm. - '()))) - - ;; Fast path. - (let recur ((lis lis1)) - (if (null-list? lis) lis - (let ((tail (recur (cdr lis)))) - (cond ((f (car lis)) => (lambda (x) (cons x tail))) - (else tail))))))) - - -;;; Map F across lists, guaranteeing to go left-to-right. -;;; NOTE: Some implementations of R5RS MAP are compliant with this spec; -;;; in which case this procedure may simply be defined as a synonym for MAP. - -(define (map-in-order f lis1 . lists) - (check-arg procedure? f map-in-order) - (if (pair? lists) - (let recur ((lists (cons lis1 lists))) - (receive (cars cdrs) (%cars+cdrs lists) - (if (pair? cars) - (let ((x (apply f cars))) ; Do head first, - (cons x (recur cdrs))) ; then tail. - '()))) - - ;; Fast path. - (let recur ((lis lis1)) - (if (null-list? lis) lis - (let ((tail (cdr lis)) - (x (f (car lis)))) ; Do head first, - (cons x (recur tail))))))) ; then tail. - - -;;; We extend MAP to handle arguments of unequal length. -(define map map-in-order) - - -;;; filter, remove, partition -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; FILTER, REMOVE, PARTITION and their destructive counterparts do not -;;; disorder the elements of their argument. - -;; This FILTER shares the longest tail of L that has no deleted elements. -;; If Scheme had multi-continuation calls, they could be made more efficient. - -(define (filter pred lis) ; Sleazing with EQ? makes this - (check-arg procedure? pred filter) ; one faster. - (let recur ((lis lis)) - (if (null-list? lis) lis ; Use NOT-PAIR? to handle dotted lists. - (let ((head (car lis)) - (tail (cdr lis))) - (if (pred head) - (let ((new-tail (recur tail))) ; Replicate the RECUR call so - (if (eq? tail new-tail) lis - (cons head new-tail))) - (recur tail)))))) ; this one can be a tail call. - - -;;; Another version that shares longest tail. -;(define (filter pred lis) -; (receive (ans no-del?) -; ;; (recur l) returns L with (pred x) values filtered. -; ;; It also returns a flag NO-DEL? if the returned value -; ;; is EQ? to L, i.e. if it didn't have to delete anything. -; (let recur ((l l)) -; (if (null-list? l) (values l #t) -; (let ((x (car l)) -; (tl (cdr l))) -; (if (pred x) -; (receive (ans no-del?) (recur tl) -; (if no-del? -; (values l #t) -; (values (cons x ans) #f))) -; (receive (ans no-del?) (recur tl) ; Delete X. -; (values ans #f)))))) -; ans)) - - - -;(define (filter! pred lis) ; Things are much simpler -; (let recur ((lis lis)) ; if you are willing to -; (if (pair? lis) ; push N stack frames & do N -; (cond ((pred (car lis)) ; SET-CDR! writes, where N is -; (set-cdr! lis (recur (cdr lis))); the length of the answer. -; lis) -; (else (recur (cdr lis)))) -; lis))) - - -;;; This implementation of FILTER! -;;; - doesn't cons, and uses no stack; -;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are -;;; usually expensive on modern machines, and can be extremely expensive on -;;; modern Schemes (e.g., ones that have generational GC's). -;;; It just zips down contiguous runs of in and out elts in LIS doing the -;;; minimal number of SET-CDR!s to splice the tail of one run of ins to the -;;; beginning of the next. - -(define (filter! pred lis) - (check-arg procedure? pred filter!) - (let lp ((ans lis)) - (cond ((null-list? ans) ans) ; Scan looking for - ((not (pred (car ans))) (lp (cdr ans))) ; first cons of result. - - ;; ANS is the eventual answer. - ;; SCAN-IN: (CDR PREV) = LIS and (CAR PREV) satisfies PRED. - ;; Scan over a contiguous segment of the list that - ;; satisfies PRED. - ;; SCAN-OUT: (CAR PREV) satisfies PRED. Scan over a contiguous - ;; segment of the list that *doesn't* satisfy PRED. - ;; When the segment ends, patch in a link from PREV - ;; to the start of the next good segment, and jump to - ;; SCAN-IN. - (else (letrec ((scan-in (lambda (prev lis) - (if (pair? lis) - (if (pred (car lis)) - (scan-in lis (cdr lis)) - (scan-out prev (cdr lis)))))) - (scan-out (lambda (prev lis) - (let lp ((lis lis)) - (if (pair? lis) - (if (pred (car lis)) - (begin (set-cdr! prev lis) - (scan-in lis (cdr lis))) - (lp (cdr lis))) - (set-cdr! prev lis)))))) - (scan-in ans (cdr ans)) - ans))))) - - - -;;; Answers share common tail with LIS where possible; -;;; the technique is slightly subtle. - -(define (partition pred lis) - (check-arg procedure? pred partition) - (let recur ((lis lis)) - (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle dotted lists. - (let ((elt (car lis)) - (tail (cdr lis))) - (receive (in out) (recur tail) - (if (pred elt) - (values (if (pair? out) (cons elt in) lis) out) - (values in (if (pair? in) (cons elt out) lis)))))))) - - - -;(define (partition! pred lis) ; Things are much simpler -; (let recur ((lis lis)) ; if you are willing to -; (if (null-list? lis) (values lis lis) ; push N stack frames & do N -; (let ((elt (car lis))) ; SET-CDR! writes, where N is -; (receive (in out) (recur (cdr lis)) ; the length of LIS. -; (cond ((pred elt) -; (set-cdr! lis in) -; (values lis out)) -; (else (set-cdr! lis out) -; (values in lis)))))))) - - -;;; This implementation of PARTITION! -;;; - doesn't cons, and uses no stack; -;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are -;;; usually expensive on modern machines, and can be extremely expensive on -;;; modern Schemes (e.g., ones that have generational GC's). -;;; It just zips down contiguous runs of in and out elts in LIS doing the -;;; minimal number of SET-CDR!s to splice these runs together into the result -;;; lists. - -(define (partition! pred lis) - (check-arg procedure? pred partition!) - (if (null-list? lis) (values lis lis) - - ;; This pair of loops zips down contiguous in & out runs of the - ;; list, splicing the runs together. The invariants are - ;; SCAN-IN: (cdr in-prev) = LIS. - ;; SCAN-OUT: (cdr out-prev) = LIS. - (letrec ((scan-in (lambda (in-prev out-prev lis) - (let lp ((in-prev in-prev) (lis lis)) - (if (pair? lis) - (if (pred (car lis)) - (lp lis (cdr lis)) - (begin (set-cdr! out-prev lis) - (scan-out in-prev lis (cdr lis)))) - (set-cdr! out-prev lis))))) ; Done. - - (scan-out (lambda (in-prev out-prev lis) - (let lp ((out-prev out-prev) (lis lis)) - (if (pair? lis) - (if (pred (car lis)) - (begin (set-cdr! in-prev lis) - (scan-in lis out-prev (cdr lis))) - (lp lis (cdr lis))) - (set-cdr! in-prev lis)))))) ; Done. - - ;; Crank up the scan&splice loops. - (if (pred (car lis)) - ;; LIS begins in-list. Search for out-list's first pair. - (let lp ((prev-l lis) (l (cdr lis))) - (cond ((not (pair? l)) (values lis l)) - ((pred (car l)) (lp l (cdr l))) - (else (scan-out prev-l l (cdr l)) - (values lis l)))) ; Done. - - ;; LIS begins out-list. Search for in-list's first pair. - (let lp ((prev-l lis) (l (cdr lis))) - (cond ((not (pair? l)) (values l lis)) - ((pred (car l)) - (scan-in l prev-l (cdr l)) - (values l lis)) ; Done. - (else (lp l (cdr l))))))))) - - -;;; Inline us, please. -(define (remove pred l) (filter (lambda (x) (not (pred x))) l)) -(define (remove! pred l) (filter! (lambda (x) (not (pred x))) l)) - - - -;;; Here's the taxonomy for the DELETE/ASSOC/MEMBER functions. -;;; (I don't actually think these are the world's most important -;;; functions -- the procedural FILTER/REMOVE/FIND/FIND-TAIL variants -;;; are far more general.) -;;; -;;; Function Action -;;; --------------------------------------------------------------------------- -;;; remove pred lis Delete by general predicate -;;; delete x lis [=] Delete by element comparison -;;; -;;; find pred lis Search by general predicate -;;; find-tail pred lis Search by general predicate -;;; member x lis [=] Search by element comparison -;;; -;;; assoc key lis [=] Search alist by key comparison -;;; alist-delete key alist [=] Alist-delete by key comparison - -(define (delete x lis . maybe-=) - (let ((= (:optional maybe-= equal?))) - (filter (lambda (y) (not (= x y))) lis))) - -(define (delete! x lis . maybe-=) - (let ((= (:optional maybe-= equal?))) - (filter! (lambda (y) (not (= x y))) lis))) - -;;; Extended from R4RS to take an optional comparison argument. -(define (member x lis . maybe-=) - (let ((= (:optional maybe-= equal?))) - (find-tail (lambda (y) (= x y)) lis))) - -;;; R4RS, hence we don't bother to define. -;;; The MEMBER and then FIND-TAIL call should definitely -;;; be inlined for MEMQ & MEMV. -;(define (memq x lis) (member x lis eq?)) -;(define (memv x lis) (member x lis eqv?)) - - -;;; right-duplicate deletion -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; delete-duplicates delete-duplicates! -;;; -;;; Beware -- these are N^2 algorithms. To efficiently remove duplicates -;;; in long lists, sort the list to bring duplicates together, then use a -;;; linear-time algorithm to kill the dups. Or use an algorithm based on -;;; element-marking. The former gives you O(n lg n), the latter is linear. - -(define (delete-duplicates lis . maybe-=) - (let ((elt= (:optional maybe-= equal?))) - (check-arg procedure? elt= delete-duplicates) - (let recur ((lis lis)) - (if (null-list? lis) lis - (let* ((x (car lis)) - (tail (cdr lis)) - (new-tail (recur (delete x tail elt=)))) - (if (eq? tail new-tail) lis (cons x new-tail))))))) - -(define (delete-duplicates! lis . maybe-=) - (let ((elt= (:optional maybe-= equal?))) - (check-arg procedure? elt= delete-duplicates!) - (let recur ((lis lis)) - (if (null-list? lis) lis - (let* ((x (car lis)) - (tail (cdr lis)) - (new-tail (recur (delete! x tail elt=)))) - (if (eq? tail new-tail) lis (cons x new-tail))))))) - - -;;; alist stuff -;;;;;;;;;;;;;;; - -;;; Extended from R4RS to take an optional comparison argument. -(define (assoc x lis . maybe-=) - (let ((= (:optional maybe-= equal?))) - (find (lambda (entry) (= x (car entry))) lis))) - -(define (alist-cons key datum alist) (cons (cons key datum) alist)) - -(define (alist-copy alist) - (map (lambda (elt) (cons (car elt) (cdr elt))) - alist)) - -(define (alist-delete key alist . maybe-=) - (let ((= (:optional maybe-= equal?))) - (filter (lambda (elt) (not (= key (car elt)))) alist))) - -(define (alist-delete! key alist . maybe-=) - (let ((= (:optional maybe-= equal?))) - (filter! (lambda (elt) (not (= key (car elt)))) alist))) - - -;;; find find-tail take-while drop-while span break any every list-index -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (find pred list) - (cond ((find-tail pred list) => car) - (else #f))) - -(define (find-tail pred list) - (check-arg procedure? pred find-tail) - (let lp ((list list)) - (and (not (null-list? list)) - (if (pred (car list)) list - (lp (cdr list)))))) - -(define (take-while pred lis) - (check-arg procedure? pred take-while) - (let recur ((lis lis)) - (if (null-list? lis) '() - (let ((x (car lis))) - (if (pred x) - (cons x (recur (cdr lis))) - '()))))) - -(define (drop-while pred lis) - (check-arg procedure? pred drop-while) - (let lp ((lis lis)) - (if (null-list? lis) '() - (if (pred (car lis)) - (lp (cdr lis)) - lis)))) - -(define (take-while! pred lis) - (check-arg procedure? pred take-while!) - (if (or (null-list? lis) (not (pred (car lis)))) '() - (begin (let lp ((prev lis) (rest (cdr lis))) - (if (pair? rest) - (let ((x (car rest))) - (if (pred x) (lp rest (cdr rest)) - (set-cdr! prev '()))))) - lis))) - -(define (span pred lis) - (check-arg procedure? pred span) - (let recur ((lis lis)) - (if (null-list? lis) (values '() '()) - (let ((x (car lis))) - (if (pred x) - (receive (prefix suffix) (recur (cdr lis)) - (values (cons x prefix) suffix)) - (values '() lis)))))) - -(define (span! pred lis) - (check-arg procedure? pred span!) - (if (or (null-list? lis) (not (pred (car lis)))) (values '() lis) - (let ((suffix (let lp ((prev lis) (rest (cdr lis))) - (if (null-list? rest) rest - (let ((x (car rest))) - (if (pred x) (lp rest (cdr rest)) - (begin (set-cdr! prev '()) - rest))))))) - (values lis suffix)))) - - -(define (break pred lis) (span (lambda (x) (not (pred x))) lis)) -(define (break! pred lis) (span! (lambda (x) (not (pred x))) lis)) - -(define (any pred lis1 . lists) - (check-arg procedure? pred any) - (if (pair? lists) - - ;; N-ary case - (receive (heads tails) (%cars+cdrs (cons lis1 lists)) - (and (pair? heads) - (let lp ((heads heads) (tails tails)) - (receive (next-heads next-tails) (%cars+cdrs tails) - (if (pair? next-heads) - (or (apply pred heads) (lp next-heads next-tails)) - (apply pred heads)))))) ; Last PRED app is tail call. - - ;; Fast path - (and (not (null-list? lis1)) - (let lp ((head (car lis1)) (tail (cdr lis1))) - (if (null-list? tail) - (pred head) ; Last PRED app is tail call. - (or (pred head) (lp (car tail) (cdr tail)))))))) - - -;(define (every pred list) ; Simple definition. -; (let lp ((list list)) ; Doesn't return the last PRED value. -; (or (not (pair? list)) -; (and (pred (car list)) -; (lp (cdr list)))))) - -(define (every pred lis1 . lists) - (check-arg procedure? pred every) - (if (pair? lists) - - ;; N-ary case - (receive (heads tails) (%cars+cdrs (cons lis1 lists)) - (or (not (pair? heads)) - (let lp ((heads heads) (tails tails)) - (receive (next-heads next-tails) (%cars+cdrs tails) - (if (pair? next-heads) - (and (apply pred heads) (lp next-heads next-tails)) - (apply pred heads)))))) ; Last PRED app is tail call. - - ;; Fast path - (or (null-list? lis1) - (let lp ((head (car lis1)) (tail (cdr lis1))) - (if (null-list? tail) - (pred head) ; Last PRED app is tail call. - (and (pred head) (lp (car tail) (cdr tail)))))))) - -(define (list-index pred lis1 . lists) - (check-arg procedure? pred list-index) - (if (pair? lists) - - ;; N-ary case - (let lp ((lists (cons lis1 lists)) (n 0)) - (receive (heads tails) (%cars+cdrs lists) - (and (pair? heads) - (if (apply pred heads) n - (lp tails (+ n 1)))))) - - ;; Fast path - (let lp ((lis lis1) (n 0)) - (and (not (null-list? lis)) - (if (pred (car lis)) n (lp (cdr lis) (+ n 1))))))) - -;;; Reverse -;;;;;;;;;;; - -;R4RS, so not defined here. -;(define (reverse lis) (fold cons '() lis)) - -;(define (reverse! lis) -; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) - -(define (reverse! lis) - (let lp ((lis lis) (ans '())) - (if (null-list? lis) ans - (let ((tail (cdr lis))) - (set-cdr! lis ans) - (lp tail lis))))) - -;;; Lists-as-sets -;;;;;;;;;;;;;;;;; - -;;; This is carefully tuned code; do not modify casually. -;;; - It is careful to share storage when possible; -;;; - Side-effecting code tries not to perform redundant writes. -;;; - It tries to avoid linear-time scans in special cases where constant-time -;;; computations can be performed. -;;; - It relies on similar properties from the other list-lib procs it calls. -;;; For example, it uses the fact that the implementations of MEMBER and -;;; FILTER in this source code share longest common tails between args -;;; and results to get structure sharing in the lset procedures. - -(define (%lset2<= = lis1 lis2) (every (lambda (x) (member x lis2 =)) lis1)) - -(define (lset<= = . lists) - (check-arg procedure? = lset<=) - (or (not (pair? lists)) ; 0-ary case - (let lp ((s1 (car lists)) (rest (cdr lists))) - (or (not (pair? rest)) - (let ((s2 (car rest)) (rest (cdr rest))) - (and (or (eq? s2 s1) ; Fast path - (%lset2<= = s1 s2)) ; Real test - (lp s2 rest))))))) - -(define (lset= = . lists) - (check-arg procedure? = lset=) - (or (not (pair? lists)) ; 0-ary case - (let lp ((s1 (car lists)) (rest (cdr lists))) - (or (not (pair? rest)) - (let ((s2 (car rest)) - (rest (cdr rest))) - (and (or (eq? s1 s2) ; Fast path - (and (%lset2<= = s1 s2) (%lset2<= = s2 s1))) ; Real test - (lp s2 rest))))))) - - -(define (lset-adjoin = lis . elts) - (check-arg procedure? = lset-adjoin) - (fold (lambda (elt ans) (if (member elt ans =) ans (cons elt ans))) - lis elts)) - - -(define (lset-union = . lists) - (check-arg procedure? = lset-union) - (reduce (lambda (lis ans) ; Compute ANS + LIS. - (cond ((null? lis) ans) ; Don't copy any lists - ((null? ans) lis) ; if we don't have to. - ((eq? lis ans) ans) - (else - (fold (lambda (elt ans) (if (any (lambda (x) (= x elt)) ans) - ans - (cons elt ans))) - ans lis)))) - '() lists)) - -(define (lset-union! = . lists) - (check-arg procedure? = lset-union!) - (reduce (lambda (lis ans) ; Splice new elts of LIS onto the front of ANS. - (cond ((null? lis) ans) ; Don't copy any lists - ((null? ans) lis) ; if we don't have to. - ((eq? lis ans) ans) - (else - (pair-fold (lambda (pair ans) - (let ((elt (car pair))) - (if (any (lambda (x) (= x elt)) ans) - ans - (begin (set-cdr! pair ans) pair)))) - ans lis)))) - '() lists)) - - -(define (lset-intersection = lis1 . lists) - (check-arg procedure? = lset-intersection) - (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. - (cond ((any null-list? lists) '()) ; Short cut - ((null? lists) lis1) ; Short cut - (else (filter (lambda (x) - (every (lambda (lis) (member x lis =)) lists)) - lis1))))) - -(define (lset-intersection! = lis1 . lists) - (check-arg procedure? = lset-intersection!) - (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. - (cond ((any null-list? lists) '()) ; Short cut - ((null? lists) lis1) ; Short cut - (else (filter! (lambda (x) - (every (lambda (lis) (member x lis =)) lists)) - lis1))))) - - -(define (lset-difference = lis1 . lists) - (check-arg procedure? = lset-difference) - (let ((lists (filter pair? lists))) ; Throw out empty lists. - (cond ((null? lists) lis1) ; Short cut - ((memq lis1 lists) '()) ; Short cut - (else (filter (lambda (x) - (every (lambda (lis) (not (member x lis =))) - lists)) - lis1))))) - -(define (lset-difference! = lis1 . lists) - (check-arg procedure? = lset-difference!) - (let ((lists (filter pair? lists))) ; Throw out empty lists. - (cond ((null? lists) lis1) ; Short cut - ((memq lis1 lists) '()) ; Short cut - (else (filter! (lambda (x) - (every (lambda (lis) (not (member x lis =))) - lists)) - lis1))))) - - -(define (lset-xor = . lists) - (check-arg procedure? = lset-xor) - (reduce (lambda (b a) ; Compute A xor B: - ;; Note that this code relies on the constant-time - ;; short-cuts provided by LSET-DIFF+INTERSECTION, - ;; LSET-DIFFERENCE & APPEND to provide constant-time short - ;; cuts for the cases A = (), B = (), and A eq? B. It takes - ;; a careful case analysis to see it, but it's carefully - ;; built in. - - ;; Compute a-b and a^b, then compute b-(a^b) and - ;; cons it onto the front of a-b. - (receive (a-b a-int-b) (lset-diff+intersection = a b) - (cond ((null? a-b) (lset-difference b a =)) - ((null? a-int-b) (append b a)) - (else (fold (lambda (xb ans) - (if (member xb a-int-b =) ans (cons xb ans))) - a-b - b))))) - '() lists)) - - -(define (lset-xor! = . lists) - (check-arg procedure? = lset-xor!) - (reduce (lambda (b a) ; Compute A xor B: - ;; Note that this code relies on the constant-time - ;; short-cuts provided by LSET-DIFF+INTERSECTION, - ;; LSET-DIFFERENCE & APPEND to provide constant-time short - ;; cuts for the cases A = (), B = (), and A eq? B. It takes - ;; a careful case analysis to see it, but it's carefully - ;; built in. - - ;; Compute a-b and a^b, then compute b-(a^b) and - ;; cons it onto the front of a-b. - (receive (a-b a-int-b) (lset-diff+intersection! = a b) - (cond ((null? a-b) (lset-difference! b a =)) - ((null? a-int-b) (append! b a)) - (else (pair-fold (lambda (b-pair ans) - (if (member (car b-pair) a-int-b =) ans - (begin (set-cdr! b-pair ans) b-pair))) - a-b - b))))) - '() lists)) - - -(define (lset-diff+intersection = lis1 . lists) - (check-arg procedure? = lset-diff+intersection) - (cond ((every null-list? lists) (values lis1 '())) ; Short cut - ((memq lis1 lists) (values '() lis1)) ; Short cut - (else (partition (lambda (elt) - (not (any (lambda (lis) (member elt lis =)) - lists))) - lis1)))) - -(define (lset-diff+intersection! = lis1 . lists) - (check-arg procedure? = lset-diff+intersection!) - (cond ((every null-list? lists) (values lis1 '())) ; Short cut - ((memq lis1 lists) (values '() lis1)) ; Short cut - (else (partition! (lambda (elt) - (not (any (lambda (lis) (member elt lis =)) - lists))) - lis1)))) diff --git a/scsh/lib/list-pack.scm b/scsh/lib/list-pack.scm deleted file mode 100644 index bafb5d5..0000000 --- a/scsh/lib/list-pack.scm +++ /dev/null @@ -1,249 +0,0 @@ -;;; This is a Scheme48 interface spec for the SRFI-1 list-lib package. -;;; It defines the LIST-LIB-INTERFACE interface and LIST-LIB structure. -;;; Bindings are typed as tightly as one can in Scheme48's type language. -;;; -Olin Shivers -;;; shivers@ai.mit.edu - -;;; list-lib -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; xcons cons* make-list list-tabulate list-copy circular-list iota -;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= -;;; first second third fourth fifth sixth seventh eighth ninth tenth -;;; car+cdr -;;; take drop -;;; take-right drop-right -;;; take! drop-right! -;;; take-while drop-while take-while! -;;; split-at split-at! -;;; span break -;;; span! break! -;;; last last-pair -;;; length+ -;;; append! reverse! append-reverse append-reverse! concatenate concatenate! -;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 -;;; count -;;; unfold unfold-right -;;; fold unfold pair-fold reduce -;;; fold-right unfold-right pair-fold-right reduce-right -;;; append-map append-map! map! pair-for-each filter-map map-in-order -;;; filter partition remove -;;; filter! partition! remove! -;;; find find-tail any every list-index -;;; delete delete! delete-duplicates delete-duplicates! -;;; alist-cons alist-copy -;;; alist-delete alist-delete! -;;; -;;; lset<= lset= lset-adjoin -;;; lset-union lset-union! -;;; lset-intersection lset-intersection! -;;; lset-difference lset-difference! -;;; lset-xor lset-xor! -;;; lset-diff+intersection lset-diff+intersection! -;;; -;;; map for-each member assoc (Extended R4RS procedures) -;;; -;;; cons pair? null? list length append reverse (These are the -;;; car cdr ... cdddar cddddr set-car! set-cdr! list-ref R4RS procedures -;;; memq memv assoc assq assv re-exported by -;;; list-lib unchanged.) - -(define-interface list-lib-interface - (export - ;; xcons - (xcons (proc (:value :value) :value)) - - ;; cons* item ... - (cons* (proc (:value &rest :value) :value)) - - ;; make-list len [fill] - (make-list (proc (:exact-integer &opt :value) :value)) - - ;; list-tabulate elt-proc len - (list-tabulate (proc (:exact-integer (proc (:exact-integer) :value)) :value)) - - ;; list-copy lis - (list-copy (proc (:value) :value)) - - (circular-list (proc (:value &rest :value) :pair)) - -; ((:iota iota:) -; (proc (:number &opt :number :number) :value)) - - (iota (proc (:exact-integer &opt :number :number) :value)) - - (proper-list? (proc (:value) :boolean)) - (dotted-list? (proc (:value) :boolean)) - (circular-list? (proc (:value) :boolean)) - - (not-pair? (proc (:value) :boolean)) - (null-list? (proc (:value) :boolean)) - - (list= (proc ((proc (:value :value) :boolean) &rest :value) :boolean)) - - ((first second third fourth fifth sixth seventh eighth ninth tenth) - (proc (:pair) :value)) - - (car+cdr (proc (:pair) (some-values :value :value))) - - ;; take lis i take-right lis i - ;; drop lis i drop-right lis i - ;; take! lis i drop-right! lis i - ((take drop take-right drop-right take! drop-right!) - (proc (:value :exact-integer) :value)) - - ((split-at split-at!) - (proc (:value :exact-integer) (some-values :value :value))) - - (last (proc (:pair) :value)) - (last-pair (proc (:pair) :pair)) - - (length+ (proc (:value) :value)) - (append! (proc (:value &rest :value) :value)) - (reverse! (proc (:value) :value)) - ((append-reverse append-reverse!) (proc (:value :value) :value)) - ((concatenate concatenate!) (proc (:value) :value)) - - (zip (proc (:value &rest :value) :value)) - (unzip1 (proc (:value) :value)) - (unzip2 (proc (:value) (some-values :value :value))) - (unzip3 (proc (:value) (some-values :value :value :value))) - (unzip4 (proc (:value) (some-values :value :value :value :value))) - (unzip5 (proc (:value) (some-values :value :value :value :value :value))) - - (count (proc ((proc (:value &rest :value) :boolean) :value) :exact-integer)) - - ((fold fold-right) - (proc ((proc (:value :value &rest :value) :value) - :value :value &rest :value) - :value)) - - ((unfold unfold-right) (proc ((proc (:value) :boolean) - (proc (:value) :value) - (proc (:value) :value) - :value - &opt (proc (:value) :value)) - :value)) - - ((pair-fold pair-fold-right) - (proc ((proc (:pair :value &rest :value) :value) - :value :value &rest :value) - :value)) - - ((reduce reduce-right) - (proc ((proc (:value :value) :value) :value :value) :value)) - - ((append-map append-map! map! filter-map map-in-order) - (proc ((proc (:value &rest :value) :value) :value &rest :value) :value)) - - (pair-for-each (proc ((proc (:pair &rest :pair) :values) :value &rest :value) - :unspecific)) - - ((filter filter! remove remove!) - (proc ((proc (:value) :boolean) :value) :value)) - - ((partition partition!) (proc ((proc (:value) :boolean) :value) - (some-values :value :value))) - - ((find find-tail) (proc ((proc (:value) :boolean) :value) :value)) - - ((take-while take-while! drop-while) - (proc ((proc (:value) :boolean) :value) :value)) - - ((span break span! break!) - (proc ((proc (:value) :boolean) :value) (some-values :value :value))) - - ((any every) - (proc ((proc (:value &rest :value) :value) :value &rest :value) :value)) - - (list-index (proc ((proc (:value &rest :value) :value) :value &rest :value) - :value)) - - ((delete delete!) - (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) - - ;; Extended from their R5RS definitions to take an optional comparison - ;; function: (MEMBER x lis [=]). - (member (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) - (assoc (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) - - ((delete-duplicates delete-duplicates!) - (proc (:value &opt (proc (:value :value) :boolean)) :value)) - - (alist-cons (proc (:value :value :value) :value)) - (alist-copy (proc (:value) :value)) - ((alist-delete alist-delete!) - (proc (:value :value &opt (proc (:value :value) :value)) :value)) - - ;; Extended from their R4RS definitions. - (map (proc ((proc (:value &rest :value) :value) :value &rest :value) :value)) - (for-each (proc ((proc (:value &rest :value) :values) :value &rest :value) :unspecific)) - - ;; R4RS exports - (cons (proc (:value :value) :pair)) - ((pair? null?) (proc (:value) :boolean)) - (list-ref (proc (:value :exact-integer) :value)) - (list (proc (&rest :value) :value)) - (length (proc (:value) :exact-integer)) - (append (proc (&rest :value) :value)) - (reverse (proc (:value) :value)) - ((car cdr - caaaar caaadr caadar caaddr caaar caadr caar - cadaar cadadr caddar cadddr cadar caddr cadr - cdaaar cdaadr cdadar cdaddr cdaar cdadr cdar - cddaar cddadr cdddar cddddr cddar cdddr cddr) (proc (:value) :value)) - ((set-car! set-cdr!) (proc (:pair :value) :unspecific)) - ((memq memv) (proc (:value :value) :value)) - ((assq assv) (proc (:value :value) :value)) - - - ;; lset-lib - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; lset<= = list1 list2 ... - ;; lset= = list1 list2 ... - ;; lset-adjoin = list elt1 ... - ;; lset-union = list1 ... - ;; lset-intersection = list1 list2 ... - ;; lset-difference = list1 list2 ... - ;; lset-xor = list1 ... - ;; lset-diff+intersection = list1 list2 ... - ;; ... and their side effecting counterparts: - ;; lset-union! lset-intersection! lset-difference! lset-xor! - ;; lset-diff+intersection! - - ;; lset= = list1 ... -> boolean - ;; lset<= = list1 ... -> boolean - ((lset= lset<=) - (proc ((proc (:value :value) :boolean) &rest :value) :boolean)) - - ;; lset-adjoin = list elt1 ... - (lset-adjoin (proc ((proc (:value :value) :boolean) :value &rest :value) :value)) - - ;; lset-union = list1 ... lset-xor = list1 ... - ;; lset-union! = list1 ... lset-xor! = list1 ... - ((lset-union lset-xor) - (proc ((proc (:value :value) :boolean) &rest :value) :value)) - - ;; lset-intersection = list1 list2 ... - ;; lset-intersection! = list1 list2 ... - ;; lset-difference = list1 list2 ... - ;; lset-difference! = list1 list2 ... - ((lset-intersection lset-difference - lset-intersection! lset-difference!) - (proc ((proc (:value :value) :boolean) :value &rest :value) :value)) - - ;; lset-diff+intersection = list1 list2 ... - ;; lset-diff+intersection! = list1 list2 ... - ((lset-diff+intersection lset-diff+intersection!) - (proc ((proc (:value :value) :boolean) :value &rest :value) - (some-values :value :value))) - )) - -(define-structure list-lib list-lib-interface - (open error-package ; ERROR procedure - receiving ; RECEIVE m-v macro - let-opt ; LET-OPTIONALS and :OPTIONAL. - scheme) - (begin (define (check-arg pred val caller) - (let lp ((val val)) - (if (pred val) val (lp (error "Bad argument" val pred caller)))))) - (files list-lib)) diff --git a/scsh/lib/srfi-1.html b/scsh/lib/srfi-1.html deleted file mode 100644 index 18e6c39..0000000 --- a/scsh/lib/srfi-1.html +++ /dev/null @@ -1,3257 +0,0 @@ - - - - - - - - - - SRFI 1: List Library - - - - - - - - - - -

Title

-
-List Library -
- - -

Author

-

-Olin Shivers - -

- http://www.ai.mit.edu/~shivers/ / - shivers@ai.mit.edu -
- - -

Status

-

-This SRFI is currently in ``final status. To see an explanation of each status that a SRFI can hold, see here. -You can access the discussion via the archive of the mailing list. -

-

- - -

Table of contents

- - - - - - -

Abstract

-

-R5RS Scheme has an impoverished set of list-processing utilities, which is a -problem for authors of portable code. This SRFI proposes a coherent and -comprehensive set of list-processing procedures; it is accompanied by a -reference implementation of the spec. The reference implementation is -

- - -

Rationale

-

-The set of basic list and pair operations provided by R4RS/R5RS Scheme is far -from satisfactory. Because this set is so small and basic, most -implementations provide additional utilities, such as a list-filtering -function, or a "left fold" operator, and so forth. But, of course, this -introduces incompatibilities -- different Scheme implementations provide -different sets of procedures. - -

-I have designed a full-featured library of procedures for list processing. -While putting this library together, I checked as many Schemes as I could get -my hands on. (I have a fair amount of experience with several of these -already.) I missed Chez -- no on-line manual that I can find -- but I hit most -of the other big, full-featured Schemes. The complete list of list-processing -systems I checked is: -

- R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, - Common Lisp, Bigloo, guile, T, APL and the SML standard basis -
-

-As a result, the library I am proposing is fairly rich. -

-Following this initial design phase, this library went through several -months of discussion on the SRFI mailing lists, and was altered in light -of the ideas and suggestions put forth during this discussion. -

-In parallel with designing this API, I have also written a reference -implementation. I have placed this source on the Net with an unencumbered, -"open" copyright. A few notes about the reference implementation: - -

-

-In short, I've written the reference implementation to make it as painless -as possible for an implementor -- or a regular programmer -- to adopt this -library and get good results with it. - - - -

Procedure Index

-

-Here is a short list of the procedures provided by the list-lib package. -R5RS procedures are shown in -bold; -extended R5RS - procedures, in bold italic. -

-
-
Constructors -
-
-cons list
-xcons cons* make-list list-tabulate 
-list-copy circular-list iota
-
- -
Predicates -
-
-pair? null?
-proper-list? circular-list? dotted-list? 
-not-pair? null-list?
-list=
-
- -
Selectors -
-
-car cdr ... cddadr cddddr list-ref
-first second third fourth fifth sixth seventh eighth ninth tenth
-car+cdr
-take       drop
-take-right drop-right
-take!      drop-right! 
-split-at   split-at! 
-last last-pair
-
- -
Miscellaneous: length, append, concatenate, reverse, zip & count -
-
-length length+
-append  concatenate  reverse
-append! concatenate! reverse!
-append-reverse append-reverse!
-zip unzip1 unzip2 unzip3 unzip4 unzip5
-count
-
- -
Fold, unfold & map -
-
-map for-each
-fold       unfold       pair-fold       reduce 
-fold-right unfold-right pair-fold-right reduce-right 
-append-map append-map!
-map! pair-for-each filter-map map-in-order
-
- -
Filtering & partitioning -
-
-filter  partition  remove
-filter! partition! remove! 
-
- -
Searching -
-
-member memq memv
-find find-tail 
-any every
-list-index
-take-while drop-while take-while!
-span break span! break!
-
- -
Deleting -
-
-delete  delete-duplicates 
-delete! delete-duplicates!
-
- -
Association lists -
-
-assoc assq assv
-alist-cons alist-copy
-alist-delete alist-delete!
-
- -
Set operations on lists -
-
-lset<= lset= lset-adjoin
-lset-union			lset-union!
-lset-intersection		lset-intersection!
-lset-difference		        lset-difference!
-lset-xor			lset-xor!
-lset-diff+intersection	        lset-diff+intersection!
-
- -
Primitive side-effects -
-
-set-car! set-cdr!
-
-
-
- -

-Four R4RS/R5RS list-processing procedures are extended by this library in -backwards-compatible ways: -

- -
map for-each - (Extended to take lists of unequal length) -
member assoc - (Extended to take an optional comparison procedure.) -
-
- -

-The following R4RS/R5RS list- and pair-processing procedures are also part of -list-lib's exports, as defined by the R5RS: -

-
-cons pair? null?
-car cdr ... cdddar cddddr 
-set-car! set-cdr! 
-list append reverse
-length list-ref
-memq memv assq assv
-
-
- -

-The remaining two R4RS/R5RS list-processing -procedures are not part of -this library: -

- -
list-tail - (renamed drop) -
list? - (see proper-list?, - circular-list? and - dotted-list?) -
-
- - -

General discussion

-

- -A set of general criteria guided the design of this library. - -

- -I don't require "destructive" (what I call "linear update") procedures to -alter and recycle cons cells from the argument lists. They are allowed to, but -not required to. (And the reference implementations I have written do -recycle the argument lists.) - -

-List-filtering procedures such as filter or delete do not disorder -lists. Elements appear in the answer list in the same order as they appear in -the argument list. This constrains implementation, but seems like a desirable -feature, since in many uses of lists, order matters. (In particular, -disordering an alist is definitely a bad idea.) -

-Contrariwise, although the reference implementations of the list-filtering -procedures share longest common tails between argument and answer lists, -it not is part of the spec. -

-Because lists are an inherently sequential data structure (unlike, say, -vectors), list-inspection functions such as find, find-tail, for-each, any -and every commit to a left-to-right traversal order of their argument list. -

-However, constructor functions, such as list-tabulate and the mapping -procedures (append-map, append-map!, map!, pair-for-each, filter-map, -map-in-order), do not specify the dynamic order in which their procedural -argument is applied to its various values. -

-Predicates return useful true values wherever possible. Thus any must return -the true value produced by its predicate, and every returns the final true -value produced by applying its predicate argument to the last element of its -argument list. -

-Functionality is provided both in pure and linear-update (potentially -destructive) forms wherever this makes sense. -

-No special status accorded Scheme's built-in equality functions. -Any functionality provided in terms of eq?, eqv?, equal? is also -available using a client-provided equality function. -

-Proper design counts for more than backwards compatibility, but I have tried, -ceteris paribus, -to be as backwards-compatible as possible with existing -list-processing libraries, in order to facilitate porting old code to run as a -client of the procedures in this library. Name choices and semantics are, for -the most part, in agreement with existing practice in many current Scheme -systems. I have indicated some incompatibilities in the following text. -

-These procedures are not "sequence generic" -- i.e., procedures that -operate on either vectors and lists. They are list-specific. I prefer to -keep the library simple and focussed. -

-I have named these procedures without a qualifying initial "list-" lexeme, -which is in keeping with the existing set of list-processing utilities in -Scheme. -I follow the general Scheme convention (vector-length, string-ref) of -placing the type-name before the action when naming procedures -- so -we have list-copy and pair-for-each rather than the perhaps -more fluid, but less consistent, copy-list or for-each-pair. -

-I have generally followed a regular and consistent naming scheme, composing -procedure names from a set of basic lexemes. - - -

"Linear update" procedures

-

- -Many procedures in this library have "pure" and "linear update" variants. A -"pure" procedure has no side-effects, and in particular does not alter its -arguments in any way. A "linear update" procedure is allowed -- but not -required -- to side-effect its arguments in order to construct its -result. "Linear update" procedures are typically given names ending with an -exclamation point. So, for example, (append! list1 list2) is allowed to -construct its result by simply using set-cdr! to set the cdr of the last pair -of list1 to point to list2, and then returning list1 (unless list1 is the -empty list, in which case it would simply return list2). However, append! may -also elect to perform a pure append operation -- this is a legal definition -of append!: -

-(define append! append)
-
-

-This is why we do not call these procedures "destructive" -- because they -aren't required to be destructive. They are potentially destructive. -

-What this means is that you may only apply linear-update procedures to -values that you know are "dead" -- values that will never be used again -in your program. This must be so, since you can't rely on the value passed -to a linear-update procedure after that procedure has been called. It -might be unchanged; it might be altered. -

-The "linear" in "linear update" doesn't mean "linear time" or "linear space" -or any sort of multiple-of-n kind of meaning. It's a fancy term that -type theorists and pure functional programmers use to describe -systems where you are only allowed to have exactly one reference to each -variable. This provides a guarantee that the value bound to a variable is -bound to no other variable. So when you use a variable in a variable -reference, you "use it up." Knowing that no one else has a pointer to that -value means the a system primitive is free to side-effect its arguments to -produce what is, observationally, a pure-functional result. -

-In the context of this library, "linear update" means you, the programmer, -know there are no other live references to the value passed to the -procedure -- after passing the value to one of these procedures, the -value of the old pointer is indeterminate. Basically, you are licensing -the Scheme implementation to alter the data structure if it feels like -it -- you have declared you don't care either way. -

-You get no help from Scheme in checking that the values you claim are "linear" -really are. So you better get it right. Or play it safe and use the non-! -procedures -- it doesn't do any good to compute quickly if you get the wrong -answer. -

-Why go to all this trouble to define the notion of "linear update" and use it -in a procedure spec, instead of the more common notion of a "destructive" -operation? First, note that destructive list-processing procedures are almost -always used in a linear-update fashion. This is in part required by the -special case of operating upon the empty list, which can't be side-effected. -This means that destructive operators are not pure side-effects -- they have -to return a result. Second, note that code written using linear-update -operators can be trivially ported to a pure, functional subset of Scheme by -simply providing pure implementations of the linear-update operators. Finally, -requiring destructive side-effects ruins opportunities to parallelise these -operations -- and the places where one has taken the trouble to spell out -destructive operations are usually exactly the code one would want a -parallelising compiler to parallelise: the efficiency-critical kernels of the -algorithm. Linear-update operations are easily parallelised. Going with a -linear-update spec doesn't close off these valuable alternative implementation -techniques. This list library is intended as a set of low-level, basic -operators, so we don't want to exclude these possible implementations. -

-The linear-update procedures in this library are -

-take! drop-right! split-at! -append! concatenate! reverse! append-reverse! -append-map! map! -filter! partition! remove! -take-while! span! break! -delete! alist-delete! delete-duplicates! -lset-adjoin! lset-union! lset-intersection! -lset-difference! lset-xor! lset-diff+intersection! -
- - - -

Improper Lists

-

- -Scheme does not properly have a list type, just as C does not have a string -type. Rather, Scheme has a binary-tuple type, from which one can build binary -trees. There is an interpretation of Scheme values that allows one to -treat these trees as lists. Further complications ensue from the fact that -Scheme allows side-effects to these tuples, raising the possibility of lists -of unbounded length, and trees of unbounded depth (that is, circular data -structures). - -

-However, there is a simple view of the world of Scheme values that considers -every value to be a list of some sort. that is, every value is either -

-

-Note that the zero-length dotted lists are simply all the non-null, non-pair -values. - -

-This view is captured by the predicates proper-list?, dotted-list?, and -circular-list?. List-lib users should note that dotted lists are not commonly -used, and are considered by many Scheme programmers to be an ugly artifact of -Scheme's lack of a true list type. However, dotted lists do play a noticeable -role in the syntax of Scheme, in the "rest" parameters used by n-ary -lambdas: (lambda (x y . rest) ...). - -

-Dotted lists are not fully supported by list-lib. Most procedures are -defined only on proper lists -- that is, finite, nil-terminated lists. The -procedures that will also handle circular or dotted lists are specifically -marked. While this design decision restricts the domain of possible arguments -one can pass to these procedures, it has the benefit of allowing the -procedures to catch the error cases where programmers inadvertently pass -scalar values to a list procedure by accident, -e.g., by switching the arguments to a procedure call. - - -

Errors

-

- -Note that statements of the form "it is an error" merely mean "don't -do that." They are not a guarantee that a conforming implementation will -"catch" such improper use by, for example, raising some kind of exception. -Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators such -as car and cdr, so there's little point in requiring these procedures to do -more. Here is the relevant section of the R5RS: -

-

- When speaking of an error situation, this report uses the phrase "an - error is signalled" to indicate that implementations must detect and - report the error. If such wording does not appear in the discussion - of an error, then implementations are not required to detect or - report the error, though they are encouraged to do so. An error - situation that implementations are not required to detect is usually - referred to simply as "an error." -

- For example, it is an error for a procedure to be passed an argument - that the procedure is not explicitly specified to handle, even though - such domain errors are seldom mentioned in this report. - Implementations may extend a procedure's domain of definition to - include such arguments. -

- - - -

Not included in this library

-

-The following items are not in this library: -

-

-They should have their own SRFI specs. -

- - - -

The procedures

-

- -In a Scheme system that has a module or package system, these procedures -should be contained in a module named "list-lib". - -The templates given below obey the following conventions for procedure formals: - - -
list - A proper (finite, nil-terminated) list -
clist - A proper or circular list -
flist - A finite (proper or dotted) list -
pair - A pair -
x, y, d, a - Any value -
object, value - Any value -
n, i - A natural number (an integer >= 0) -
proc - A procedure -
pred - A procedure whose return value is treated as a boolean -
= - A boolean procedure taking two arguments -
- -

-It is an error to pass a circular or dotted list to a procedure not -defined to accept such an argument. - - -

Constructors

-

- -

- - -
- -cons a d -> pair -
- [R5RS] - The primitive constructor. Returns a newly allocated pair whose car is - a and whose cdr is d. - The pair is guaranteed to be different (in the sense of eqv?) - from every existing object. -
-(cons 'a '())        => (a)
-(cons '(a) '(b c d)) => ((a) b c d)
-(cons "a" '(b c))    => ("a" b c)
-(cons 'a 3)          => (a . 3)
-(cons '(a b) 'c)     => ((a b) . c)
-
- - -
- -list object ... -> list -
- [R5RS] - Returns a newly allocated list of its arguments. -
-(list 'a (+ 3 4) 'c) =>  (a 7 c)
-(list)               =>  ()
-
- - -
- -xcons d a -> pair -
-
-(lambda (d a) (cons a d))
-
- Of utility only as a value to be conveniently passed to higher-order - procedures. - -
-(xcons '(b c) 'a) => (a b c)
-
- - The name stands for "eXchanged CONS." - - - -
cons* elt1 elt2 ... -> object -
- - Like list, - but the last argument provides the tail of the constructed list, - returning -
-(cons elt1 (cons elt2 (cons ... eltn))) -
- This function is called list* in Common Lisp and about - half of the Schemes that provide it, - and cons* in the other half. -
-(cons* 1 2 3 4) => (1 2 3 . 4)
-(cons* 1) => 1
-
- - - -
make-list n [fill] -> list -
- Returns an n-element list, - whose elements are all the value fill. - If the fill argument is not given, the elements of the list may - be arbitrary values. -
-(make-list 4 'c) => (c c c c)
-
- - - -
list-tabulate n init-proc -> list -
- Returns an n-element list. Element i of the list, where 0 <= i < n, - is produced by (init-proc i). No guarantee is made about the dynamic - order in which init-proc is applied to these indices. - -
-(list-tabulate 4 values) => (0 1 2 3)
-
- - - -
list-copy flist -> flist -
- Copies the spine of the argument. - - - -
circular-list elt1 elt2 ... -> list -
- Constructs a circular list of the elements. -
-(circular-list 'z 'q) => (z q z q z q ...)
-
- - - -
iota count [start step] -> list -
- Returns a list containing the elements -
-(start start+step ... start+(count-1)*step)
-
- The start and step parameters default to 0 and 1, respectively. - This procedure takes its name from the APL primitive. - -
-(iota 5) => (0 1 2 3 4)
-(iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4)
-
-
- - -

Predicates

-

-Note: the predicates proper-list?, circular-list?, and dotted-list? -partition the entire universe of Scheme values. - -

- -
-proper-list? x -> boolean - -
- Returns true iff x is a proper list -- a finite, nil-terminated list. -

- More carefully: The empty list is a proper list. A pair whose cdr is a - proper list is also a proper list: -

-<proper-list> ::= ()                            (Empty proper list)
-              |   (cons <x> <proper-list>)      (Proper-list pair)
-
- Note that this definition rules out circular lists. This - function is required to detect this case and return false. -

- Nil-terminated lists are called "proper" lists by R5RS and Common Lisp. - The opposite of proper is improper. -

- R5RS binds this function to the variable list?. -

-

-(not (proper-list? x)) = (or (dotted-list? x) (circular-list? x))
-
- - - -
circular-list? x -> boolean -
- True if x is a circular list. A circular list is a value such that - for every n >= 0, cdrn(x) is a pair. -

- Terminology: The opposite of circular is finite. -

-(not (circular-list? x)) = (or (proper-list? x) (dotted-list? x))
-
- - - -
dotted-list? x -> boolean -
- True if x is a finite, non-nil-terminated list. That is, there exists - an n >= 0 such that cdrn(x) is neither a pair nor (). - This includes - non-pair, non-() values (e.g. symbols, numbers), - which are considered to be dotted lists of length 0. -
-(not (dotted-list? x)) = (or (proper-list? x) (circular-list? x))
-
- - - -
pair? object -> boolean -
- [R5RS] - Returns #t if object is a pair; otherwise, #f. -
-(pair? '(a . b)) =>  #t
-(pair? '(a b c)) =>  #t
-(pair? '())      =>  #f
-(pair? '#(a b))  =>  #f
-(pair? 7)        =>  #f
-(pair? 'a)       =>  #f
-
- - - -
null? object -> boolean -
- [R5RS] - Returns #t if object is the empty list; otherwise, #f. - - - -
null-list? list -> boolean -
- List is a proper or circular list. This procedure returns true if - the argument is the empty list (), and false otherwise. It is an - error to pass this procedure a value which is not a proper or - circular list. - - This procedure is recommended as the termination condition for - list-processing procedures that are not defined on dotted lists. - - -
- -not-pair? x -> boolean -
-
(lambda (x) (not (pair? x)))
- Provided as a procedure as it can be useful as the termination condition - for list-processing procedures that wish to handle all finite lists, - both proper and dotted. - - -
- -list= elt= list1 ... -> boolean -
- Determines list equality, given an element-equality procedure. - Proper list A equals proper list B - if they are of the same length, - and their corresponding elements are equal, - as determined by elt=. - If the element-comparison procedure's first argument is - from listi, - then its second argument is from listi+1, - i.e. it is always called as - (elt= a b) - for a an element of list A, - and b an element of list B. -

- In the n-ary case, - every listi is compared to - listi+1 - (as opposed, for example, to comparing - list1 to every listi, - for i>1). - If there are no list arguments at all, - list= simply returns true. -

- It is an error to apply list= to anything except proper lists. - While - implementations may choose to extend it to circular lists, note that it - cannot reasonably be extended to dotted lists, as it provides no way to - specify an equality procedure for comparing the list terminators. -

- Note that the dynamic order in which the elt= procedure is - applied to pairs of elements is not specified. - For example, if list= is applied - to three lists, A, B, and C, - it may first completely compare A to B, - then compare B to C, - or it may compare the first elements of A and B, - then the first elements of B and C, - then the second elements of A and B, and so forth. -

- The equality procedure must be consistent with eq?. - That is, it must be the case that -

- (eq? x y) => (elt= x y). -
- Note that this implies that two lists which are eq? - are always list=, as well; implementations may exploit this - fact to "short-cut" the element-by-element comparisons. -
-(list= eq?) => #t       ; Trivial cases
-(list= eq? '(a)) => #t
-
- -
- - - -

Selectors

-
- - -
- - -car pair -> value -
cdr pair -> value -
- [R5RS] - These functions return the contents of the car and cdr field of their - argument, respectively. - Note that it is an error to apply them to the empty list. -
-(car '(a b c))     =>  a             (cdr '(a b c))     =>  (b c)  
-(car '((a) b c d)) =>  (a)	     (cdr '((a) b c d)) =>  (b c d)
-(car '(1 . 2))     =>  1	     (cdr '(1 . 2))     =>  2      
-(car '())          =>  *error*	     (cdr '())          =>  *error*
-
- - - - - - - - -
caar pair -> value -
cadr pair -> value -
: -
cdddar pair -> value -
cddddr pair -> value -
- [R5RS] - These procedures are compositions of car and cdr, - where for example caddr could be defined by -
    
-(define caddr (lambda (x) (car (cdr (cdr x))))).
-
- Arbitrary compositions, up to four deep, are provided. There are - twenty-eight of these procedures in all. - - - -
list-ref clist i -> value -
- [R5RS] - Returns the ith element of clist. - (This is the same as the car of - (drop clist i).) - It is an error if i >= n, - where n is the length of clist. -
    
-(list-ref '(a b c d) 2) => c
-
- - -
- -first   pair -> object -
- -second  pair -> object -
- -third   pair -> object -
- -fourth  pair -> object -
- -fifth   pair -> object -
- -sixth   pair -> object -
- -seventh pair -> object -
- -eighth  pair -> object -
- -ninth   pair -> object -
- -tenth   pair -> object -
- Synonyms for car, cadr, caddr, ... - -
-(third '(a b c d e)) => c
-
- - -
- -car+cdr pair -> [x y] -
- The fundamental pair deconstructor: -
-(lambda (p) (values (car p) (cdr p)))
-
- This can, of course, be implemented more efficiently by a compiler. - - -
- -take x i -> list -
- -drop x i -> object -
- take returns the first i elements of list x.
- drop returns all but the first i elements of list x. -
-(take '(a b c d e)  2) => (a b)
-(drop '(a b c d e)  2) => (c d e)
-
- x may be any value -- a proper, circular, or dotted list: -
-(take '(1 2 3 . d) 2) => (1 2)
-(drop '(1 2 3 . d) 2) => (3 . d)
-(take '(1 2 3 . d) 3) => (1 2 3)
-(drop '(1 2 3 . d) 3) => d
-
- For a legal i, take and drop partition the list in a manner which - can be inverted with append: -
-(append (take x i) (drop x i)) = x
-
- drop is exactly equivalent to performing i cdr operations on x; - the returned value shares a common tail with x. - - If the argument is a list of non-zero length, take is guaranteed to - return a freshly-allocated list, even in the case where the entire - list is taken, e.g. (take lis (length lis)). - - -
- -take-right flist i -> object -
- -drop-right flist i -> list -
- take-right returns the last i elements of flist.
- drop-right returns all but the last i elements of flist. -
-(take-right '(a b c d e) 2) => (d e)
-(drop-right '(a b c d e) 2) => (a b c)
-
- The returned list may share a common tail with the argument list. -

- flist may be any finite list, either proper or dotted: -

-(take-right '(1 2 3 . d) 2) => (2 3 . d)
-(drop-right '(1 2 3 . d) 2) => (1)
-(take-right '(1 2 3 . d) 0) => d
-(drop-right '(1 2 3 . d) 0) => (1 2 3)
-
- For a legal i, take-right and drop-right partition the list in a manner - which can be inverted with append: -
-(append (take flist i) (drop flist i)) = flist
-
- take-right's return value is guaranteed to share a common tail with flist. - - If the argument is a list of non-zero length, drop-right is guaranteed to - return a freshly-allocated list, even in the case where nothing is - dropped, e.g. (drop-right lis 0). - - -
- -take! x i -> list -
- -drop-right! flist i -> list -
- take! and drop-right! are "linear-update" variants of take and - drop-right: the procedure is allowed, but not required, to alter the - argument list to produce the result. -

- If x is circular, take! may return a shorter-than-expected list: -

-(take! (circular-list 1 3 5) 8) => (1 3)
-(take! (circular-list 1 3 5) 8) => (1 3 5 1 3 5 1 3)
-
- - - -
- -split-at  x i -> [list object] -
- -split-at! x i -> [list object] -
- split-at splits the list x - at index i, returning a list of the - first i elements, and the remaining tail. It is equivalent - to -
-(values (take x i) (drop x i))
-
- split-at! is the linear-update variant. It is allowed, but not - required, to alter the argument list to produce the result. -
-(split-at '(a b c d e f g h) 3) =>
-    (a b c)
-    (d e f g h)
-
- - - -
- -last pair -> object -
- -last-pair pair -> pair -
- last returns the last element of the non-empty, - finite list pair. - last-pair returns the last pair in the non-empty, - finite list pair. - -
-(last '(a b c)) => c
-(last-pair '(a b c)) => (c)
-
- -
- - -

Miscellaneous: length, append, concatenate, reverse, zip & count

- -
- -
- -length  list -> integer -
- -length+ clist -> integer or #f -
- Both length and length+ return the length of the argument. - It is an error to pass a value to length which is not a proper - list (finite and nil-terminated). In particular, this means an - implementation may diverge or signal an error when length is - applied to a circular list. -

- length+, on the other hand, returns #F when applied to a circular - list. -

- The length of a proper list is a non-negative integer n such that cdr - applied n times to the list produces the empty list. - - - -

- -append  list1 ... -> list -
- -append! list1 ... -> list -
- [R5RS] - append returns a list consisting of the elements - of list1 - followed by the elements of the other list parameters. -
-(append '(x) '(y))        =>  (x y)
-(append '(a) '(b c d))    =>  (a b c d)
-(append '(a (b)) '((c)))  =>  (a (b) (c))
-
- The resulting list is always newly allocated, except that it - shares structure with the final listi argument. - This last argument may be any value at all; - an improper list results if it is not - a proper list. All other arguments must be proper lists. -
-(append '(a b) '(c . d))  =>  (a b c . d)
-(append '() 'a)           =>  a
-(append '(x y))           =>  (x y)
-(append)                  =>  ()
-
- - append! is the "linear-update" variant of append - -- it is allowed, but not required, to alter cons cells in the argument - lists to construct the result list. - The last argument is never altered; the result - list shares structure with this parameter. - - -
- -concatenate  list-of-lists -> value -
- -concatenate! list-of-lists -> value -
- These functions append the elements of their argument together. - That is, concatenate returns -
-(apply append list-of-lists)
-
- or, equivalently, -
-(reduce-right append '() list-of-lists)
-
- - concatenate! is the linear-update variant, defined in - terms of append! instead of append. - -

- Note that some Scheme implementations do not support passing more than a - certain number (e.g., 64) of arguments to an n-ary procedure. - In these implementations, the (apply append ...) idiom - would fail when applied to long lists, - but concatenate would continue to function properly. - -

- As with append and append!, - the last element of the input list may be any value at all. - - -

- -reverse  list -> list -
- -reverse! list -> list -
- [R5RS] - - reverse returns a newly allocated list consisting of - the elements of list in reverse order. -
-(reverse '(a b c)) =>  (c b a)
-(reverse '(a (b c) d (e (f))))
-    =>  ((e (f)) d (b c) a)
-
- reverse! is the linear-update variant of reverse. - It is permitted, but not required, to alter the argument's cons cells - to produce the reversed list. - - - -
- -append-reverse  rev-head tail -> list -
- -append-reverse! rev-head tail -> list -
- append-reverse returns - (append (reverse rev-head) tail). - It is provided because it is a common operation -- a common - list-processing style calls for this exact operation to transfer values - accumulated in reverse order onto the front of another list, and because - the implementation is significantly more efficient than the simple - composition it replaces. (But note that this pattern of iterative - computation followed by a reverse can frequently be rewritten as a - recursion, dispensing with the reverse and append-reverse steps, and - shifting temporary, intermediate storage from the heap to the stack, - which is typically a win for reasons of cache locality and eager storage - reclamation.) -

- append-reverse! is just the linear-update variant -- it is allowed, but - not required, to alter rev-head's cons cells to construct the result. - - - -

zip clist1 clist2 ... -> list -
-
(lambda lists (apply map list lists))
-
- If zip is passed n lists, it returns a list as long as the shortest - of these lists, each element of which is an n-element list comprised - of the corresponding elements from the parameter lists. - -
-(zip '(one two three) 
-     '(1 2 3)
-     '(odd even odd even odd even odd even))
-    => ((one 1 odd) (two 2 even) (three 3 odd))
-
-(zip '(1 2 3)) => ((1) (2) (3))
-
- At least one of the argument lists must be finite: -
-(zip '(3 1 4 1) (circular-list #f #t)) 
-    => ((3 #f) (1 #t) (4 #f) (1 #t))
-
- - - -
unzip1 list -> list - -
unzip2 list -> [list list] - -
unzip3 list -> [list list list] - -
unzip4 list -> [list list list list] - -
unzip5 list -> [list list list list list] -
- unzip1 takes a list of lists, - where every list must contain at least one element, - and returns a list containing the initial element of each such list. - That is, it returns (map car lists). - unzip2 takes a list of lists, where every list must contain at least - two elements, and returns two values: a list of the first elements, - and a list of the second elements. unzip3 does the same for the first - three elements of the lists, and so forth. - -
-(unzip2 '((1 one) (2 two) (3 three))) =>
-    (1 2 3) 
-    (one two three)
-
- - -
- -count pred clist1 clist2 -> integer -
- pred is a procedure taking as many arguments as there - are lists and returning a single value. It is applied - element-wise to the elements of the lists, and a count is - tallied of the number of elements that produce a true value. This count - is returned. count is "iterative" in that it is guaranteed - to apply pred to the list elements in a - left-to-right order. - The counting stops when the shortest list expires. -
-(count even? '(3 1 4 1 5 9 2 5 6)) => 3
-(count < '(1 2 4 8) '(2 4 6 8 10 12 14 16)) => 3
-
- At least one of the argument lists must be finite: -
-(count < '(3 1 4 1) (circular-list 1 10)) => 2
-
- -
- - -

Fold, unfold & map

-
- -
- -fold kons knil clist1 clist2 ... -> value -
- The fundamental list iterator. -

- First, consider the single list-parameter case. If clist1 = (e1 e2 ... en), - then this procedure returns -

-(kons en ... (kons e2 (kons e1 knil)) ... ) -
- That is, it obeys the (tail) recursion -
-(fold kons knil lis) = (fold kons (kons (car lis) knil) (cdr lis))
-(fold kons knil '()) = knil
-
- - Examples: -
-(fold + 0 lis)			; Add up the elements of LIS.
-
-(fold cons '() lis)		; Reverse LIS.
-
-(fold cons tail rev-head)	; See APPEND-REVERSE.
-
-;; How many symbols in LIS?
-(fold (lambda (x count) (if (symbol? x) (+ count 1) count))
-      0
-      lis)
-
-;; Length of the longest string in LIS:
-(fold (lambda (s max-len) (max max-len (string-length s)))
-      0
-      lis)
-
- - If n list arguments are provided, then the kons function must take - n+1 parameters: one element from each list, and the "seed" or fold - state, which is initially knil. The fold operation terminates when - the shortest list runs out of values: -
-(fold cons* '() '(a b c) '(1 2 3 4 5)) => (c 3 b 2 a 1)
-
- At least one of the list arguments must be finite. - - -
- -fold-right kons knil clist1 clist2 ... -> value -
- The fundamental list recursion operator. -

- First, consider the single list-parameter case. If clist1 = (e1 e2 ... en), - then this procedure returns -

-(kons e1 (kons e2 ... (kons en knil))) -
- That is, it obeys the recursion -
-(fold-right kons knil lis) = (kons (car lis) (fold-right kons knil (cdr lis)))
-(fold-right kons knil '()) = knil
-
- - Examples: -
-(fold-right cons '() lis)		; Copy LIS.
-
-;; Filter the even numbers out of LIS.
-(fold-right (lambda (x l) (if (even? x) (cons x l) l)) '() lis))
-
- - If n list arguments are provided, then the kons function must take - n+1 parameters: one element from each list, and the "seed" or fold - state, which is initially knil. The fold operation terminates when - the shortest list runs out of values: -
-(fold-right cons* '() '(a b c) '(1 2 3 4 5)) => (a 1 b 2 c 3)
-
- At least one of the list arguments must be finite. - - -
- -pair-fold kons knil clist1 clist2 ... -> value -
- Analogous to fold, but kons is applied to successive sublists of the - lists, rather than successive elements -- that is, kons is applied to the - pairs making up the lists, giving this (tail) recursion: -
-(pair-fold kons knil lis) = (let ((tail (cdr lis)))
-                              (pair-fold kons (kons lis knil) tail))
-(pair-fold kons knil '()) = knil
-
- For finite lists, the kons function may reliably apply - set-cdr! to the pairs it is given - without altering the sequence of execution. -

- Example: -

-;;; Destructively reverse a list.
-(pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis))
-
- - At least one of the list arguments must be finite. - - - -
- -pair-fold-right kons knil clist1 clist2 ... -> value -
- Holds the same relationship with fold-right that pair-fold holds with fold. - Obeys the recursion -
-(pair-fold-right kons knil lis) = 
-    (kons lis (pair-fold-right kons knil (cdr lis)))
-(pair-fold-right kons knil '()) = knil
-
- - Example: -
-(pair-fold-right cons '() '(a b c)) => ((a b c) (b c) (c))
-
- - At least one of the list arguments must be finite. - - -
- -reduce f ridentity list -> value -
- reduce is a variant of fold. -

- ridentity should be a "right identity" of the procedure f -- that is, - for any value x acceptable to f, -

-(f x ridentity) = x
-
- - reduce has the following definition: -
-If list = (), return ridentity;
-Otherwise, return (fold f (car list) (cdr list)). -
- ...in other words, we compute - (fold f ridentity list). -

- Note that ridentity is used only in the empty-list case. - You typically use reduce when applying f is expensive and you'd - like to avoid the extra application incurred when fold applies - f to the head of list and the identity value, - redundantly producing the same value passed in to f. - For example, if f involves searching a file directory or - performing a database query, this can be significant. - In general, however, fold is useful in many contexts where reduce is not - (consider the examples given in the fold definition -- only one of the - five folds uses a function with a right identity. - The other four may not be performed with reduce). - -

- Note: MIT Scheme and Haskell flip F's arg order for their reduce and - fold functions. - -

-;; Take the max of a list of non-negative integers.
-(reduce max 0 nums) ; i.e., (apply max 0 nums)
-
- - -
- -reduce-right f ridentity list -> value -
- reduce-right is the fold-right variant of reduce. - It obeys the following definition: -
-(reduce-right f ridentity '()) = ridentity
-(reduce-right f ridentity '(e1)) = (f e1 ridentity) = e1
-(reduce-right f ridentity '(e1 e2 ...)) =
-    (f e1 (reduce f ridentity (e2 ...)))
-
- ...in other words, we compute - (fold-right f ridentity list). - -
-;; Append a bunch of lists together.
-;; I.e., (apply append list-of-lists)
-(reduce-right append '() list-of-lists)
-
- - -
- -unfold p f g seed [tail-gen] -> list -
-unfold is best described by its basic recursion: -
-(unfold p f g seed) = 
-    (if (p seed) (tail-gen seed)
-        (cons (f seed)
-              (unfold p f g (g seed))))
-
-
-
p
Determines when to stop unfolding. -
f
Maps each seed value to the corresponding list element. -
g
Maps each seed value to next seed value. -
seed
The "state" value for the unfold. -
tail-gen
Creates the tail of the list; - defaults to (lambda (x) '()) -
-

- In other words, we use g to generate a sequence of seed values -

-seed, g(seed), g2(seed), g3(seed), ... -
- These seed values are mapped to list elements by f, - producing the elements of the result list in a left-to-right order. - P says when to stop. - -

- unfold is the fundamental recursive list constructor, - just as fold-right is - the fundamental recursive list consumer. - While unfold may seem a bit abstract - to novice functional programmers, it can be used in a number of ways: - -

-;; List of squares: 1^2 ... 10^2
-(unfold (lambda (x) (> x 10))
-        (lambda (x) (* x x))
-	(lambda (x) (+ x 1))
-	1)
-		
-(unfold null-list? car cdr lis) ; Copy a proper list.
-
-;; Read current input port into a list of values.
-(unfold eof-object? values (lambda (x) (read)) (read))
-
-;; Copy a possibly non-proper list:
-(unfold not-pair? car cdr lis 
-              values)
-
-;; Append HEAD onto TAIL:
-(unfold null-list? car cdr head 
-              (lambda (x) tail))
-
- - Interested functional programmers may enjoy noting that - fold-right and unfold - are in some sense inverses. - That is, given operations knull?, kar, - kdr, kons, and knil satisfying -
-(kons (kar x) (kdr x)) = x - and -(knull? knil) = #t -
- then -
-(fold-right kons knil (unfold knull? kar kdr x)) = x -
- and -
-(unfold knull? kar kdr (fold-right kons knil x)) = x. -
- - This combinator sometimes is called an "anamorphism;" when an - explicit tail-gen procedure is supplied, it is called an - "apomorphism." - - - -
- -unfold-right p f g seed [tail] -> list -
- unfold-right constructs a list with the following loop: -
-(let lp ((seed seed) (lis tail))
-  (if (p seed) lis
-      (lp (g seed)
-          (cons (f seed) lis))))
-
-
-
p
Determines when to stop unfolding. -
f
Maps each seed value to the corresponding list element. -
g
Maps each seed value to next seed value. -
seed
The "state" value for the unfold. -
tail
list terminator; defaults to '(). -
-

- In other words, we use g to generate a sequence of seed values -

-seed, g(seed), g2(seed), g3(seed), ... -
- These seed values are mapped to list elements by f, - producing the elements of the result list in a right-to-left order. - P says when to stop. - -

- unfold-right is the fundamental iterative list constructor, - just as fold is the - fundamental iterative list consumer. - While unfold-right may seem a bit abstract - to novice functional programmers, it can be used in a number of ways: -

-;; List of squares: 1^2 ... 10^2
-(unfold-right zero? 
-              (lambda (x) (* x x))
-              (lambda (x) (- x 1))
-              10)
-	
-;; Reverse a proper list.
-(unfold-right null-list? car cdr lis)
-
-;; Read current input port into a list of values.
-(unfold-right eof-object? values (lambda (x) (read)) (read))
-
-;; (append-reverse rev-head tail)
-(unfold-right null-list? car cdr rev-head tail)
-
- - Interested functional programmers may enjoy noting that - fold and unfold-right - are in some sense inverses. - That is, given operations knull?, kar, - kdr, kons, and knil satisfying -
-(kons (kar x) (kdr x)) = x - and -(knull? knil) = #t -
- then -
-(fold kons knil (unfold-right knull? kar kdr x)) = x -
- and -
-(unfold-right knull? kar kdr (fold kons knil x)) = x. -
- - This combinator presumably has some pretentious mathematical name; - interested readers are invited to communicate it to the author. - - -
- -map proc clist1 clist2 ... -> list -
- [R5RS+] - - proc is a procedure taking as many arguments - as there are list arguments and returning a single value. - map applies proc element-wise to the elements - of the lists and returns a list of the results, - in order. - The dynamic order in which proc - is applied to the elements of the lists is unspecified. - -
-(map cadr '((a b) (d e) (g h))) =>  (b e h)
-
-(map (lambda (n) (expt n n))
-     '(1 2 3 4 5))
-    =>  (1 4 27 256 3125)
-
-(map + '(1 2 3) '(4 5 6)) =>  (5 7 9)
-
-(let ((count 0))
-  (map (lambda (ignored)
-         (set! count (+ count 1))
-         count)
-       '(a b))) =>  (1 2) or (2 1)
-
- - This procedure is extended from its - R5RS - specification to allow the arguments to be of unequal length; - it terminates when the shortest list runs out. -

- At least one of the argument lists must be finite: -

-(map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1)
-
- - -
- -for-each proc clist1 clist2 ... -> unspecified -
- [R5RS+] - - The arguments to for-each are like the arguments to - map, but - for-each calls proc for its side effects rather - than for its values. - Unlike map, for-each is guaranteed to call - proc on the elements of the lists in order from the first - element(s) to the last, - and the value returned by for-each is unspecified. -
-(let ((v (make-vector 5)))
-  (for-each (lambda (i)
-              (vector-set! v i (* i i)))
-            '(0 1 2 3 4))
-  v)  =>  #(0 1 4 9 16)
-
- - This procedure is extended from its - R5RS - specification to allow the arguments to be of unequal length; - it terminates when the shortest list runs out. -

- At least one of the argument lists must be finite. - - -

- -append-map  f clist1 clist2 ... -> value -
- -append-map! f clist1 clist2 ... -> value -
- Equivalent to -
-(apply append (map f clist1 clist2 ...)) -
- and -
-(apply append! (map f clist1 clist2 ...)) -
- - Map f over the elements of the lists, just as in the map function. - However, the results of the applications are appended together to - make the final result. append-map uses append to append the results - together; append-map! uses append!. -

- The dynamic order in which the various applications of f are made is - not specified. -

- Example: -

-(append-map! (lambda (x) (list x (- x))) '(1 3 8))
-    => (1 -1 3 -3 8 -8)
-
- - At least one of the list arguments must be finite. - - -
- -map! f list1 clist2 ... -> list -
- Linear-update variant of map -- map! is allowed, but not required, to - alter the cons cells of list1 to construct the result list. -

- The dynamic order in which the various applications of f are made is - not specified. - - In the n-ary case, clist2, clist3, ... must have at least as many - elements as list1. - - -

- -map-in-order f clist1 clist2 ... -> list -
- A variant of the map procedure that guarantees to apply f across - the elements of the listi arguments in a left-to-right order. This - is useful for mapping procedures that both have side effects and - return useful values. -

- At least one of the list arguments must be finite. - - -

- -pair-for-each f clist1 clist2 ... -> unspecific -
- Like for-each, but f is applied to successive sublists of the argument - lists. That is, f is applied to the cons cells of the lists, rather - than the lists' elements. These applications occur in left-to-right - order. -

- The f procedure may reliably apply set-cdr! to the pairs it is given - without altering the sequence of execution. - -

-(pair-for-each (lambda (pair) (display pair) (newline)) '(a b c)) ==>
-    (a b c)
-    (b c)
-    (c)
-
- - At least one of the list arguments must be finite. - - -
- -filter-map f clist1 clist2 ... -> list -
- Like map, but only true values are saved. -
-(filter-map (lambda (x) (and (number? x) (* x x))) '(a 1 b 3 c 7))
-    => (1 9 49)
-
- The dynamic order in which the various applications of f are made is - not specified. -

- At least one of the list arguments must be finite. -

- - -

Filtering & partitioning

-
- - -
- -filter pred list -> list -
- Return all the elements of list that satisfy predicate pred. - The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The returned list may share a common tail with the argument list. - The dynamic order in which the various applications of pred are made is - not specified. - -
-(filter even? '(0 7 8 8 43 -4)) => (0 8 8 -4)
-
- - -
- -partition pred list -> [list list] -
- Partitions the elements of list with predicate pred, and returns two - values: the list of in-elements and the list of out-elements. - The list is not disordered -- elements occur in the result lists - in the same order as they occur in the argument list. - The dynamic order in which the various applications of pred are made is - not specified. One of the returned lists may share a common tail with the - argument list. - -
-(partition symbol? '(one 2 3 four five 6)) => 
-    (one four five)
-    (2 3 6)
-
- - -
- -remove pred list -> list -
- Returns list without the elements that satisfy predicate pred: -
-(lambda (pred list) (filter (lambda (x) (not (pred x))) list))
-
- The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The returned list may share a common tail with the argument list. - The dynamic order in which the various applications of pred are made is - not specified. - -
-(remove even? '(0 7 8 8 43 -4)) => (7 43)
-
- - -
- -filter!    pred list -> list -
- -partition! pred list -> [list list] -
- -remove!    pred list -> list -
- Linear-update variants of filter, partition and remove. - These procedures are allowed, but not required, to alter the cons cells - in the argument list to construct the result lists. - -
- - -

Searching

-

- -The following procedures all search lists for a leftmost element satisfying -some criteria. This means they do not always examine the entire list; thus, -there is no efficient way for them to reliably detect and signal an error when -passed a dotted or circular list. Here are the general rules describing how -these procedures work when applied to different kinds of lists: - -

-
Proper lists: -
The standard, canonical behavior happens in this case. - -
Dotted lists: -
It is an error to pass these procedures a dotted list - that does not contain an element satisfying the search - criteria. That is, it is an error if the procedure has - to search all the way to the end of the dotted list. - However, this SRFI does not specify anything at all - about the behavior of these procedures when passed a - dotted list containing an element satisfying the search - criteria. It may finish successfully, signal an error, - or perform some third action. Different implementations - may provide different functionality in this case; code - which is compliant with this SRFI may not rely on any - particular behavior. Future SRFI's may refine SRFI-1 - to define specific behavior in this case. -

- In brief, SRFI-1 compliant code may not pass a dotted - list argument to these procedures. - -

Circular lists: -
It is an error to pass these procedures a circular list - that does not contain an element satisfying the search - criteria. Note that the procedure is not required to - detect this case; it may simply diverge. It is, however, - acceptable to search a circular list if the search is - successful -- that is, if the list contains an element - satisfying the search criteria. -
-

-Here are some examples, using the find and any procedures as canonical -representatives: -

-;; Proper list -- success
-(find even? '(1 2 3))	=> 2
-(any  even? '(1 2 3))	=> #t
-
-;; proper list -- failure
-(find even? '(1 7 3))	=> #f
-(any  even? '(1 7 3))	=> #f
-
-;; Failure is error on a dotted list.
-(find even? '(1 3 . x))	=> error
-(any  even? '(1 3 . x))	=> error
-
-;; The dotted list contains an element satisfying the search.
-;; This case is not specified -- it could be success, an error, 
-;; or some third possibility.
-(find even? '(1 2 . x))	=> error/undefined
-(any  even? '(1 2 . x))	=> error/undefined ; success, error or other.
-
-;; circular list -- success
-(find even? (circular-list 1 6 3)) => 6
-(any  even? (circular-list 1 6 3)) => #t
-
-;; circular list -- failure is error. Procedure may diverge.
-(find even? (circular-list 1 3)) => error
-(any  even? (circular-list 1 3)) => error
-
- -
- -
- -find pred clist -> value -
- Return the first element of clist that satisfies predicate pred; - false if no element does. - -
-(find even? '(3 1 4 1 5 9)) => 4
-
- - Note that find has an ambiguity in its lookup semantics -- if find - returns #f, you cannot tell (in general) if it found a #f element - that satisfied pred, or if it did not find any element at all. In - many situations, this ambiguity cannot arise -- either the list being - searched is known not to contain any #f elements, or the list is - guaranteed to have an element satisfying pred. However, in cases - where this ambiguity can arise, you should use find-tail instead of - find -- find-tail has no such ambiguity: -
-(cond ((find-tail pred lis) => (lambda (pair) ...)) ; Handle (CAR PAIR)
-      (else ...)) ; Search failed.
-
- - -
- -find-tail pred clist -> pair or false -
- Return the first pair of clist whose car satisfies pred. If no pair does, - return false. -

- find-tail can be viewed as a general-predicate variant of the member - function. -

- Examples: -

-(find-tail even? '(3 1 37 -8 -5 0 0)) => (-8 -5 0 0)
-(find-tail even? '(3 1 37 -5)) => #f
-
-;; MEMBER X LIS:
-(find-tail (lambda (elt) (equal? x elt)) lis)
-
- - In the circular-list case, this procedure "rotates" the list. - -

- Find-tail is essentially drop-while, - where the sense of the predicate is inverted: - Find-tail searches until it finds an element satisfying - the predicate; drop-while searches until it finds an - element that doesn't satisfy the predicate. - - -

- -take-while  pred clist -> list -
- -take-while! pred clist -> list -
- -Returns the longest initial prefix of clist whose elements all -satisfy the predicate pred. - -

-Take-while! is the linear-update variant. It is allowed, but not -required, to alter the argument list to produce the result. - -

-(take-while even? '(2 18 3 10 22 9)) => (2 18)
-
- - -
- -drop-while pred clist -> list -
-Drops the longest initial prefix of clist whose elements all -satisfy the predicate pred, and returns the rest of the list. - -
-(drop-while even? '(2 18 3 10 22 9)) => (3 10 22 9)
-
-The circular-list case may be viewed as "rotating" the list. - - - -
- -span   pred clist -> [list clist] -
- -span!  pred list  -> [list list] -
- -break  pred clist -> [list clist] -
- -break! pred list  -> [list list] -
- -Span splits the list into the longest initial prefix whose -elements all satisfy pred, and the remaining tail. -Break inverts the sense of the predicate: -the tail commences with the first element of the input list -that satisfies the predicate. - -

-In other words: -span finds the intial span of elements -satisfying pred, -and break breaks the list at the first element satisfying -pred. - -

-Span is equivalent to -

-(values (take-while pred clist) 
-        (drop-while pred clist))
-
- -

-Span! and break! are the linear-update variants. -They are allowed, but not required, -to alter the argument list to produce the result. - -

-(span even? '(2 18 3 10 22 9)) =>
-  (2 18)
-  (3 10 22 9)
-
-(break even? '(3 1 4 1 5 9)) =>
-  (3 1)
-  (4 1 5 9)
-
- - - -
- -any pred clist1 clist2 ... -> value -
- Applies the predicate across the lists, returning true if the predicate - returns true on any application. -

- If there are n list arguments clist1 ... clistn, then pred must be a - procedure taking n arguments and returning a boolean result. -

- any applies pred to the first elements of the clisti parameters. - If this application returns a true value, any immediately returns - that value. Otherwise, it iterates, applying pred to the second - elements of the clisti parameters, then the third, and so forth. - The iteration stops when a true value is produced or one of the lists runs - out of values; in - the latter case, any returns #f. - The application of pred to the last element of the - lists is a tail call. -

- Note the difference between find and any -- find returns the element - that satisfied the predicate; any returns the true value that the - predicate produced. -

- Like every, any's name does not end with a question mark -- this is to - indicate that it does not return a simple boolean (#t or #f), but a - general value. - -

-(any integer? '(a 3 b 2.7))   => #t
-(any integer? '(a 3.1 b 2.7)) => #f
-(any < '(3 1 4 1 5)
-       '(2 7 1 8 2)) => #t
-
- - -
- -every pred clist1 clist2 ... -> value -
- Applies the predicate across the lists, returning true if the predicate - returns true on every application. -

- If there are n list arguments clist1 ... clistn, then pred must be a - procedure taking n arguments and returning a boolean result. -

- every applies pred to the first elements of the clisti parameters. - If this application returns false, every immediately returns false. - Otherwise, it iterates, applying pred to the second elements of the - clisti parameters, then the third, and so forth. The iteration stops - when a false value is produced or one of the lists runs out of values. - In the latter case, every returns - the true value produced by its final application of pred. - The application of pred to the last element of the lists - is a tail call. -

- If one of the clisti has no elements, every simply returns #t. -

- Like any, every's name does not end with a question mark -- this is to - indicate that it does not return a simple boolean (#t or #f), but a - general value. - - -

- -list-index pred clist1 clist2 ... -> integer or false -
- Return the index of the leftmost element that satisfies pred. -

- If there are n list arguments clist1 ... clistn, then pred must be a - function taking n arguments and returning a boolean result. -

- list-index applies pred to the first elements of the clisti parameters. - If this application returns true, list-index immediately returns zero. - Otherwise, it iterates, applying pred to the second elements of the - clisti parameters, then the third, and so forth. When it finds a tuple of - list elements that cause pred to return true, it stops and returns the - zero-based index of that position in the lists. -

- The iteration stops when one of the lists runs out of values; in this - case, list-index returns #f. - -

-(list-index even? '(3 1 4 1 5 9)) => 2
-(list-index < '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => 1
-(list-index = '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => #f
-
- - -
- -member x list [=] -> list -
- -memq x list -> list -
- -memv x list -> list -
- [R5RS+] - - These procedures return the first sublist of list whose car is - x, where the sublists of list are the - non-empty lists returned by - (drop list i) - for i less than the length of list. - If x does - not occur in list, then #f is returned. - memq uses eq? to compare x - with the elements of list, - while memv uses eqv?, and - member uses equal?. - -
-    (memq 'a '(a b c))          =>  (a b c)
-    (memq 'b '(a b c))          =>  (b c)
-    (memq 'a '(b c d))          =>  #f
-    (memq (list 'a) '(b (a) c)) =>  #f
-    (member (list 'a)
-            '(b (a) c))         =>  ((a) c)
-    (memq 101 '(100 101 102))   =>  *unspecified*
-    (memv 101 '(100 101 102))   =>  (101 102)
-
- - member is extended from its - R5RS - definition to allow the client to pass in - an optional equality procedure = used to compare keys. - -

- The comparison procedure is used to compare the elements ei of list - to the key x in this way: -

-(= x ei) ; list is (E1 ... En) -
- That is, the first argument is always x, and the second argument is - one of the list elements. Thus one can reliably find the first element - of list that is greater than five with - (member 5 list <) - -

- Note that fully general list searching may be performed with - the find-tail and find procedures, e.g. -

-(find-tail even? list) ; Find the first elt with an even key.
-
- -
- - -

Deletion

-

- -

- -
- -delete  x list [=] -> list -
- -delete! x list [=] -> list -
- delete uses the comparison procedure =, which defaults to equal?, to find - all elements of list that are equal to x, and deletes them from list. The - dynamic order in which the various applications of = are made is not - specified. - -

- The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The result may share a common tail with the argument list. - -

- Note that fully general element deletion can be performed with the remove - and remove! procedures, e.g.: -

-;; Delete all the even elements from LIS:
-(remove even? lis)
-
- - The comparison procedure is used in this way: - (= x ei). - That is, x is always the first argument, - and a list element is always the - second argument. The comparison procedure will be used to compare each - element of list exactly once; the order in which it is applied to the - various ei is not specified. Thus, one can reliably remove all the - numbers greater than five from a list with - (delete 5 list <) - -

- delete! is the linear-update variant of delete. - It is allowed, but not required, to alter the cons cells in - its argument list to construct the result. - - -

- -delete-duplicates  list [=] -> list -
- -delete-duplicates! list [=] -> list -
- delete-duplicates removes duplicate elements from the - list argument. - If there are multiple equal elements in the argument list, the result list - only contains the first or leftmost of these elements in the result. - The order of these surviving elements is the same as in the original - list -- delete-duplicates does not disorder the list (hence it is useful - for "cleaning up" association lists). -

- The = parameter is used to compare the elements of the list; it defaults - to equal?. If x comes before y in list, then the comparison is performed - (= x y). - The comparison procedure will be used to compare each pair of elements in - list no more than once; - the order in which it is applied to the various pairs is not specified. -

- Implementations of delete-duplicates - are allowed to share common tails - between argument and result lists -- for example, if the list argument - contains only unique elements, it may simply return exactly - this list. -

- Be aware that, in general, delete-duplicates - runs in time O(n2) for n-element lists. - Uniquifying long lists can be accomplished in O(n lg n) time by sorting - the list to bring equal elements together, then using a linear-time - algorithm to remove equal elements. Alternatively, one can use algorithms - based on element-marking, with linear-time results. - -

- delete-duplicates! is the linear-update variant of delete-duplicates; it - is allowed, but not required, to alter the cons cells in its argument - list to construct the result. -

-(delete-duplicates '(a b a c a b c z)) => (a b c z)
-
-;; Clean up an alist:
-(delete-duplicates '((a . 3) (b . 7) (a . 9) (c . 1))
-                   (lambda (x y) (eq? (car x) (car y))))
-    => ((a . 3) (b . 7) (c . 1))
-
-
- - -

Association lists

-

-An "association list" (or "alist") is a list of pairs. The car of each pair -contains a key value, and the cdr contains the associated data value. They can -be used to construct simple look-up tables in Scheme. Note that association -lists are probably inappropriate for performance-critical use on large data; -in these cases, hash tables or some other alternative should be employed. - -

- -
- -assoc key alist [=] -> pair or #f -
- -assq key alist -> pair or #f -
- -assv key alist -> pair or #f -
- - [R5RS+] - alist must be an association list -- a list of pairs. - These procedures - find the first pair in alist whose car field is key, - and returns that pair. - If no pair in alist has key as its car, - then #f is returned. - assq uses eq? to compare key - with the car fields of the pairs in alist, - while assv uses eqv? - and assoc uses equal?. -
-(define e '((a 1) (b 2) (c 3)))
-(assq 'a e)                            =>  (a 1)
-(assq 'b e)                            =>  (b 2)
-(assq 'd e)                            =>  #f
-(assq (list 'a) '(((a)) ((b)) ((c))))  =>  #f
-(assoc (list 'a) '(((a)) ((b)) ((c)))) =>  ((a))
-(assq 5 '((2 3) (5 7) (11 13)))	   =>  *unspecified*
-(assv 5 '((2 3) (5 7) (11 13)))	   =>  (5 7)
-
- - assoc is extended from its - R5RS - definition to allow the client to pass in - an optional equality procedure = used to compare keys. - -

- The comparison procedure is used to compare the elements ei of list - to the key parameter in this way: -

-(= key (car ei)) ; list is (E1 ... En) -
- That is, the first argument is always key, - and the second argument is one of the list elements. - Thus one can reliably find the first entry - of alist whose key is greater than five with - (assoc 5 alist <) - -

- Note that fully general alist searching may be performed with - the find-tail and find procedures, e.g. -

-;; Look up the first association in alist with an even key:
-(find (lambda (a) (even? (car a))) alist)
-
- - - -
- -alist-cons key datum alist -> alist -
-
-(lambda (key datum alist) (cons (cons key datum) alist))
-
- Cons a new alist entry mapping key -> datum onto alist. - - -
- -alist-copy alist -> alist -
- Make a fresh copy of alist. This means copying each pair that - forms an association as well as the spine of the list, i.e. -
-(lambda (a) (map (lambda (elt) (cons (car elt) (cdr elt))) a))
-
- - -
- -alist-delete  key alist [=] -> alist -
- -alist-delete! key alist [=] -> alist -
- alist-delete deletes all associations from alist with the given key, - using key-comparison procedure =, which defaults to equal?. - The dynamic order in which the various applications of = are made is not - specified. -

- Return values may share common tails with the alist argument. - The alist is not disordered -- elements that appear in the result alist - occur in the same order as they occur in the argument alist. -

- The comparison procedure is used to compare the element keys ki of alist's - entries to the key parameter in this way: - (= key ki). - Thus, one can reliably remove all entries of alist whose key is greater - than five with - (alist-delete 5 alist <) -

- alist-delete! is the linear-update variant of alist-delete. - It is allowed, but not required, - to alter cons cells from the alist parameter to construct the result. - -

- - - -

Set operations on lists

-

-These procedures implement operations on sets represented as lists of elements. -They all take an = argument used to compare elements of lists. -This equality procedure is required to be consistent with eq?. -That is, it must be the case that -

- (eq? x y) => (= x y). -
-

-Note that this implies, in turn, that two lists that are eq? are -also set-equal by any legal comparison procedure. This allows for -constant-time determination of set operations on eq? lists. - -

-Be aware that these procedures typically run in time -O(n * m) -for n- and m-element list arguments. -Performance-critical applications -operating upon large sets will probably wish to use other data -structures and algorithms. - -

- -
- -lset<= = list1 ... -> boolean -
- Returns true iff every listi is a subset of listi+1, using = for - the element-equality procedure. - List A is a subset of list B if every - element in A is equal to some element of B. - When performing an element comparison, - the = procedure's first argument is an element - of A; its second, an element of B. -
-(lset<= eq? '(a) '(a b a) '(a b c c)) => #t
-
-(lset<= eq?) => #t             ; Trivial cases
-(lset<= eq? '(a)) => #t
-
- - -
- -lset= = list1 list2 ... -> boolean -
- Returns true iff every listi is set-equal to listi+1, using = for - the element-equality procedure. "Set-equal" simply means that - listi is a subset of listi+1, and listi+1 is a subset of listi. - The = procedure's first argument is an element of listi; its second is an element of - listi+1. -
-(lset= eq? '(b e a) '(a e b) '(e e b a)) => #t
-
-(lset= eq?) => #t               ; Trivial cases
-(lset= eq? '(a)) => #t
-
- - -
- -lset-adjoin = list elt1 ... -> list -
- Adds the elti elements not already in the list parameter to the - result list. The result shares a common tail with the list parameter. - The new elements are added to the front of the list, but no guarantees - are made about their order. The = parameter is an equality procedure - used to determine if an elti is already a member of list. Its first - argument is an element of list; its second is one of the elti. -

- The list parameter is always a suffix of the result -- even if the list - parameter contains repeated elements, these are not reduced. -

-(lset-adjoin eq? '(a b c d c e) 'a 'e 'i 'o 'u) => (u o i a b c d c e)
-
- - -
- -lset-union = list1 ... -> list -
- Returns the union of the lists, using = for the element-equality - procedure. -

- The union of lists A and B is constructed as follows: -

    -
  • If A is the empty list, - the answer is B (or a copy of B). -
  • Otherwise, the result is initialised to be list A - (or a copy of A). -
  • Proceed through the elements of list B - in a left-to-right order. - If b is such an element of B, - compare every element r of the current result list - to b: - (= r b). - If all comparisons fail, - b is consed onto the front of the result. -
- However, there is no guarantee that = will be applied to every pair - of arguments from A and B. - In particular, if A is eq? to B, - the operation may immediately terminate. - -

- In the n-ary case, the two-argument list-union operation is simply - folded across the argument lists. - -

-(lset-union eq? '(a b c d e) '(a e i o u)) => 
-    (u o i a b c d e)
-
-;; Repeated elements in LIST1 are preserved.
-(lset-union eq? '(a a c) '(x a x)) => (x a a c)
-
-;; Trivial cases
-(lset-union eq?) => ()
-(lset-union eq? '(a b c)) => (a b c)
-
- - -
- -lset-intersection = list1 list2 ... -> list -
- Returns the intersection of the lists, - using = for the element-equality procedure. -

- The intersection of lists A and B - is comprised of every element of A that is = - to some element of B: - (= a b), - for a in A, and b in B. - Note this implies that an element which appears in B - and multiple times in list A - will also appear multiple times in the result. -

- The order in which elements appear in the result is the same as - they appear in list1 -- - that is, lset-intersection essentially filters - list1, - without disarranging element order. - The result may - share a common tail with list1. -

- In the n-ary case, the two-argument list-intersection operation is simply - folded across the argument lists. However, the dynamic order in which the - applications of = are made is not specified. - The procedure may check an - element of list1 for membership - in every other list before proceeding to - consider the next element of list1, - or it may completely intersect list1 - and list2 - before proceeding to list3, - or it may go about its work in some third order. - -

-(lset-intersection eq? '(a b c d e) '(a e i o u)) => (a e)
-
-;; Repeated elements in LIST1 are preserved.
-(lset-intersection eq? '(a x y a) '(x a x z)) => '(a x a)
-
-(lset-intersection eq? '(a b c)) => (a b c)     ; Trivial case
-
- - -
- -lset-difference = list1 list2 ... -> list -
- Returns the difference of the lists, using = for the element-equality - procedure -- all the elements of list1 that are not - = to any element from one of the - other listi parameters. -

- The = procedure's first argument is - always an element of list1; - its second is an element of one of the other listi. - Elements that are repeated multiple times in the - list1 parameter - will occur multiple times in the result. - - The order in which elements appear in the result is the same as - they appear in list1 -- - that is, lset-difference essentially - filters list1, without disarranging element order. - The result may share a common tail with list1. - - The dynamic order in which the applications of = are made is not - specified. - The procedure may check an element of list1 - for membership in every other list before proceeding to consider the - next element of list1, - or it may completely compute the difference of - list1 and list2 before - proceeding to list3, - or it may go about its work in some third order. - -

-(lset-difference eq? '(a b c d e) '(a e i o u)) => (b c d)
-
-(lset-difference eq? '(a b c)) => (a b c) ; Trivial case
-
- - -
- -lset-xor = list1 ... -> list -
- Returns the exclusive-or of the sets, - using = for the element-equality procedure. - If there are exactly two lists, this is all the elements - that appear in exactly one of the two lists. The operation is associative, - and thus extends to the n-ary case -- the elements that appear in an - odd number of the lists. The result may share a common tail with any of - the listi parameters. -

- More precisely, for two lists A and B, - A xor B is a list of -

    -
  • every element a of A - such that there is no element b of B - such that (= a b), and -
  • every element b of B - such that there is no element a of A - such that (= b a). -
- However, an implementation is allowed to assume that = is - symmetric -- that is, that -
- (= a b) => - (= b a). -
- This means, for example, that if a comparison - (= a b) produces - true for some a in A - and b in B, - both a and b may be removed from - inclusion in the result. -

- In the n-ary case, the binary-xor operation is simply folded across - the lists. - -

-(lset-xor eq? '(a b c d e) '(a e i o u)) => (d c b i o u)
-
-;; Trivial cases.
-(lset-xor eq?) => ()
-(lset-xor eq? '(a b c d e)) => (a b c d e)
-
- - - -
- -lset-diff+intersection = list1 list2 ... -> [list list] -
- Returns two values -- the difference and the intersection of the lists. - Is equivalent to -
-(values (lset-difference = list1 list2 ...)
-        (lset-intersection = list1
-                             (lset-union = list2 ...)))
-
- but can be implemented more efficiently. -

- The = procedure's first argument is an element of list1; its second - is an element of one of the other listi. -

- Either of the answer lists may share a - common tail with list1. - This operation essentially partitions list1. - - -

- -lset-union!             = list1 ... -> list -
- -lset-intersection!      = list1 list2 ... -> list -
- -lset-difference!        = list1 list2 ... -> list -
- -lset-xor!               = list1 ... -> list -
- -lset-diff+intersection! = list1 list2 ... -> [list list] -
- These are linear-update variants. They are allowed, but not required, - to use the cons cells in their first list parameter to construct their - answer. lset-union! is permitted to recycle cons cells from any - of its list arguments. -
- - -

Primitive side-effects

-

-These two procedures are the primitive, -R5RS -side-effect operations on pairs. - -

- -
- -set-car! pair object -> unspecified -
- -set-cdr! pair object -> unspecified -
- [R5RS] - These procedures store object in the car and cdr field - of pair, respectively. - The value returned is unspecified. -
-(define (f) (list 'not-a-constant-list))
-(define (g) '(constant-list))
-(set-car! (f) 3) =>  *unspecified*
-(set-car! (g) 3) =>  *error*
-
-
- - -

Acknowledgements

-

-The design of this library benefited greatly from the feedback provided during -the SRFI discussion phase. Among those contributing thoughtful commentary and -suggestions, both on the mailing list and by private discussion, were Mike -Ashley, Darius Bacon, Alan Bawden, Phil Bewig, Jim Blandy, Dan Bornstein, Per -Bothner, Anthony Carrico, Doug Currie, Kent Dybvig, Sergei Egorov, Doug Evans, -Marc Feeley, Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Dan Friedman, -Lars Thomas Hansen, Brian Harvey, Erik Hilsdale, Wolfgang Hukriede, Richard -Kelsey, Donovan Kolbly, Shriram Krishnamurthi, Dave Mason, Jussi Piitulainen, -David Pokorny, Duncan Smith, Mike Sperber, Maciej Stachowiak, Harvey J. Stein, -John David Stone, and Joerg F. Wittenberger. I am grateful to them for their -assistance. -

-I am also grateful the authors, implementors and documentors of all the systems -mentioned in the rationale. Aubrey Jaffer and Kent Pitman should be noted -for their work in producing Web-accessible versions of the R5RS and -Common Lisp spec, which was a tremendous aid. -

-This is not to imply that these individuals necessarily endorse the final -results, of course. - - - -

References & links

-

- -

-
This document, in HTML: -
- http://srfi.schemers.org/srfi-1/srfi-1.html - -
Source code for the reference implementation: -
- http://srfi.schemers.org/srfi-1/srfi-1-reference.scm - -
Archive of SRFI-1 discussion-list email: -
- http://srfi.schemers.org/srfi-1/mail-archive/maillist.html - -
SRFI web site: -
- http://srfi.schemers.org/ -
- -

- -

-
[CommonLisp]
-
Common Lisp: the Language
-Guy L. Steele Jr. (editor).
-Digital Press, Maynard, Mass., second edition 1990.
-Available at -http://www.elwood.com/alu/table/references.htm#cltl2. -

- -The Common Lisp "HyperSpec," produced by Kent Pitman, is essentially -the ANSI spec for Common Lisp: - -http://www.harlequin.com/education/books/HyperSpec/. - -

[R5RS]
-
Revised5 report on the algorithmic language Scheme.
- R. Kelsey, W. Clinger, J. Rees (editors).
- Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998.
- and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998.
- Available at - http://www.schemers.org/Documents/Standards/. - -
- - - - -

Copyright

-

- -Certain portions of this document -- the specific, marked segments of text -describing the R5RS procedures -- were adapted with permission from the R5RS -report. -

- -All other text is copyright (C) Olin Shivers (1998, 1999). -All Rights Reserved. -

-This document and translations of it may be copied and furnished to -others, and derivative works that comment on or otherwise explain it -or assist in its implementation may be prepared, copied, published and -distributed, in whole or in part, without restriction of any kind, -provided that the above copyright notice and this paragraph are -included on all such copies and derivative works. However, this -document itself may not be modified in any way, such as by removing -the copyright notice or references to the Scheme Request For -Implementation process or editors, except as needed for the purpose of -developing SRFIs in which case the procedures for copyrights defined -in the SRFI process must be followed, or as required to translate it -into languages other than English. -

-The limited permissions granted above are perpetual and will not be -revoked by the authors or their successors or assigns. -

-This document and the information contained herein is provided on an -"AS IS" basis and THE AUTHOR AND THE SRFI EDITORS DISCLAIM ALL -WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY -WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY -RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A -PARTICULAR PURPOSE. - - - diff --git a/scsh/lib/srfi-1.txt b/scsh/lib/srfi-1.txt deleted file mode 100644 index bdff052..0000000 --- a/scsh/lib/srfi-1.txt +++ /dev/null @@ -1,2015 +0,0 @@ -The SRFI-1 list library -*- outline -*- -Olin Shivers -98/10/16 -Last Update: 99/10/3 - -Emacs should display this document in outline mode. Say c-h m for -instructions on how to move through it by sections (e.g., c-c c-n, c-c c-p). -During the SRFI discussion period, the current draft may be found at - ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt - - -* Table of contents -------------------- - -Abstract -Introduction -Procedure index -General discussion - "Linear update" procedures - Improper lists - Errors - Not included in this library -The procedures - Constructors - Predicates - Selectors - Miscellaneous: length, append, reverse, zip & count - Fold, unfold & map - Filtering & partitioning - Searching - Deletion - Association lists - Set operations on lists - Primitive side-effects -Acknowledgements -References & links -Copyright - - -* Abstract ----------- - -R5RS Scheme has an impoverished set of list-processing utilities, which is a -problem for authors of portable code. This SRFI proposes a coherent and -comprehensive set of list-processing procedures; it is accompanied by a -reference implementation of the spec. The reference implementation is - - portable - - efficient - - completely open, public-domain source - - -* Introduction --------------- - -The set of basic list and pair operations provided by R4RS/R5RS Scheme is far -from satisfactory. Because this set is so small and basic, most -implementations provide additional utilities, such as a list-filtering -function, or a "left fold" operator, and so forth. But, of course, this -introduces incompatibilities -- different Scheme implementations provide -different sets of procedures. - -I have designed a full-featured library of procedures for list processing. -While putting this library together, I checked as many Schemes as I could get -my hands on. (I have a fair amount of experience with several of these -already.) I missed Chez -- no on-line manual that I can find -- but I hit most -of the other big, full-featured Schemes. The complete list of list-processing -systems I checked is: - R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common - Lisp, Bigloo, guile, T, APL and the SML standard basis -As a result, the library I am proposing is fairly rich. - -Following this initial design phase, this library went through several -months of discussion on the SRFI mailing lists, and was altered in light -of the ideas and suggestions put forth during this discussion. - -In parallel with designing this API, I have also written a reference -implementation. I have placed this source on the Net with an unencumbered, -"open" copyright. A few notes about the reference implementation: - - - Although I got procedure names and specs from many Schemes, I wrote this - code myself. Thus, there are *no* entanglements. Any Scheme implementor - can pick this library up with no worries about copyright problems -- both - commercial and non-commercial systems. - - - The code is written for portability and should be trivial to port to - any Scheme. It has only four deviations from R4RS, clearly discussed - in the comments: - - Use of an ERROR procedure; - - Use of the R5RS VALUES and a simple RECEIVE macro for producing - and consuming multiple return values; - - Use of simple :OPTIONAL and LET-OPTIONALS macros for optional - argument parsing and defaulting; - - Use of a simple CHECK-ARG procedure for argument checking. - - - It is written for clarity and well-commented. The current source is - 768 lines of source code and 826 lines of comments and white space. - - - It is written for efficiency. Fast paths are provided for common - cases. Side-effecting procedures such as FILTER! avoid unnecessary, - redundant SET-CDR!s which would thrash a generational GC's write barrier - and the store buffers of fast processors. Functions reuse longest common - tails from input parameters to construct their results where - possible. Constant-space iterations are used in preference to recursions; - local recursions are used in preference to consing temporary intermediate - data structures. - - This is not to say that the implementation can't be tuned up for - a specific Scheme implementation. There are notes in comments addressing - ways implementors can tune the reference implementation for performance. - -In short, I've written the reference implementation to make it as painless -as possible for an implementor -- or a regular programmer -- to adopt this -library and get good results with it. - - - -* Procedure index ------------------ -Here is a short list of the procedures provided by the list-lib package. -"#" marks R5RS procedures; "+" marks extended R5RS procedures - -Constructors -# cons list - xcons cons* make-list list-tabulate - list-copy circular-list iota - -Predicates -# pair? null? - proper-list? circular-list? dotted-list? - not-pair? null-list? - list= - -Selectors -# car cdr ... cdddar cddddr list-ref - first second third fourth fifth sixth seventh eighth ninth tenth - car+cdr - take drop - take-right drop-right - take! drop-right! - split-at split-at! - last last-pair - -Miscellaneous: length, append, concatenate, reverse, zip & count -# length - length+ -# append reverse - append! reverse! - concatenate concatenate! - append-reverse append-reverse! - zip unzip1 unzip2 unzip3 unzip4 unzip5 - count - -Fold, unfold & map -+ map for-each - fold unfold pair-fold reduce - fold-right unfold-right pair-fold-right reduce-right - append-map append-map! - map! pair-for-each filter-map map-in-order - -Filtering & partitioning - filter partition remove - filter! partition! remove! - -Searching -+ member -# memq memv - find - any every - list-index - take-while drop-while take-while! - span break span! break! - -Deleting - delete delete-duplicates - delete! delete-duplicates! - -Association lists -+ assoc -# assq assv - alist-cons alist-copy - alist-delete alist-delete! - -Set operations on lists - lset<= lset= lset-adjoin - lset-union lset-union! - lset-intersection lset-intersection! - lset-difference lset-difference! - lset-xor lset-xor! - lset-diff+intersection lset-diff+intersection! - -Primitive side effects -# set-car! set-cdr! - ------- -Four R4RS/R5RS list-processing procedures are extended by this library in -backwards-compatible ways: - map for-each (Extended to take lists of unequal length) - member assoc (Extended to take an optional comparison procedure) - -The following R4RS/R5RS list- and pair-processing procedures are also part of -list-lib's exports, as defined by the R5RS report: - cons pair? null? list length append reverse - car cdr ... cdddar cddddr set-car! set-cdr! list-ref - memq memv assq assv - -The remaining two R4RS/R5RS list-processing procedures are *not* part of -this library: - list-tail (renamed DROP) - list? (see PROPER-LIST?, CIRCULAR-LIST? and DOTTED-LIST?) - - - -* General discussion --------------------- - -A set of general criteria guided the design of this library. - -I don't require "destructive" (what I call "linear update") procedures to -alter and recycle cons cells from the argument lists. They are allowed to, but -not required to. (The reference implementations I have written *do* recycle -the argument lists.) See below for further discussion. - -List-filtering procedures such as FILTER or DELETE do not disorder -lists. Elements appear in the answer list in the same order as they appear in -the argument list. This constrains implementation, but seems like a desirable -feature, since in many uses of lists, order matters. (In particular, -disordering an alist is definitely a bad idea.) - -Contrariwise, although the reference implementations of the list-filtering -procedures share longest common tails between argument and answer lists, -it not is part of the spec. - -Because lists are an inherently sequential data structure (unlike, say, -vectors), list-inspection functions such as FIND, FIND-TAIL, FOR-EACH, ANY -and EVERY commit to a left-to-right traversal order of their argument list. - -However, constructor functions, such as LIST-TABULATE and the mapping -procedures (APPEND-MAP, APPEND-MAP!, MAP!, PAIR-FOR-EACH, FILTER-MAP, -MAP-IN-ORDER) do *not* specify the dynamic order in which their -procedural argument is applied to its various values. - -Predicates return useful true values wherever possible. Thus ANY must return -the true value produced by its predicate, and EVERY returns the final true -value produced by applying its predicate argument to the last element of its -argument list. - -Functionality is provided both in pure and linear-update (potentially -destructive) forms wherever this makes sense. - -No special status accorded Scheme's built-in equality functions. -Any functionality provided in terms of EQ?, EQV?, EQUAL? is also -available using a client-provided equality function. - -Proper design counts for more than backwards compatibility, but I have tried, -ceteris paribus, to be as backwards-compatible as possible with existing -list-processing libraries, in order to facilitate porting old code to run as a -client of the procedures in this library. Name choices and semantics are, for -the most part, in agreement with existing practice in many current Scheme -systems. I have indicated some incompatibilities in the following text. - -These procedures are *not* "sequence generic" -- i.e., procedures that -operate on either vectors and lists. They are list-specific. I prefer to -keep the library simple and focussed. - -I have named these procedures without a qualifying initial "list-" -lexeme, which is in keeping with the existing set of list-processing -utilities in Scheme. I follow the general Scheme convention -(VECTOR-LENGTH, STRING-REF) of placing the type-name before the action -when naming procedures -- so we have LIST-COPY and PAIR-FOR-EACH rather -than the perhaps more fluid, but less consistent, COPY-LIST, or -FOR-EACH-PAIR. - -I have generally followed a regular and consistent naming scheme, composing -procedure names from a set of basic lexemes. - - -** "Linear update" procedures -============================= - -Many procedures in this library have "pure" and "linear update" variants. A -"pure" procedure has no side-effects, and in particular does not alter its -arguments in any way. A "linear update" procedure is allowed -- but *not* -required -- to side-effect its arguments in order to construct its -result. "Linear update" procedures are typically given names ending with an -exclamation point. So, for example, (APPEND! list1 list2) is allowed to -construct its result by simply using SET-CDR! to set the cdr of the last pair -of list1 to point to list2, and then returning list1 (unless list1 is the -empty list, in which case it would simply return list2). However, APPEND! may -also elect to perform a pure append operation -- this is a legal definition -of APPEND!: - (define append! append) -This is why we do not call these procedures "destructive" -- because they -aren't *required* to be destructive. They are *potentially* destructive. - -What this means is that you may only apply linear-update procedures to -values that you know are "dead" -- values that will never be used again -in your program. This must be so, since you can't rely on the value passed -to a linear-update procedure after that procedure has been called. It -might be unchanged; it might be altered. - -The "linear" in "linear update" doesn't mean "linear time" or "linear space" -or any sort of multiple-of-n kind of meaning. It's a fancy term that type -theorists and pure functional programmers use to describe systems where you -are only allowed to have exactly one reference to each variable. This provides -a guarantee that the value bound to a variable is bound to no other -variable. So when you *use* a variable in a variable reference, you "use it -up." Knowing that no one else has a pointer to that value means the system -primitive is free to side-effect its arguments to produce what is, -observationally, a pure-functional result. - -In the context of this library, "linear update" means you, the programmer, -know there are *no other* live references to the value passed to the -procedure -- after passing the value to one of these procedures, the -value of the old pointer is indeterminate. Basically, you are licensing -the Scheme implementation to alter the data structure if it feels like -it -- you have declared you don't care either way. - -You get no help from Scheme in checking that the values you claim are "linear" -really are. So you better get it right. Or play it safe and use the non-! -procedures -- it doesn't do any good to compute quickly if you get the wrong -answer. - -Why go to all this trouble to define the notion of "linear update" and use it -in a procedure spec, instead of the more common notion of a "destructive" -operation? First, note that destructive list-processing procedures are almost -always used in a linear-update fashion. This is in part required by the -special case of operating upon the empty list, which can't be side-effected. -This means that destructive operators are not pure side-effects -- they have -to return a result. Second, note that code written using linear-update -operators can be trivially ported to a pure, functional subset of Scheme by -simply providing pure implementations of the linear-update operators. Finally, -requiring destructive side-effects ruins opportunities to parallelise these -operations -- and the places where one has taken the trouble to spell out -destructive operations are usually exactly the code one would want a -parallelising compiler to parallelise: the efficiency-critical kernels of the -algorithm. Linear-update operations are easily parallelised. Going with a -linear-update spec doesn't close off these valuable alternative implementation -techniques. This list library is intended as a set of low-level, basic -operators, so we don't want to exclude these possible implementations. - -The linear-update procedures in this library are - take! drop-right! - append! reverse! append-reverse! - append-map! map! - filter! partition! remove! - delete! alist-delete! delete-duplicates! - lset-adjoin! lset-union! lset-intersection! lset-difference! lset-xor! - lset-diff+intersection! - - -** Improper lists -================= - -Scheme does not properly have a list type, just as C does not have a string -type. Rather, Scheme has a binary-tuple type, from which one can build binary -trees. There is an *interpretation* of Scheme values that allows one to treat -these trees as lists. Further complications ensue from the fact that Scheme -allows side-effects to these tuples, raising the possibility of lists of -unbounded length, and trees of unbounded depth (that is, circular data -structures). - -However, there is a simple view of the world of Scheme values that considers -every value to be a list of some sort. That is, every value is either - - a "proper list" -- a finite, nil-terminated list, such as: - (a b c) - () - (32) - - a "dotted list" -- a finite, non-nil terminated list, such as - (a b c . d) - (x . y) - 42 - george - - or a "circular list" -- an infinite, unterminated list. -Note that the zero-length dotted lists are simply all the non-null, non-pair -values. - -This view is captured by the predicates PROPER-LIST?, DOTTED-LIST?, and -CIRCULAR-LIST?. List-lib users should note that dotted lists are not commonly -used, and are considered by many Scheme programmers to be an ugly artifact of -Scheme's lack of a true list type. However, dotted lists do play a noticeable -role in the *syntax* of Scheme, in the "rest" parameters used by n-ary -lambdas: (lambda (x y . rest) ...). - -Dotted lists are *not* fully supported by list-lib. Most procedures are -defined only on proper lists -- that is, finite, nil-terminated lists. The -procedures that will also handle circular or dotted lists are specifically -marked. While this design decision restricts the domain of possible arguments -one can pass to these procedures, it has the benefit of allowing the -procedures to catch the error cases where programmers inadvertently pass -scalar values to a list procedure by accident, e.g. by switching the arguments -to a procedure call. - - -** Errors -========= - -Note that statements of the form "it is an error" merely mean "don't -do that." They are not a guarantee that a conforming implementation will -"catch" such improper use by, for example, raising some kind of exception. -Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators -such as CAR and CDR, so there's little point in requiring these procedures to -do more. Here is the relevant section of the R5RS report: - - When speaking of an error situation, this report uses the phrase "an - error is signalled" to indicate that implementations must detect and - report the error. If such wording does not appear in the discussion - of an error, then implementations are not required to detect or - report the error, though they are encouraged to do so. An error - situation that implementations are not required to detect is usually - referred to simply as "an error." - - For example, it is an error for a procedure to be passed an argument - that the procedure is not explicitly specified to handle, even though - such domain errors are seldom mentioned in this report. - Implementations may extend a procedure's domain of definition to - include such arguments. - - -** Not included in this library -=============================== - -The following items are not in this library: -- Sort routines -- Destructuring/pattern-matching macro -- Tree-processing routines -They should have their own SRFI specs. - - - -* The procedures ----------------- -In a Scheme system that has a module or package system, these procedures -should be contained in a module named "list-lib". - -The templates given below obey the following conventions for procedure formals: - list A proper (finite, nil-terminated) list - clist A proper or circular list - flist A finite (proper or dotted) list - pair A pair - x, y, d, a Any value - object, value Any value - n, i A natural number (an integer >= 0) - proc A procedure - pred A procedure whose return value is treated as a boolean - = A boolean procedure taking two arguments - -It is an error to pass a circular or dotted list to a procedure not -defined to accept such an argument. - -** Constructors -=============== - -cons a d -> pair R5RS - The primitive constructor. Returns a newly allocated pair whose car is A - and whose cdr is D. The pair is guaranteed to be different (in the sense - of EQV?) from every existing object. - - (cons 'a '()) ==> (a) - (cons '(a) '(b c d)) ==> ((a) b c d) - (cons "a" '(b c)) ==> ("a" b c) - (cons 'a 3) ==> (a . 3) - (cons '(a b) 'c) ==> ((a b) . c) - -list object ... -> list R5RS - Returns a newly allocated list of its arguments. - - (list 'a (+ 3 4) 'c) ==> (a 7 c) - (list) ==> () - -xcons d a -> pair - (lambda (d a) (cons a d)) - Of utility only as a value to be conveniently passed to higher-order - procedures. - - (xcons '(b c) 'a) => (a b c) - - The name stands for "eXchanged CONS." - -cons* elt1 elt2 ... -> object - Like LIST, but the last argument provides the tail of the constructed - list, returning (cons elt1 (cons elt2 (cons ... eltn))). - This function is called LIST* in Common Lisp and about half of the - Schemes that provide it; and CONS* in the other half. - - (cons* 1 2 3 4) => (1 2 3 . 4) - (cons* 1) => 1 - -make-list n [fill] -> list - Returns an N-element list, whose elements are all the value FILL. - If the FILL argument is not given, the elements of the list may - be arbitrary values. - - (make-list 4 'c) => (c c c c) - (make-list 10) => (2 3 5 7 11 13 17 19 23 29) - -list-tabulate n init-proc -> list - Returns an N-element list. Element i of the list, where 0 <= i < N, - is produced by (INIT-PROC i). No guarantee is made about the dynamic - order in which INIT-PROC is applied to these indices. - - (list-tabulate 4 values) => (0 1 2 3) - -list-copy flist -> flist - Copies the "spine" of the argument. - -circular-list elt1 elt2 ... -> clist - Constructs a circular list of the elements. - (circular-list 'z 'q) => (z q z q z q ...) - -iota count [start step] -> list - Returns a list containing the elements - (start start+step ... start+(count-1)*step) - The START and STEP parameters default to 0 and 1, respectively. - This procedure takes its name from the APL primitive. - - (iota 5) => (0 1 2 3 4) - (iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4) - - -** Predicates -============= - -Note: the predicates PROPER-LIST?, CIRCULAR-LIST?, and DOTTED-LIST? -partition the entire universe of Scheme values. - -proper-list? x -> boolean - Returns true iff X is a proper list -- a finite, nil-terminated list. - - More carefully: The empty list is a proper list. A pair whose cdr is a - proper list is also a proper list: - ::= () (Empty proper list) - | (cons ) (Proper-list pair) - Note that this definition rules out circular lists. This - function is required to detect this case and return false. - - Nil-terminated lists are called "proper" lists by R5RS and Common Lisp. - The opposite of proper is improper. - - R5RS binds this function to the variable LIST?. - - (not (proper-list? x)) = (or (dotted-list? x) (circular-list? x)) - -circular-list? x -> boolean - True if X is a circular list. A circular list is a value such that - for every n >= 0, cdr^n(x) is a pair. - - Terminology: The opposite of circular is finite. - (not (circular-list? x)) = (or (proper-list? x) (dotted-list? x)) - -dotted-list? x -> boolean - True if X is a finite, non-nil-terminated list. That is, there exists - an n >= 0 such that cdr^n(x) is neither a pair nor (). This includes - non-pair, non-() values (e.g. symbols, numbers), which are considered to - be dotted lists of length 0. - - (not (dotted-list? x)) = (or (proper-list? x) (circular-list? x)) - -pair? object -> boolean R5RS - Returns #t if OBJECT is a pair; otherwise, #f. - - (pair? '(a . b)) ==> #t - (pair? '(a b c)) ==> #t - (pair? '()) ==> #f - (pair? '#(a b)) ==> #f - (pair? 7) ==> #f - (pair? 'a) ==> #f - -null? object -> boolean R5RS - Returns #t if OBJECT is the empty list; otherwise, #f. - -null-list? list -> boolean - LIST is a proper or circular list. This procedure returns true if - the argument is the empty list (), and false otherwise. It is an - error to pass this procedure a value which is not a proper or - circular list. - - This procedure is recommended as the termination condition for - list-processing procedures that are not defined on dotted lists. - -not-pair? x -> boolean - (lambda (x) (not (pair? x))) - Provided as a procedure as it can be useful as the termination condition - for list-processing procedures that wish to handle all finite lists, - both proper and dotted. - -list= elt= list1 ... -> boolean - Determines list equality, given an element-equality procedure. - Proper list A equals proper list B if they are of the same length, - and their corresponding elements are equal, as determined by ELT=. - If the element-comparison procedure's first argument is from LISTi, - then its second argument is from LISTi+1, i.e. it is always called as - (elt= a b) - for a an element of list A, and b an element of list B. - - In the n-ary case, every LISTi is compared to LISTi+1 (as opposed, - for example, to comparing LIST1 to every LISTi, for i>1). If there - are no list arguments at all, LIST= simply returns true. - - It is an error to apply LIST= to anything except proper lists. While - implementations may choose to extend it to circular lists, note that it - cannot reasonably be extended to dotted lists, as it provides no way to - specify an equality procedure for comparing the list terminators. - - Note that the dynamic order in which the ELT= procedure is applied to - pairs of elements is not specified. For example, if LIST= is applied - to three lists, A, B, and C, it may first completely compare A to B, - then compare B to C, or it may compare the first elements of A and B, - then the first elements of B and C, then the second elements of A and - B, and so forth. - - The equality procedure must be consistent with EQ?. That is, - it must be the case that - (eq? x y) => (elt= x y). - Note that this implies that two lists which are EQ? are always LIST=, - as well; implementations may exploit this fact to "short-cut" the - element-by-element comparisons. - - (list= eq?) => #t ; Trivial cases - (list= eq? '(a)) => #t - - -** Selectors -============ - -car pair -> value R5RS -cdr pair -> value R5RS - These procedures return the contents of the car and cdr field of - their argument, respectively. Note that it is an error to apply - them to the empty list. - - (car '(a b c)) ==> a - (car '((a) b c d)) ==> (a) - (car '(1 . 2)) ==> 1 - (car '()) ==> *error* - - (cdr '(a b c)) ==> (b c) - (cdr '((a) b c d)) ==> (b c d) - (cdr '(1 . 2)) ==> 2 - (cdr '()) ==> *error* - -caar pair -> value R5RS -cadr pair -> value - : -cdddar pair -> value -cddddr pair -> value - These procedures are compositions of CAR and CDR, where for - example CADDR could be defined by - - (define caddr (lambda (x) (car (cdr (cdr x))))). - - Arbitrary compositions, up to four deep, are provided. There are - twenty-eight of these procedures in all. - -list-ref clist i -> value R5RS - Returns the Ith element of CLIST. (This is the same as the car - of (DROP CLIST I).) It is an error if I >= N, where N is the length - of CLIST. - - (list-ref '(a b c d) 2) ==> c - -first second third fourth fifth -sixth seventh eighth ninth tenth: pair -> value - Synonyms for car, cadr, caddr, ... - - (third '(a b c d e)) => c - -car+cdr pair -> [x y] - The fundamental pair deconstructor: - (lambda (p) (values (car p) (cdr p))) - This can, of course, be implemented more efficiently by a compiler. - -take x i -> list -drop x i -> object - TAKE returns the first I elements of list X. - DROP returns all but the first I elements of list X. - - (take '(a b c d e) 2) => (a b) - (drop '(a b c d e) 2) => (c d e) - - X may be any value -- a proper, circular, or dotted list: - (take '(1 2 3 . d) 2) => (1 2) - (drop '(1 2 3 . d) 2) => (3 . d) - (take '(1 2 3 . d) 3) => (1 2 3) - (drop '(1 2 3 . d) 3) => d - For a legal I, TAKE and DROP partition the list in a manner which - can be inverted with APPEND: - (append (take x i) (drop x i)) = x - - DROP is exactly equivalent to performing I cdr operations on X; - the returned value shares a common tail with X. - - If the argument is a list of non-zero length, TAKE is guaranteed to - return a freshly-allocated list, even in the case where the entire - list is taken, e.g. (TAKE LIS (LENGTH LIS)). - -take-right flist i -> object -drop-right flist i -> list - TAKE-RIGHT returns the last I elements of FLIST. - DROP-RIGHT returns all but the last I elements of FLIST. - - The returned list may share a common tail with the argument list. - - (take-right '(a b c d e) 2) => (d e) - (drop-right '(a b c d e) 2) => (a b c) - - FLIST may be any finite list, either proper or dotted: - (take-right '(1 2 3 . d) 2) => (2 3 . d) - (drop-right '(1 2 3 . d) 2) => (1) - (take-right '(1 2 3 . d) 0) => d - (drop-right '(1 2 3 . d) 0) => (1 2 3) - For a legal I, TAKE-RIGHT and DROP-RIGHT partition the list in a manner - which can be inverted with APPEND: - (append (take flist i) (drop flist i)) = flist - - TAKE-RIGHT's return value is guaranteed to share a common tail with FLIST. - - If the argument is a list of non-zero length, DROP-RIGHT is guaranteed to - return a freshly-allocated list, even in the case where nothing is - dropped, e.g. (DROP-RIGHT LIS 0). - -take! x i -> list -drop-right! flist i -> list - TAKE! and DROP-RIGHT! are "linear-update" variants of TAKE and - DROP-RIGHT: the procedure is allowed, but not required, to alter the - argument list to produce the result. - - If X is circular, TAKE! may return a shorter-than-expected list: - (take! (circular-list 1 3 5) 8) => (1 3) - (take! (circular-list 1 3 5) 8) => (1 3 5 1 3 5 1 3) - -split-at x i -> [list object] -split-at! x i -> [list object] - SPLIT-AT splits the list X at index I, returning a list of the - first I elements, and the remaining tail. It is equivalent - to - (values (take x i) (drop x i)) - - SPLIT-AT! is the linear-update variant. It is allowed, but not - required, to alter the argument list to produce the result. - - (split-at '(a b c d e f g h) 3) => - (a b c) - (d e f g h) - -last pair -> object -last-pair pair -> pair - LAST returns the last element of the non-empty, finite list PAIR. - LAST-PAIR returns the last pair in the non-empty, finite list PAIR. - - (last '(a b c)) => c - (last-pair '(a b c)) => (c) - (last-pair '(a b c . d)) => (c . d) - - -** Miscellaneous: length, append, concatenate, reverse, zip & count -=================================================================== - -length list -> integer R5RS -length+ clist -> integer or #f - Both LENGTH and LENGTH+ return the length of the argument. - It is an error to pass a value to LENGTH which is not a proper - list (finite and nil-terminated). In particular, this means an - implementation may diverge or signal an error when LENGTH is - applied to a circular list. - - LENGTH+, on the other hand, returns #F when applied to a circular - list. - - The length of a proper list is a non-negative integer N such that CDR - applied N times to the list produces the empty list. - - (length '(a b c)) ==> 3 - (length '(a (b) (c d e))) ==> 3 - (length '()) ==> 0 - -append list1 ... -> value R5RS -append! list1 ... -> value - APPEND returns a list consisting of the elements of LIST1 - followed by the elements of the other list parameters. - - (append '(x) '(y)) ==> (x y) - (append '(a) '(b c d)) ==> (a b c d) - (append '(a (b)) '((c))) ==> (a (b) (c)) - - The resulting list is always newly allocated, except that it - shares structure with the final LISTi argument. This last argument - may be any value at all; an improper list results if it is not - a proper list. All other arguments must be proper lists. - - (append '(a b) '(c . d)) ==> (a b c . d) - (append '() 'a) ==> a - (append '(x y)) ==> (x y) - (append) ==> () - - APPEND! is the "linear-update" variant of APPEND -- it is allowed, but - not required, to alter cons cells in the argument lists to construct - the result list. The last argument is never altered; the result - list shares structure with this parameter. - -concatenate list-of-lists -> value -concatenate! list-of-lists -> value - These functions append the elements of their argument together. - That is, CONCATENATE returns - (apply append list-of-lists) - or, equivalently, - (reduce-right append '() list-of-lists) - - CONCATENATE! is the linear-update variant, defined in - terms of APPEND! instead of APPEND. - - Note that some Scheme implementations do not support passing more than a - certain number (e.g., 64) of arguments to an n-ary procedure. In these - implementations, the (APPLY APPEND ...) idiom would fail when applied to - long lists, but CONCATENATE would continue to function properly. - - As with APPEND and APPEND!, the last element of the input list - may be any value at all. - -reverse list -> list R5RS -reverse! list -> list - REVERSE returns a newly allocated list consisting of the elements of - LIST in reverse order. - - (reverse '(a b c)) ==> (c b a) - (reverse '(a (b c) d (e (f)))) - ==> ((e (f)) d (b c) a) - - REVERSE! is the linear-update variant of REVERSE. It is permitted, - but not required, to alter the argument's cons cells to produce the - reversed list. - -append-reverse rev-head tail -> value -append-reverse! rev-head tail -> value - APPEND-REVERSE returns - (append (reverse rev-head) tail) - It it provided because it is a common operation -- a common - list-processing style calls for this exact operation to transfer values - accumulated in reverse order onto the front of another list, and because - the implementation is significantly more efficient than the simple - composition it replaces. (But note that this pattern of iterative - computation followed by a reverse can frequently be rewritten as a - recursion, dispensing with the REVERSE and APPEND-REVERSE steps, and - shifting temporary, intermediate storage from the heap to the stack, - which is typically a win for reasons of cache locality and eager storage - reclamation.) - - APPEND-REVERSE! is just the linear-update variant -- it is allowed, but - not required, to alter REV-HEAD's cons cells to construct the result. - -zip clist1 clist2 ... -> list - (lambda lists (apply map list lists)) - If ZIP is passed N lists, it returns a list as long as the shortest - of these lists, each element of which is an N-element list comprised - of the corresponding elements from the parameter lists. - - (zip '(one two three) '(1 2 3) '(odd even odd even odd even odd even)) => - ((one 1 odd) (two 2 even) (three 3 odd)) - - (zip '(1 2 3)) => ((1) (2) (3)) - - At least one of the argument lists must be finite: - (zip '(3 1 4 1) (circular-list #f #t)) => - ((3 #f) (1 #t) (4 #f) (1 #t)) - -unzip1 list -> list -unzip2 list -> [list list] -unzip3 list -> [list list list] -unzip4 list -> [list list list list] -unzip5 list -> [list list list list list] - UNZIP1 takes a list of lists, where every list must contain at least one - element, and returns a list containing the initial element of each such - list. That is, it returns (MAP CAR LISTS). UNZIP2 takes a list of lists, - where every list must contain at least two elements, and returns two - values: a list of the first elements, and a list of the second - elements. UNZIP3 does the same for the first three elements of the lists, - and so forth. - - (unzip2 '((1 one) (2 two) (3 three))) => - (1 2 3) - (one two three) - -count pred clist1 clist2 ... -> integer - PRED is a procedure taking as many arguments as there are lists and - returning a single value. It is applied element-wise to the elements of - the LISTs, and a count is tallied of the number of elements that produce a - true value. This count is returned. COUNT is "iterative" in that it is - guaranteed to apply PRED to the LIST elements in a left-to-right order. - The counting stops when the shortest list expires. - - (count even? '(3 1 4 1 5 9 2 5 6)) => 3 - (count < '(1 2 4 8) '(2 4 6 8 10 12 14 16)) => 3 - - At least one of the argument lists must be finite: - (count < '(3 1 4 1) (circular-list 1 10)) => 2 - - -** Fold, unfold & map -===================== - -fold kons knil clist1 clist2 ... -> value - The fundamental list iterator. - - First, consider the single list-parameter case. If CLIST1 = (e1 e2 ... en), - then this procedure returns - (kons en ... (kons e2 (kons e1 knil)) ... ) - That is, it obeys the (tail) recursion - (fold kons knil lis) = (fold kons (kons (car lis) knil) (cdr lis)) - (fold kons knil '()) = knil - - Examples: - (fold + 0 lis) ; Add up the elements of LIS. - - (fold cons '() lis) ; Reverse LIS. - - (fold cons tail rev-head) ; See APPEND-REVERSE. - - ;; How many symbols in LIS? - (fold (lambda (x count) (if (symbol? x) (+ count 1) count)) - 0 - lis) - - ;; Length of the longest string in LIS: - (fold (lambda (s max-len) (max max-len (string-length s))) - 0 - lis) - - If N list arguments are provided, then the KONS function must take - N+1 parameters: one element from each list, and the "seed" or fold - state, which is initially KNIL. The fold operation terminates when - the shortest list runs out of values: - (fold cons* '() '(a b c) '(1 2 3 4 5)) => (c 3 b 2 a 1) - - At least one of the list arguments must be finite. - -fold-right kons knil clist1 clist2 ... -> value - The fundamental list recursion operator. - - First, consider the single list-parameter case. If CLIST1 = (e1 e2 ... en), - then this procedure returns - (kons e1 (kons e2 ... (kons en knil))) - That is, it obeys the recursion - (fold-right kons knil lis) = (kons (car lis) (fold-right kons knil (cdr lis))) - (fold-right kons knil '()) = knil - - Examples: - (fold-right cons '() lis) ; Copy LIS. - - ;; Filter the even numbers out of LIS. - (fold-right (lambda (x l) (if (even? x) (cons x l) l)) '() lis)) - - If N list arguments are provided, then the KONS function must take - N+1 parameters: one element from each list, and the "seed" or fold - state, which is initially KNIL. The fold operation terminates when - the shortest list runs out of values: - (fold-right cons* '() '(a b c) '(1 2 3 4 5)) => (a 1 b 2 c 3) - - At least one of the list arguments must be finite. - -pair-fold kons knil clist1 clist2 ... -> value - Analogous to FOLD, but KONS is applied to successive sublists of the - lists, rather than successive elements -- that is, KONS is applied to the - pairs making up the lists, giving this (tail) recursion: - - (pair-fold kons knil lis) = (let ((tail (cdr lis))) - (pair-fold kons (kons lis knil) tail)) - - (pair-fold kons knil '()) = knil - - For finite lists, the KONS function may reliably apply SET-CDR! to the - pairs it is given without altering the sequence of execution. - - Example: - ;;; Destructively reverse a list. - (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) - - At least one of the list arguments must be finite. - -pair-fold-right kons knil clist1 clist2 ... -> value - Holds the same relationship with FOLD-RIGHT that PAIR-FOLD holds with FOLD. - Obeys the recursion - - (pair-fold-right kons knil lis) = - (kons lis (pair-fold-right kons knil (cdr lis))) - - (pair-fold-right kons knil '()) = knil - - Example: - (pair-fold-right cons '() '(a b c)) => ((a b c) (b c) (c)) - - At least one of the list arguments must be finite. - -reduce f ridentity list -> value - REDUCE is a variant of FOLD. - - RIDENTITY should be a "right identity" of the procedure F -- that is, - for any value X acceptable to F, - (f x ridentity) = x - - REDUCE has the following definition: - If LIST = (), return RIDENTITY. - Otherwise, return (fold F (car LIST) (cdr LIST)). - - ...in other words, we compute (fold F RIDENTITY LIST). - - Note that RIDENTITY is used *only* in the empty-list case. You - typically use REDUCE when applying F is expensive and you'd like - to avoid the extra application incurred when FOLD applies F to the - head of LIST and the identity value, redundantly producing the - same value passed in to F. For example, if F involves searching a - file directory or performing a database query, this can be - significant. In general, however, FOLD is useful in many contexts - where REDUCE is not (consider the examples given in the FOLD - definition -- only one of the five folds uses function with a - right identity. The other four may not be performed with REDUCE). - - Note: MIT Scheme and Haskell flip F's arg order for their REDUCE and - FOLD functions. - - ;; Take the max of a list of non-negative integers. - (reduce max 0 nums) ; i.e., (apply max 0 nums) - -reduce-right f ridentity list -> value - REDUCE-RIGHT is the fold-right variant of REDUCE. - It obeys the following definition: - (reduce-right f ridentity '()) = ridentity - (reduce-right f ridentity '(e1)) = (f e1 ridentity) = e1 - (reduce-right f ridentity '(e1 e2 ...)) = - (f e1 (reduce f ridentity (e2 ...))) - - ...in other words, we compute (fold-right F RIDENTITY LIST). - - ;; Append a bunch of lists together. - ;; I.e., (apply append list-of-lists) - (reduce-right append '() list-of-lists) - -unfold p f g seed [tail-gen]-> list - UNFOLD is best described by its basic recursion: - (unfold p f g seed) = (if (p seed) (tail-gen seed) - (cons (f seed) - (unfold p f g (g seed)))) - P: Determines when to stop unfolding. - F: Maps each seed value to the corresponding list element. - G: Maps each seed value to next seed value. - SEED: The "state" value for the unfold. - TAIL-GEN: creates the tail of the list; defaults to (lambda (x) '()) - - In other words, we use G to generate a sequence of seed values - SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... - These seed values are mapped to list elements by F, producing the - elements of the result list in a left-to-right order. P says when to stop. - - UNFOLD is the fundamental recursive list constructor, just as FOLD-RIGHT - is the fundamental recursive list consumer. While UNFOLD may seem a - bit abstract to novice functional programmers, it can be used in a number - of ways: - - (unfold (lambda (x) (> x 10)) ; List of squares: 1^2 ... 10^2. - (lambda (x) (* x x)) - (lambda (x) (+ x 1)) - 1) - - (unfold null-list? car cdr lis) ; Copy a proper list. - - ;; Read current input port into a list of values. - (unfold eof-object? values (lambda (x) (read)) (read)) - - ;; Copy a possibly non-proper list: - (unfold not-pair? car cdr lis - values) - - ;; Append HEAD onto TAIL: - (unfold null-list? car cdr head - (lambda (x) tail)) - - Interested functional programmers may enjoy noting that FOLD-RIGHT and - UNFOLD are in some sense inverses. That is, given operations KNULL?, - KAR, KDR, KONS, and KNIL satisfying - (kons (kar x) (kdr x)) = x and (knull? knil) = #t - then - (FOLD-RIGHT kons knil (UNFOLD knull? kar kdr x)) = x - and - (UNFOLD knull? kar kdr (FOLD-RIGHT kons knil x)) = x. - - This combinator sometimes is called an "anamorphism;" when an - explicit TAIL-GEN procedure is supplied, it is called an - "apomorphism." - -unfold-right p f g seed [tail] -> value - UNFOLD constructs a list with the following loop: - (let lp ((seed seed) (lis tail)) - (if (p seed) lis - (lp (g seed) - (cons (f seed) lis)))) - - P: Determines when to stop unfolding. - F: Maps each seed value to the corresponding list element. - G: Maps each seed value to next seed value. - SEED: The "state" value for the unfold. - TAIL: list terminator; defaults to '(). - - In other words, we use G to generate a sequence of seed values - SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... - These seed values are mapped to list elements by F, producing the - elements of the result list in a right-to-left order. P says when to stop. - - UNFOLD-RIGHT is the fundamental iterative list constructor, just as FOLD - is the fundamental iterative list consumer. While UNFOLD-RIGHT may seem a - bit abstract to novice functional programmers, it can be used in a number - of ways: - - (unfold-right zero? ; List of squares: 1^2 ... 10^2 - (lambda (x) (* x x)) - (lambda (x) (- x 1)) - 10) - - (unfold-right null-list? car cdr lis) ; Reverse a proper list. - - ;; Read current input port into a list of values. - (unfold-right eof-object? values (lambda (x) (read)) (read)) - - ;; (APPEND-REVERSE rev-head tail) - (unfold-right null-list? car cdr rev-head tail) - - Interested functional programmers may enjoy noting that FOLD and - UNFOLD-RIGHT are in some sense inverses. That is, given operations KNULL?, - KAR, KDR, KONS, and KNIL satisfying - (kons (kar x) (kdr x)) = x and (knull? knil) = #t - then - (FOLD kons knil (UNFOLD-RIGHT knull? kar kdr x)) = x - and - (UNFOLD-RIGHT knull? kar kdr (FOLD kons knil x)) = x. - - This combinator presumably has some pretentious mathematical name; - interested readers are invited to communicate it to the author. - -map proc clist1 clist2 ... -> list R5RS+ - - PROC is a procedure taking as many arguments as there are list arguments - and returning a single value. MAP applies PROC element-wise to the - elements of the lists and returns a list of the results, in order. The - dynamic order in which PROC is applied to the elements of the lists is - unspecified. - - (map cadr '((a b) (d e) (g h))) - ==> (b e h) - - (map (lambda (n) (expt n n)) - '(1 2 3 4 5)) - ==> (1 4 27 256 3125) - - (map + '(1 2 3) '(4 5 6)) ==> (5 7 9) - - (let ((count 0)) - (map (lambda (ignored) - (set! count (+ count 1)) - count) - '(a b))) ==> (1 2) OR (2 1) - - This procedure is extended from its R5RS specification - to allow the arguments to be of unequal length; it terminates - when the shortest list runs out. - - At least one of the argument lists must be finite: - (map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1) - -for-each proc clist1 clist2 ... -> unspecified R5RS+ - The arguments to FOR-EACH are like the arguments to MAP, but - FOR-EACH calls PROC for its side effects rather than for its - values. Unlike MAP, FOR-EACH is guaranteed to call PROC on - the elements of the CLISTs in order from the first element(s) to - the last, and the value returned by FOR-EACH is unspecified. - - (let ((v (make-vector 5))) - (for-each (lambda (i) - (vector-set! v i (* i i))) - '(0 1 2 3 4)) - v) ==> #(0 1 4 9 16) - - This procedure is extended from its R5RS specification - to allow the arguments to be of unequal length; it terminates - when the shortest list runs out. - - At least one of the argument lists must be finite. - -append-map f clist1 clist2 ... -> value -append-map! f clist1 clist2 ... -> value - Equivalent to - (apply append (map f clist1 clist2 ...)) - and - (apply append! (map f clist1 clist2 ...)) - - Map F over the elements of the lists, just as in the MAP function. - However, the results of the applications are appended together to - make the final result. APPEND-MAP uses APPEND to append the results - together; APPEND-MAP! uses APPEND!. - - The dynamic order in which the various applications of F are made is - not specified. - - Example: - (append-map! (lambda (x) (list x (- x))) '(1 3 8)) - => (1 -1 3 -3 8 -8) - - At least one of the list arguments must be finite. - -map! f list1 clist2 ... -> list - Linear-update variant of MAP -- MAP! is allowed, but not required, to - alter the cons cells of LIST1 to construct the result list. - - The dynamic order in which the various applications of F are made is - not specified. - - In the n-ary case, CLIST2, CLIST3, ... must have at least as many - elements as LIST1. - -map-in-order f clist1 clist2 ... -> list - A variant of the MAP procedure that guarantees to apply F across - the elements of the LISTi arguments in a left-to-right order. This - is useful for mapping procedures that both have side effects and - return useful values. - - At least one of the list arguments must be finite. - -pair-for-each f clist1 clist2 ... -> unspecific - Like FOR-EACH, but F is applied to successive sublists of the argument - lists. That is, F is applied to the cons cells of the lists, rather - than the lists' elements. These applications occur in left-to-right - order. - - The F procedure may reliably apply SET-CDR! to the pairs it is given - without altering the sequence of execution. - - (pair-for-each (lambda (pair) (display pair) (newline)) '(a b c)) ==> - (a b c) - (b c) - (c) - - At least one of the list arguments must be finite. - -filter-map f clist1 clist2 ... -> list - Like MAP, but only true values are saved. - (filter-map (lambda (x) (and (number? x) (* x x))) '(a 1 b 3 c 7)) - => (1 9 49) - The dynamic order in which the various applications of F are made is - not specified. - - At least one of the list arguments must be finite. - - -** Filtering & partitioning -=========================== - -filter pred list -> list - Return all the elements of LIST that satisfy predicate PRED. - The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The returned list may share a common tail with the argument list. - The dynamic order in which the various applications of PRED are made is - not specified. - - (filter even? '(0 7 8 8 43 -4)) => (0 8 8 -4) - -partition pred list -> [list list] - Partitions the elements of LIST with predicate PRED, and returns two - values: the list of in-elements and the list of out-elements. - The list is not disordered -- elements occur in the result lists - in the same order as they occur in the argument list. - The dynamic order in which the various applications of PRED are made is - not specified. One of the returned lists may share a common tail with the - argument list. - - (partition symbol? '(one 2 3 four five 6)) - => (one four five) - (2 3 6) - -remove pred list -> list - Returns LIST without the elements that satisfy predicate PRED: - (lambda (pred list) (filter (lambda (x) (not (pred x))) list)) - The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The returned list may share a common tail with the argument list. - The dynamic order in which the various applications of PRED are made is - not specified. - - (remove even? '(0 7 8 8 43 -4)) => (7 43) - -filter! pred list -> list -partition! pred list -> [list list] -remove! pred list -> list - Linear-update variants of FIND, PARTITION and REMOVE. - These procedures are allowed, but not required, to alter the cons cells - in the argument list to construct the result lists. - - -** Searching -============ - -The following procedures all search lists for a leftmost element satisfying -some criteria. This means they do not always examine the entire list; thus, -there is no efficient way for them to reliably detect and signal an error when -passed a dotted or circular list. Here are the general rules describing how -these procedures work when applied to different kinds of lists: - - Proper lists: The standard, canonical behavior happens in this case. - - Dotted lists: It is an error to pass these procedures a dotted list - that does not contain an element satisfying the search - criteria. That is, it is an error if the procedure has - to search all the way to the end of the dotted list. - However, this SRFI does *not* specify anything at all - about the behavior of these procedures when passed a - dotted list containing an element satisfying the search - criteria. It may finish successfully, signal an error, - or perform some third action. Different implementations - may provide different functionality in this case; code - which is compliant with this SRFI may not rely on any - particular behavior. Future SRFI's may refine SRFI-1 - to define specific behavior in this case. - - In brief, SRFI-1 compliant code may not pass a dotted - list argument to these procedures. - - Circular lists: It is an error to pass these procedures a circular list - that does not contain an element satisfying the search - criteria. Note that the procedure is not required to - detect this case; it may simply diverge. It is, however, - acceptable to search a circular list *if the search is - successful* -- that is, if the list contains an element - satisfying the search criteria. - -Here are some examples, using the FIND and ANY procedures as canonical -representatives: - ;; Proper list -- success - (find even? '(1 2 3)) => 2 - (any even? '(1 2 3)) => #t - - ;; proper list -- failure - (find even? '(1 7 3)) => #f - (any even? '(1 7 3)) => #f - - ;; Failure is error on a dotted list. - (find even? '(1 3 . x)) => error - (any even? '(1 3 . x)) => error - - ;; The dotted list contains an element satisfying the search. - ;; This case is not specified -- it could be success, an error, - ;; or some third possibility. - (find even? '(1 2 . x)) => error/undefined - (any even? '(1 2 . x)) => error/undefined ; success, error or other. - - ;; circular list -- success - (find even? (circular-list 1 6 3)) => 6 - (any even? (circular-list 1 6 3)) => #t - - ;; circular list -- failure is error. Procedure may diverge. - (find even? (circular-list 1 3)) => error - (any even? (circular-list 1 3)) => error - - -find pred clist -> value - Return the first element of CLIST that satisfies predicate PRED; - false if no element does. - - (find even? '(3 1 4 1 5 9)) => 4 - - Note that FIND has an ambiguity in its lookup semantics -- if FIND - returns #F, you cannot tell (in general) if it found a #F element - that satisfied PRED, or if it did not find any element at all. In - many situations, this ambiguity cannot arise -- either the list being - searched is known not to contain any #F elements, or the list is - guaranteed to have an element satisfying PRED. However, in cases - where this ambiguity can arise, you should use FIND-TAIL instead of - FIND -- FIND-TAIL has no such ambiguity: - (cond ((find-tail pred lis) => (lambda (pair) ...)) ; Handle (CAR PAIR) - (else ...)) ; Search failed. - -find-tail pred clist -> pair or false - Return the first pair of CLIST whose car satisfies PRED. If no pair does, - return false. - - FIND-TAIL can be viewed as a general-predicate variant of the MEMBER - function. - - Examples: - (find-tail even? '(3 1 37 -8 -5 0 0)) => (-8 -5 0 0) - - (find-tail even? '(3 1 37 -5)) => #f - - ;; MEMBER X LIS: - (find-tail (lambda (elt) (equal? x elt)) lis) - - In the circular-list case, this procedure "rotates" the list. - - FIND-TAIL is essentially DROP-WHILE, where the sense of the predicate - is inverted: FIND-TAIL searches until it finds an element satisfying - the predicate; DROP-WHILE searches until it finds an element that - *doesn't* satisfy the predicate. - -take-while pred clist -> list -take-while! pred clist -> list - Returns the longest initial prefix of CLIST whose elements all - satisfy the predicate PRED. - - TAKE-WHILE! is the linear-update variant. It is allowed, but not - required, to alter the argument list to produce the result. - - (take-while even? '(2 18 3 10 22 9)) => (2 18) - -drop-while pred clist -> list - Drops the longest initial prefix of LIST whose elements all - satisfy the predicate PRED, and returns the rest of the list. - - (drop-while even? '(2 18 3 10 22 9)) => (3 10 22 9) - - The circular-list case may be viewed as "rotating" the list. - -span pred clist -> [list clist] -span! pred list -> [list list] -break pred clist -> [list clist] -break! pred list -> [list list] - SPAN splits the list into the longest initial prefix whose elements - all satisfy PRED, and the remaining tail. BREAK inverts the sense - of the predicate: the tail commences with the first element of the - input list that satisfies the predicate. - - In other words: SPAN finds the intial span of elements satisfying - PRED, and BREAK breaks the list at the first element satisfying PRED. - - SPAN is equivalent to (VALUES (TAKE-WHILE PRED CLIST) - (DROP-WHILE PRED CLIST)). - - SPAN! and BREAK! are the linear-update variants. They are allowed, but not - required, to alter the argument list to produce the result. - - (span even? '(2 18 3 10 22 9)) => - (2 18) - (3 10 22 9) - - (break even? '(3 1 4 1 5 9)) => - (3 1) - (4 1 5 9) - -any pred clist1 clist2 ... -> value - Applies the predicate across the lists, returning true if the predicate - returns true on any application. - - If there are N list arguments CLIST1 ... CLISTn, then PRED must be a - procedure taking N arguments and returning a boolean result. - - ANY applies PRED to the first elements of the CLISTi parameters. If this - application returns a true value, ANY immediately returns that value. - Otherwise, it iterates, applying PRED to the second elements of the CLISTi - parameters, then the third, and so forth. The iteration stops when a true - value is produced or one of the lists runs out of values; in the latter - case, ANY returns #F. The application of PRED to the last element of the - lists is a tail call. - - Note the difference between FIND and ANY -- FIND returns the element - that satisfied the predicate; ANY returns the true value that the - predicate produced. - - Like EVERY, ANY's name does not end with a question mark -- this is to - indicate that it does not return a simple boolean (#T or #F), but a - general value. - - (any integer? '(a 3 b 2.7)) => #T - (any integer? '(a 3.1 b 2.7)) => #F - (any < '(3 1 4 1 5) - '(2 7 1 8 2)) => #T - -every pred clist1 clist2 ... -> value - Applies the predicate across the lists, returning true if the predicate - returns true on every application. - - If there are N list arguments CLIST1 ... CLISTn, then PRED must be a - procedure taking N arguments and returning a boolean result. - - EVERY applies PRED to the first elements of the CLISTi parameters. If - this application returns false, EVERY immediately returns false. - Otherwise, it iterates, applying PRED to the second elements of the CLISTi - parameters, then the third, and so forth. The iteration stops when a false - value is produced or one of the lists run out of values. In the latter - case, EVERY returns the true value produced by its final application of - PRED. The application of PRED to the last element of the lists is a tail - call. - - If one of the CLISTi has no elements, EVERY simply returns #T. - - Like ANY, EVERY's name does not end with a question mark -- this is to - indicate that it does not return a simple boolean (#T or #F), but a - general value. - -list-index pred clist1 clist2 ... -> integer or false - Return the index of the leftmost element that satisfies PRED. - - If there are N list arguments CLIST1 ... CLISTn, then PRED must be a - function taking N arguments and returning a boolean result. - - LIST-INDEX applies PRED to the first elements of the CLISTi parameters. - If this application returns true, LIST-INDEX immediately returns zero. - Otherwise, it iterates, applying PRED to the second elements of the - CLISTi parameters, then the third, and so forth. When it finds a tuple of - list elements that cause PRED to return true, it stops and returns the - zero-based index of that position in the lists. - - The iteration stops when one of the lists runs out of values; in this - case, LIST-INDEX returns #F. - - (list-index even? '(3 1 4 1 5 9)) => 2 - (list-index < '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => 1 - (list-index = '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => #f - -member x list [=] -> list or #f R5RS+ -memq x list -> list or #f R5RS -memv x list -> list or #f R5RS - These procedures return the first sublist of LIST whose car is - X, where the sublists of LIST are the non-empty lists returned - by (DROP LIST I) for I less than the length of LIST. If X does - not occur in LIST, then #f is returned. MEMQ uses EQ? to compare X - with the elements of LIST, while MEMV uses EQV? and MEMBER uses EQUAL?. - - (memq 'a '(a b c)) ==> (a b c) - (memq 'b '(a b c)) ==> (b c) - (memq 'a '(b c d)) ==> #f - (memq (list 'a) '(b (a) c)) ==> #f - (member (list 'a) - '(b (a) c)) ==> ((a) c) - (memq 101 '(100 101 102)) ==> *unspecified* - (memv 101 '(100 101 102)) ==> (101 102) - - MEMBER is extended from its R5RS definition to allow the client to pass - in an optional equality procedure = used to compare keys. - - The comparison procedure is used to compare the elements Ei of LIST - to the key X in this way: - (= X Ei) ; list is (E1 ... En) - That is, the first argument is always X, and the second argument is - one of the list elements. Thus one can reliably find the first element - of LIST that is greater than five with - (member 5 LIST <) - - Note that fully general list searching may be performed with - the FIND-TAIL and FIND procedures, e.g. - (find-tail even? list) ; Find the first elt with an even key. - - -** Deletion -=========== - -delete x list [=] -> list -delete! x list [=] -> list - DELETE uses the comparison procedure =, which defaults to EQUAL?, to find - all elements of LIST that are equal to X, and deletes them from LIST. The - dynamic order in which the various applications of = are made is not - specified. - - The list is not disordered -- elements that appear in the result list - occur in the same order as they occur in the argument list. - The result may share a common tail with the argument list. - - Note that fully general element deletion can be performed with the REMOVE - and REMOVE! procedures, e.g.: - ;; Delete all the even elements from LIS: - (remove even? lis) - - The comparison procedure is used in this way: - (= X Ei) - That is, X is always the first argument, and a list element is always the - second argument. The comparison procedure will be used to compare each - element of LIST exactly once; the order in which it is applied to the - various Ei is not specified. Thus, one can reliably remove all the - numbers greater than five from a list with - (delete 5 list <) - - DELETE! is the linear-update variant of DELETE. It is allowed, but not - required, to alter the cons cells in its argument list to construct the - result. - -delete-duplicates list [=] -> list -delete-duplicates! list [=] -> list - DELETE-DUPLICATES removes duplicate elements from the list argument. - If there are multiple equal elements in the argument list, the result list - only contains the first or leftmost of these elements in the result. - The order of these surviving elements is the same as in the original - list -- DELETE-DUPLICATES does not disorder the list (hence it is useful - for "cleaning up" association lists). - - The = parameter is used to compare the elements of the list; it defaults - to EQUAL?. If X comes before Y in LIST, then the comparison is performed - (= X Y) - The comparison procedure will be used to compare each pair of - elements in LIST no more than once; the order in which it is - applied to the various pairs is not specified. - - Implementations of DELETE-DUPLICATE are allowed to share common tails - between argument and result lists -- for example, if the list argument - contains only unique elements, it may simply return exactly this list. - - Be aware that, in general, DELETE-DUPLICATES runs in time O(n^2) - for N-element lists. Uniquifying long lists can be accomplished - in O(n lg n) time by sorting the list to bring equal elements - together, then using a linear-time algorithm to remove equal - elements. Alternatively, one can use algorithms based on - element-marking, with linear-time results. - - DELETE-DUPLICATES! is the linear-update variant of DELETE-DUPLICATES; it - is allowed, but not required, to alter the cons cells in its argument - list to construct the result. - - (delete-duplicates '(a b a c a b c z)) => (a b c z) - - ;; Clean up an alist: - (delete-duplicates '((a . 3) (b . 7) (a . 9) (c . 1)) - (lambda (x y) (eq? (car x) (car y)))) - => ((a . 3) (b . 7) (c . 1)) - - -** Association lists -==================== - -An "association list" (or "alist") is a list of pairs. The car of each pair -contains a key value, and the cdr contains the associated data value. They can -be used to construct simple look-up tables in Scheme. Note that association -lists are probably inappropriate for performance-critical use on large data; -in these cases, hash tables or some other alternative should be employed. - -assoc key alist [=] -> pair or #f R5RS+ -assq key alist -> pair or #f R5RS -assv key alist -> pair or #f R5RS - - ALIST must be an association list -- a list of pairs. These procedures - find the first pair in ALIST whose car field is KEY, and returns that - pair. If no pair in ALIST has KEY as its car, then #f is returned. ASSQ - uses EQ? to compare KEY with the car fields of the pairs in ALIST, while - ASSV uses EQV? and ASSOC uses EQUAL?. - - (define e '((a 1) (b 2) (c 3))) - (assq 'a e) ==> (a 1) - (assq 'b e) ==> (b 2) - (assq 'd e) ==> #f - (assq (list 'a) '(((a)) ((b)) ((c)))) ==> #f - (assoc (list 'a) '(((a)) ((b)) ((c)))) ==> ((a)) - (assq 5 '((2 3) (5 7) (11 13))) ==> *unspecified* - (assv 5 '((2 3) (5 7) (11 13))) ==> (5 7) - - ASSOC is extended from its R5RS definition to allow the client to pass in - an optional equality procedure = used to compare keys. - - The comparison procedure is used to compare the elements Ei of LIST - to the KEY parameter in this way: - (= KEY (CAR Ei)) ; list is (E1 ... En) - That is, the first argument is always KEY, and the second argument is - one of the list elements. Thus one can reliably find the first entry - of ALIST whose key is greater than five with - (assoc 5 ALIST <) - - Note that fully general alist searching may be performed with - the FIND-TAIL and FIND procedures, e.g. - ;; Look up the first association in ALIST with an even key: - (find (lambda (a) (even? (car a))) alist) - -alist-cons key datum alist -> alist - (lambda (key datum alist) (cons (cons key datum) alist)) - Cons a new alist entry mapping KEY -> DATUM onto ALIST. - -alist-copy alist -> alist - Make a fresh copy of ALIST. This means copying each pair that - forms an association as well as the spine of the list, i.e. - (lambda (a) (map (lambda (elt) (cons (car elt) (cdr elt))) a)) - -alist-delete key alist [=] -> alist -alist-delete! key alist [=] -> alist - ALIST-DELETE deletes all associations from ALIST with the given - KEY, using key-comparison procedure =, which defaults to EQUAL?. - The dynamic order in which the various applications of = are made - is not specified. - - Return values may share common tails with the ALIST argument. - The alist is not disordered -- elements that appear in the result alist - occur in the same order as they occur in the argument alist. - - The comparison procedure is used to compare the element keys Ki of ALIST's - entries to the KEY parameter in this way: - (= KEY Ki) - Thus, one can reliably remove all entries of ALIST whose key is greater - than five with - (alist-delete 5 alist <) - - ALIST-DELETE! is the linear-update variant of ALIST-DELETE. It - is allowed, but not required, to alter the cons cells from the ALIST - parameter to construct the result. - - -** Set operations on lists -========================== -These procedures implement operations on sets represented as lists of -elements. They all take an = argument used to compare elements of -lists. This equality procedure is required to be consistent with -EQ?. That is, it must be the case that - (eq? x y) => (= x y). -Note that this implies, in turn, that two lists that are EQ? are -also set-equal by any legal comparison procedure. This allows for -constant-time determination of set operations on EQ? lists. - -Be aware that these procedures typically run in time O(n * m) for N- -and M-element list arguments. Performance-critical applications -operating upon large sets will probably wish to use other data -structures and algorithms. - -lset<= = list1 ... -> boolean - Returns true iff every LISTi is a subset of LISTi+1, using = for the - element-equality procedure. List A is a subset of list B if every - element in A is equal to some element of B. When performing an - element comparison, the = procedure's first argument is an element - of A; its second, an element of B. - - (lset<= eq? '(a) '(a b a) '(a b c c)) => #t - - (lset<= eq?) => #t ; Trivial cases - (lset<= eq? '(a)) => #t - - -lset= = list1 ... -> boolean - Returns true iff every LISTi is set-equal to LISTi+1, using = for - the element-equality procedure. "Set-equal" simply means that - LISTi is a subset of LISTi+1, and LISTi+1 is a subset of LISTi. - - (lset= eq? '(b e a) '(a e b) '(e e b a)) => #t - - (lset= eq?) => #t ; Trivial cases - (lset= eq? '(a)) => #t - -lset-adjoin = list elt1 ... -> list - Adds the ELTi elements not already in the list parameter to the - result list. The result shares a common tail with the list parameter. - The new elements are added to the front of the list, but no guarantees - are made about their order. The = parameter is an equality procedure - used to determine if an ELTi is already a member of LIST. Its first - argument is an element of LIST; its second is one of the ELTi. - - The list parameter is always a suffix of the result -- even if the list - parameter contains repeated elements, these are not reduced. - - (lset-adjoin eq? '(a b c d c e) 'a 'e 'i 'o 'u) => (u o i a b c d c e) - -lset-union = list1 ... -> list - Returns the union of the lists, using = for the element-equality - procedure. - - The union of lists A and B is constructed as follows: - - If A is the empty list, the answer is B (or a copy of B). - - Otherwise, the result is initialised to be list A (or a copy of A). - - Proceed through the elements of list B in a left-to-right order. - If b is such an element of B, compare every element r of the current - result list to b: (= r b). If all comparisons fail, b is consed - onto the front of the result. - However, there is no guarantee that = will be applied to every pair - of arguments from A and B. In particular, if A is EQ? to B, the operation - may immediately terminate. - - In the n-ary case, the two-argument list-union operation is simply - folded across the argument lists. - - (lset-union eq? '(a b c d e) '(a e i o u)) => (u o i a b c d e) - - ;; Repeated elements in LIST1 are preserved. - (lset-union eq? '(a a c) '(x a x)) => (x a a c) - - (lset-union eq?) => () ; Trivial cases - (lset-union eq? '(a b c)) => (a b c) - -lset-intersection = list1 list2 ... -> list - Returns the intersection of the lists, using = for the element-equality - procedure. - - The intersection of lists A and B is comprised of every element of A - that is = to some element of B: (= a b), for a in A, and b in B. - Note this implies that an element which appears in B and multiple times - in list A will also appear multiple times in the result. - - The order in which elements appear in the result is the same as - they appear in LIST1 -- that is, LSET-INTERSECTION essentially - filters LIST1, without disarranging element order. The result may - share a common tail with LIST1. - - In the n-ary case, the two-argument list-intersection operation is simply - folded across the argument lists. However, the dynamic order in which the - applications of = are made is not specified. The procedure may check an - element of LIST1 for membership in every other list before proceeding to - consider the next element of LIST1, or it may completely intersect LIST1 - and LIST2 before proceeding to LIST3, or it may go about its work in some - third order. - - (lset-intersection eq? '(a b c d e) '(a e i o u)) => (a e) - - ;; Repeated elements in LIST1 are preserved. - (lset-intersection eq? '(a x y a) '(x a x z)) => '(a x a) - - (lset-intersection eq? '(a b c)) => (a b c) ; Trivial case - -lset-difference = list1 list2 ... -> list - Returns the difference of the lists, using = for the element-equality - procedure -- all the elements of LIST1 that are not = to any element from - one of the other LISTi parameters. - - The = procedure's first argument is always an element of LIST1; its second - is an element of one of the other LISTi. Elements that are repeated - multiple times in the LIST1 parameter will occur multiple times in the - result. - - The order in which elements appear in the result is the same as - they appear in LIST1 -- that is, LSET-DIFFERENCE essentially - filters LIST1, without disarranging element order. The result may - share a common tail with LIST1. - - The dynamic order in which the applications of = are made is not - specified. The procedure may check an element of LIST1 for membership in - every other list before proceeding to consider the next element of LIST1, - or it may completely compute the difference of LIST1 and LIST2 before - proceeding to LIST3, or it may go about its work in some third order. - - (lset-difference eq? '(a b c d e) '(a e i o u)) => (b c d) - - (lset-difference eq? '(a b c)) => (a b c) ; Trivial case - -lset-xor = list1 ... -> list - Returns the exclusive-or of the sets, using = for the element-equality - procedure. If there are exactly two lists, this is all the elements - that appear in exactly one of the two lists. The operation is associative, - and thus extends to the n-ary case -- the elements that appear in an - odd number of the lists. The result may share a common tail with any of - the LISTi parameters. - - More precisely, for two lists A and B, A xor B is a list of - - every element a of A such that there is no element b of B - such that (= a b) - - every element b of B such that there is no element a of A - such that (= b a) - However, an implementation is allowed to assume that = is - symmetric -- that is, that - (= a b) => (= b a). - This means, for example, that if a comparison (= a b) produces - true for some a in A and b in B, both a and b may be removed from - inclusion in the result. - - In the n-ary case, the binary-xor operation is simply folded across - the lists. - - (lset-xor eq? '(a b c d e) '(a e i o u)) => (d c b i o u) - - ;; Trivial cases. - (lset-xor eq?) => () - (lset-xor eq? '(a b c d e)) => (a b c d e) - -lset-diff+intersection = list1 list2 ... -> [list list] - Returns two values -- the difference and the intersection of the lists. - Is equivalent to - (values (lset-difference = list1 list2 ...) - (lset-intersection = list1 - (lset-union = list2 ...))) - but can be implemented more efficiently. - - The = procedure's first argument is an element of LIST1; its second is - an element of one of the other LISTi. - - Either of the answer lists may share a common tail with LIST1. - This operation essentially partitions LIST1. - -lset-union! = list1 ... -> list -lset-intersection! = list1 list2 ... -> list -lset-difference! = list1 list2 ... -> list -lset-xor! = list1 ... -> list -lset-diff+intersection! = list1 list2 ... -> [list list] - These are linear-update variants. They are allowed, but not required, - to use the cons cells in their first list parameter to construct their - answer. LSET-UNION! is permitted to recycle cons cells from *any* of its - list arguments. - - -** Primitive side-effects -========================= - -These two procedures are the primitive, R5RS side-effect operations on pairs. - -set-car! pair object -> unspecified R5RS -set-cdr! pair object -> unspecified R5RS - These procedures store OBJECT in the car and cdr field of PAIR, - respectively. The value returned is unspecified. - - (define (f) (list 'not-a-constant-list)) - (define (g) '(constant-list)) - (set-car! (f) 3) ==> *unspecified* - (set-car! (g) 3) ==> *error* - - - -* Acknowledgements ------------------- - -The design of this library benefited greatly from the feedback provided during -the SRFI discussion phase. Among those contributing thoughtful commentary and -suggestions, both on the mailing list and by private discussion, were Mike -Ashley, Darius Bacon, Alan Bawden, Phil Bewig, Jim Blandy, Dan Bornstein, Per -Bothner, Anthony Carrico, Doug Currie, Kent Dybvig, Sergei Egorov, Doug Evans, -Marc Feeley, Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Dan Friedman, -Lars Thomas Hansen, Brian Harvey, Erik Hilsdale, Wolfgang Hukriede, Richard -Kelsey, Donovan Kolbly, Shriram Krishnamurthi, Dave Mason, Jussi Piitulainen, -David Pokorny, Duncan Smith, Mike Sperber, Maciej Stachowiak, Harvey J. Stein, -John David Stone, and Joerg F. Wittenberger. I am grateful to them for their -assistance. - -I am also grateful the authors, implementors and documentors of all the systems -mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted -for their work in producing Web-accessible versions of the R5RS and Common -Lisp spec, which was a tremendous aid. - -This is not to imply that these individuals necessarily endorse the final -results, of course. - - - -* References & Links --------------------- - -This document, in HTML: - http://srfi.schemers.org/srfi-1/srfi-1.html - ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html (draft) - -This document, in simple text format: - http://srfi.schemers.org/srfi-1/srfi-1.txt - ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt (draft) - -Source code for the reference implementation: - http://srfi.schemers.org/srfi-1/srfi-1-reference.scm - ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm (draft) - -Archive of SRFI-1 discussion-list email: - http://srfi.schemers.org/srfi-1/mail-archive/maillist.html - -SRFI web site: - http://srfi.schemers.org/ - - -[CommonLisp] - Common Lisp: the Language - Guy L. Steele Jr. (editor). - Digital Press, Maynard, Mass., second edition 1990. - Available at http://www.elwood.com/alu/table/references.htm#cltl2 - - The Common Lisp "HyperSpec," produced by Kent Pitman, is essentially - the ANSI spec for Common Lisp: - http://www.harlequin.com/education/books/HyperSpec/ - -[R5RS] - Revised^5 Report on the Algorithmic Language Scheme, - R. Kelsey, W. Clinger, J. Rees (editors). - Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998. - and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998. - - Available at http://www.schemers.org/Documents/Standards/ - - -* Copyright ------------ - -Certain portions of this document -- the specific, marked segments of text -describing the R5RS procedures -- were adapted with permission from the R5RS -report. - -All other text is copyright (C) Olin Shivers (1998, 1999). -All Rights Reserved. - -This document and translations of it may be copied and furnished to others, -and derivative works that comment on or otherwise explain it or assist in its -implementation may be prepared, copied, published and distributed, in whole or -in part, without restriction of any kind, provided that the above copyright -notice and this paragraph are included on all such copies and derivative -works. However, this document itself may not be modified in any way, such as -by removing the copyright notice or references to the Scheme Request For -Implementation process or editors, except as needed for the purpose of -developing SRFIs in which case the procedures for copyrights defined in the -SRFI process must be followed, or as required to translate it into languages -other than English. - -The limited permissions granted above are perpetual and will not be revoked by -the authors or their successors or assigns. - -This document and the information contained herein is provided on an "AS IS" -basis and THE AUTHORS AND THE SRFI EDITORS DISCLAIM ALL WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE -INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF -MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - -* Ispell "buffer local" dictionary ----------------------------------- - -Ispell dumps "buffer local" words here. Please ignore. - - LocalWords: RS SRFI Chez RScheme MzScheme slib Bigloo APL SML API CDR GC's Ei - LocalWords: EQ consing lib xcons unzip del delq delv mem lset lset xor diff lp - LocalWords: alist assq assv assoc cdr cdddar cddddr ref memq memv george iff - LocalWords: proc lis accessor ary TAIL's NCONS EQV rcons Contrariwise clist - LocalWords: paribus lexeme parallelise Destructuring init FP flist eof CLISTn - LocalWords: generalisation elt cadr caddr rev kons knil len rzero LZERO Ki Ith - LocalWords: arg LISTi pred cond LISTn ANY's EVERY's Uniquifying lg ridentity - LocalWords: eq netnews generalise Maciej Stachowiak al Bewig LocalWords ELTi - LocalWords: anamorphism apomorphism CLISTi ALIST's url ceteris eltn caar KNULL - LocalWords: deconstructor RIGHT's KAR KDR kar kdr knull HTML CLtL Clinger gen - LocalWords: Rees Bawden Blandy Bornstein Bothner Carrico Currie Dybvig expt - LocalWords: Egorov Feeley Matthias Felleisen Flatt Hilsdale Hukriede CLISTs - LocalWords: Kolbly Shriram Krishnamurthi Jussi Piitulainen Pokorny Joerg Todo - LocalWords: Sperber Wittenberger documentors Jaffer initialised consed IE - LocalWords: disarranging SIGPLAN CommonLisp cltl HyperSpec diff --git a/scsh/lib/string-lib.scm b/scsh/lib/string-lib.scm deleted file mode 100644 index beedda1..0000000 --- a/scsh/lib/string-lib.scm +++ /dev/null @@ -1,2023 +0,0 @@ -;;; SRFI 13 string library reference implementation -*- Scheme -*- -;;; Olin Shivers 7/2000 -;;; -;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. -;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved. -;;; The details of the copyrights appear at the end of the file. Short -;;; summary: BSD-style open source. - -;;; Exports: -;;; string-map string-map! -;;; string-fold string-unfold -;;; string-fold-right string-unfold-right -;;; string-tabulate string-for-each string-for-each-index -;;; string-every string-any -;;; string-hash string-hash-ci -;;; string-compare string-compare-ci -;;; string= string< string> string<= string>= string<> -;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> -;;; string-downcase string-upcase string-titlecase -;;; string-downcase! string-upcase! string-titlecase! -;;; string-take string-take-right -;;; string-drop string-drop-right -;;; string-pad string-pad-right -;;; string-trim string-trim-right string-trim-both -;;; string-filter string-delete -;;; string-index string-index-right -;;; string-skip string-skip-right -;;; string-count -;;; string-prefix-length string-prefix-length-ci -;;; string-suffix-length string-suffix-length-ci -;;; string-prefix? string-prefix-ci? -;;; string-suffix? string-suffix-ci? -;;; string-contains string-contains-ci -;;; string-copy! substring/shared -;;; string-reverse string-reverse! reverse-list->string -;;; string-concatenate string-concatenate/shared string-concatenate-reverse -;;; string-append/shared -;;; xsubstring string-xcopy! -;;; string-null? -;;; string-join -;;; string-tokenize -;;; string-replace -;;; -;;; R5RS extended: -;;; string->list string-copy string-fill! -;;; -;;; R5RS re-exports: -;;; string? make-string string-length string-ref string-set! -;;; -;;; R5RS re-exports (also defined here but commented-out): -;;; string string-append list->string -;;; -;;; Low-level routines: -;;; make-kmp-restart-vector string-kmp-partial-search kmp-step -;;; string-parse-start+end -;;; string-parse-final-start+end -;;; let-string-start+end -;;; check-substring-spec -;;; substring-spec-ok? - -;;; Imports -;;; This is a fairly large library. While it was written for portability, you -;;; must be aware of its dependencies in order to run it in a given scheme -;;; implementation. Here is a complete list of the dependencies it has and the -;;; assumptions it makes beyond stock R5RS Scheme: -;;; -;;; This code has the following non-R5RS dependencies: -;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro; -;;; -;;; - Various imports from the char-set library for the routines that can -;;; take char-set arguments; -;;; -;;; - An n-ary ERROR procedure; -;;; -;;; - BITWISE-AND for the hash functions; -;;; -;;; - A simple CHECK-ARG procedure for checking parameter values; it is -;;; (lambda (pred val proc) -;;; (if (pred val) val (error "Bad arg" val pred proc))) -;;; -;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting & -;;; type-checking optional parameters from a rest argument; -;;; -;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE & -;;; STRING-TITLECASE! procedures. The former returns true iff a character is -;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z. -;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII & -;;; Latin-1, it is the same as CHAR-UPCASE. -;;; -;;; The code depends upon a small set of core string primitives from R5RS: -;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING -;;; (Actually, SUBSTRING is not a primitive, but we assume that an -;;; implementation's native version is probably faster than one we could -;;; define, so we import it from R5RS.) -;;; -;;; The code depends upon a small set of R5RS character primitives: -;;; char? char=? char-ci=? charinteger (for the hash functions) -;;; -;;; We assume the following: -;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE -;;; - CHAR-CI=? is equivalent to -;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1)) -;;; (char-downcase (char-upcase c2)))) -;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive -;;; and consistent with Unicode's 1-1 char-mapping spec. -;;; These things are typically true, but if not, you would need to modify -;;; the case-mapping and case-insensitive routines. - -;;; Enough introductory blather. On to the source code. (But see the end of -;;; the file for further notes on porting & performance tuning.) - - -;;; Support for START/END substring specs -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; This macro parses optional start/end arguments from arg lists, defaulting -;;; them to 0/(string-length s), and checks them for correctness. - -(define-syntax let-string-start+end - (syntax-rules () - ((let-string-start+end (start end) proc s-exp args-exp body ...) - (receive (start end) (string-parse-final-start+end proc s-exp args-exp) - body ...)) - ((let-string-start+end (start end rest) proc s-exp args-exp body ...) - (receive (rest start end) (string-parse-start+end proc s-exp args-exp) - body ...)))) - -;;; This one parses out a *pair* of final start/end indices. -;;; Not exported; for internal use. -(define-syntax let-string-start+end2 - (syntax-rules () - ((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...) - (let ((procv proc)) ; Make sure PROC is only evaluated once. - (let-string-start+end (start1 end1 rest) procv s1 args - (let-string-start+end (start2 end2) procv s2 rest - body ...)))))) - - -;;; Returns three values: rest start end - -(define (string-parse-start+end proc s args) - (if (not (string? s)) (error "Non-string value" proc s)) - (let ((slen (string-length s))) - (if (pair? args) - - (let ((start (car args)) - (args (cdr args))) - (if (and (integer? start) (exact? start) (>= start 0)) - (receive (end args) - (if (pair? args) - (let ((end (car args)) - (args (cdr args))) - (if (and (integer? end) (exact? end) (<= end slen)) - (values end args) - (error "Illegal substring END spec" proc end s))) - (values slen args)) - (if (<= start end) (values args start end) - (error "Illegal substring START/END spec" - proc start end s))) - (error "Illegal substring START spec" proc start s))) - - (values '() 0 slen)))) - -(define (string-parse-final-start+end proc s args) - (receive (rest start end) (string-parse-start+end proc s args) - (if (pair? rest) (error "Extra arguments to procedure" proc rest) - (values start end)))) - -(define (substring-spec-ok? s start end) - (and (string? s) - (integer? start) - (exact? start) - (integer? end) - (exact? end) - (<= 0 start) - (<= start end) - (<= end (string-length s)))) - -(define (check-substring-spec proc s start end) - (if (not (substring-spec-ok? s start end)) - (error "Illegal substring spec." proc s start end))) - - -;;; Defined by R5RS, so commented out here. -;(define (string . chars) -; (let* ((len (length chars)) -; (ans (make-string len))) -; (do ((i 0 (+ i 1)) -; (chars chars (cdr chars))) -; ((>= i len)) -; (string-set! ans i (car chars))) -; ans)) -; -;(define (string . chars) (string-unfold null? car cdr chars)) - - - -;;; substring/shared S START [END] -;;; string-copy S [START END] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; All this goop is just arg parsing & checking surrounding a call to the -;;; actual primitive, %SUBSTRING/SHARED. - -(define (substring/shared s start . maybe-end) - (check-arg string? s substring/shared) - (let ((slen (string-length s))) - (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start))) - start substring/shared) - (%substring/shared s start - (:optional maybe-end slen - (lambda (end) (and (integer? end) - (exact? end) - (<= start end) - (<= end slen))))))) - -;;; Split out so that other routines in this library can avoid arg-parsing -;;; overhead for END parameter. -(define (%substring/shared s start end) - (if (and (zero? start) (= end (string-length s))) s - (substring s start end))) - -(define (string-copy s . maybe-start+end) - (let-string-start+end (start end) string-copy s maybe-start+end - (substring s start end))) - -;This library uses the R5RS SUBSTRING, but doesn't export it. -;Here is a definition, just for completeness. -;(define (substring s start end) -; (check-substring-spec substring s start end) -; (let* ((slen (- end start)) -; (ans (make-string slen))) -; (do ((i 0 (+ i 1)) -; (j start (+ j 1))) -; ((>= i slen) ans) -; (string-set! ans i (string-ref s j))))) - -;;; Basic iterators and other higher-order abstractions -;;; (string-map proc s [start end]) -;;; (string-map! proc s [start end]) -;;; (string-fold kons knil s [start end]) -;;; (string-fold-right kons knil s [start end]) -;;; (string-unfold p f g seed [base make-final]) -;;; (string-unfold-right p f g seed [base make-final]) -;;; (string-for-each proc s [start end]) -;;; (string-for-each-index proc s [start end]) -;;; (string-every char-set/char/pred s [start end]) -;;; (string-any char-set/char/pred s [start end]) -;;; (string-tabulate proc len) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; You want compiler support for high-level transforms on fold and unfold ops. -;;; You'd at least like a lot of inlining for clients of these procedures. -;;; Don't hold your breath. - -(define (string-map proc s . maybe-start+end) - (check-arg procedure? proc string-map) - (let-string-start+end (start end) string-map s maybe-start+end - (%string-map proc s start end))) - -(define (%string-map proc s start end) ; Internal utility - (let* ((len (- end start)) - (ans (make-string len))) - (do ((i (- end 1) (- i 1)) - (j (- len 1) (- j 1))) - ((< j 0)) - (string-set! ans j (proc (string-ref s i)))) - ans)) - -(define (string-map! proc s . maybe-start+end) - (check-arg procedure? proc string-map!) - (let-string-start+end (start end) string-map! s maybe-start+end - (%string-map! proc s start end))) - -(define (%string-map! proc s start end) - (do ((i (- end 1) (- i 1))) - ((< i start)) - (string-set! s i (proc (string-ref s i))))) - -(define (string-fold kons knil s . maybe-start+end) - (check-arg procedure? kons string-fold) - (let-string-start+end (start end) string-fold s maybe-start+end - (let lp ((v knil) (i start)) - (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) - v)))) - -(define (string-fold-right kons knil s . maybe-start+end) - (check-arg procedure? kons string-fold-right) - (let-string-start+end (start end) string-fold-right s maybe-start+end - (let lp ((v knil) (i (- end 1))) - (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) - v)))) - -;;; (string-unfold p f g seed [base make-final]) -;;; This is the fundamental constructor for strings. -;;; - G is used to generate a series of "seed" values from the initial seed: -;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... -;;; - P tells us when to stop -- when it returns true when applied to one -;;; of these seed values. -;;; - F maps each seed value to the corresponding character -;;; in the result string. These chars are assembled into the -;;; string in a left-to-right order. -;;; - BASE is the optional initial/leftmost portion of the constructed string; -;;; it defaults to the empty string "". -;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns -;;; true) to produce the final/rightmost portion of the constructed string. -;;; It defaults to (LAMBDA (X) ""). -;;; -;;; In other words, the following (simple, inefficient) definition holds: -;;; (define (string-unfold p f g seed base make-final) -;;; (string-append base -;;; (let recur ((seed seed)) -;;; (if (p seed) (make-final seed) -;;; (string-append (string (f seed)) -;;; (recur (g seed))))))) -;;; -;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to -;;; reverse a string, copy a string, convert a list to a string, read -;;; a port into a string, and so forth. Examples: -;;; (port->string port) = -;;; (string-unfold (compose eof-object? peek-char) -;;; read-char values port) -;;; -;;; (list->string lis) = (string-unfold null? car cdr lis) -;;; -;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) - -;;; A problem with the following simple formulation is that it pushes one -;;; stack frame for every char in the result string -- an issue if you are -;;; using it to read a 100kchar string. So we don't use it -- but I include -;;; it to give a clear, straightforward description of what the function -;;; does. - -;(define (string-unfold p f g seed base make-final) -; (let ((ans (let recur ((seed seed) (i (string-length base))) -; (if (p seed) -; (let* ((final (make-final seed)) -; (ans (make-string (+ i (string-length final))))) -; (string-copy! ans i final) -; ans) -; -; (let* ((c (f seed)) -; (s (recur (g seed) (+ i 1)))) -; (string-set! s i c) -; s))))) -; (string-copy! ans 0 base) -; ans)) - -;;; The strategy is to allocate a series of chunks into which we stash the -;;; chars as we generate them. Chunk size goes up in powers of two starting -;;; with 40 and levelling out at 4k, i.e. -;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096... -;;; This should work pretty well for short strings, 1-line (80 char) strings, -;;; and longer ones. When done, we allocate an answer string and copy the -;;; chars over from the chunk buffers. - -(define (string-unfold p f g seed . base+make-final) - (check-arg procedure? p string-unfold) - (check-arg procedure? f string-unfold) - (check-arg procedure? g string-unfold) - (let-optionals* base+make-final - ((base "" (string? base)) - (make-final (lambda (x) "") (procedure? make-final))) - (let lp ((chunks '()) ; Previously filled chunks - (nchars 0) ; Number of chars in CHUNKS - (chunk (make-string 40)) ; Current chunk into which we write - (chunk-len 40) - (i 0) ; Number of chars written into CHUNK - (seed seed)) - (let lp2 ((i i) (seed seed)) - (if (not (p seed)) - (let ((c (f seed)) - (seed (g seed))) - (if (< i chunk-len) - (begin (string-set! chunk i c) - (lp2 (+ i 1) seed)) - - (let* ((nchars2 (+ chunk-len nchars)) - (chunk-len2 (min 4096 nchars2)) - (new-chunk (make-string chunk-len2))) - (string-set! new-chunk 0 c) - (lp (cons chunk chunks) (+ nchars chunk-len) - new-chunk chunk-len2 1 seed)))) - - ;; We're done. Make the answer string & install the bits. - (let* ((final (make-final seed)) - (flen (string-length final)) - (base-len (string-length base)) - (j (+ base-len nchars i)) - (ans (make-string (+ j flen)))) - (%string-copy! ans j final 0 flen) ; Install FINAL. - (let ((j (- j i))) - (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I). - (let lp ((j j) (chunks chunks)) ; Install CHUNKS. - (if (pair? chunks) - (let* ((chunk (car chunks)) - (chunks (cdr chunks)) - (chunk-len (string-length chunk)) - (j (- j chunk-len))) - (%string-copy! ans j chunk 0 chunk-len) - (lp j chunks))))) - (%string-copy! ans 0 base 0 base-len) ; Install BASE. - ans)))))) - -(define (string-unfold-right p f g seed . base+make-final) - (let-optionals* base+make-final - ((base "" (string? base)) - (make-final (lambda (x) "") (procedure? make-final))) - (let lp ((chunks '()) ; Previously filled chunks - (nchars 0) ; Number of chars in CHUNKS - (chunk (make-string 40)) ; Current chunk into which we write - (chunk-len 40) - (i 40) ; Number of chars available in CHUNK - (seed seed)) - (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right - (if (not (p seed)) ; to left. - (let ((c (f seed)) - (seed (g seed))) - (if (> i 0) - (let ((i (- i 1))) - (string-set! chunk i c) - (lp2 i seed)) - - (let* ((nchars2 (+ chunk-len nchars)) - (chunk-len2 (min 4096 nchars2)) - (new-chunk (make-string chunk-len2)) - (i (- chunk-len2 1))) - (string-set! new-chunk i c) - (lp (cons chunk chunks) (+ nchars chunk-len) - new-chunk chunk-len2 i seed)))) - - ;; We're done. Make the answer string & install the bits. - (let* ((final (make-final seed)) - (flen (string-length final)) - (base-len (string-length base)) - (chunk-used (- chunk-len i)) - (j (+ base-len nchars chunk-used)) - (ans (make-string (+ j flen)))) - (%string-copy! ans 0 final 0 flen) ; Install FINAL. - (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,). - (let lp ((j (+ flen chunk-used)) ; Install CHUNKS. - (chunks chunks)) - (if (pair? chunks) - (let* ((chunk (car chunks)) - (chunks (cdr chunks)) - (chunk-len (string-length chunk))) - (%string-copy! ans j chunk 0 chunk-len) - (lp (+ j chunk-len) chunks)) - (%string-copy! ans j base 0 base-len))); Install BASE. - ans)))))) - - -(define (string-for-each proc s . maybe-start+end) - (check-arg procedure? proc string-for-each) - (let-string-start+end (start end) string-for-each s maybe-start+end - (let lp ((i start)) - (if (< i end) - (begin (proc (string-ref s i)) - (lp (+ i 1))))))) - -(define (string-for-each-index proc s . maybe-start+end) - (check-arg procedure? proc string-for-each-index) - (let-string-start+end (start end) string-for-each-index s maybe-start+end - (let lp ((i start)) - (if (< i end) (begin (proc i) (lp (+ i 1))))))) - -(define (string-every criterion s . maybe-start+end) - (let-string-start+end (start end) string-every s maybe-start+end - (cond ((char? criterion) - (let lp ((i start)) - (or (>= i end) - (and (char=? criterion (string-ref s i)) - (lp (+ i 1)))))) - - ((char-set? criterion) - (let lp ((i start)) - (or (>= i end) - (and (char-set-contains? criterion (string-ref s i)) - (lp (+ i 1)))))) - - ((procedure? criterion) ; Slightly funky loop so that - (or (= start end) ; final (PRED S[END-1]) call - (let lp ((i start)) ; is a tail call. - (let ((c (string-ref s i)) - (i1 (+ i 1))) - (if (= i1 end) (criterion c) ; Tail call. - (and (criterion c) (lp i1))))))) - - (else (error "Second param is neither char-set, char, or predicate procedure." - string-every criterion))))) - - -(define (string-any criterion s . maybe-start+end) - (let-string-start+end (start end) string-any s maybe-start+end - (cond ((char? criterion) - (let lp ((i start)) - (and (< i end) - (or (char=? criterion (string-ref s i)) - (lp (+ i 1)))))) - - ((char-set? criterion) - (let lp ((i start)) - (and (< i end) - (or (char-set-contains? criterion (string-ref s i)) - (lp (+ i 1)))))) - - ((procedure? criterion) ; Slightly funky loop so that - (and (< start end) ; final (PRED S[END-1]) call - (let lp ((i start)) ; is a tail call. - (let ((c (string-ref s i)) - (i1 (+ i 1))) - (if (= i1 end) (criterion c) ; Tail call - (or (criterion c) (lp i1))))))) - - (else (error "Second param is neither char-set, char, or predicate procedure." - string-any criterion))))) - - -(define (string-tabulate proc len) - (check-arg procedure? proc string-tabulate) - (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val))) - len string-tabulate) - (let ((s (make-string len))) - (do ((i (- len 1) (- i 1))) - ((< i 0)) - (string-set! s i (proc i))) - s)) - - - -;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2] -;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Find the length of the common prefix/suffix. -;;; It is not required that the two substrings passed be of equal length. -;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. -;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons, -;;; so should be as tense as possible. - -(define (%string-prefix-length s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (end1 (+ start1 delta))) - - (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path - delta - - (let lp ((i start1) (j start2)) ; Regular path - (if (or (>= i end1) - (not (char=? (string-ref s1 i) - (string-ref s2 j)))) - (- i start1) - (lp (+ i 1) (+ j 1))))))) - -(define (%string-suffix-length s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (start1 (- end1 delta))) - - (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path - delta - - (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path - (if (or (< i start1) - (not (char=? (string-ref s1 i) - (string-ref s2 j)))) - (- (- end1 i) 1) - (lp (- i 1) (- j 1))))))) - -(define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (end1 (+ start1 delta))) - - (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path - delta - - (let lp ((i start1) (j start2)) ; Regular path - (if (or (>= i end1) - (not (char-ci=? (string-ref s1 i) - (string-ref s2 j)))) - (- i start1) - (lp (+ i 1) (+ j 1))))))) - -(define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (start1 (- end1 delta))) - - (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path - delta - - (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path - (if (or (< i start1) - (not (char-ci=? (string-ref s1 i) - (string-ref s2 j)))) - (- (- end1 i) 1) - (lp (- i 1) (- j 1))))))) - - -(define (string-prefix-length s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-length s1 s2 maybe-starts+ends - (%string-prefix-length s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-length s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-length s1 s2 maybe-starts+ends - (%string-suffix-length s1 start1 end1 s2 start2 end2))) - -(define (string-prefix-length-ci s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-length-ci s1 s2 maybe-starts+ends - (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-length-ci s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-length-ci s1 s2 maybe-starts+ends - (%string-suffix-length-ci s1 start1 end1 s2 start2 end2))) - - -;;; string-prefix? s1 s2 [start1 end1 start2 end2] -;;; string-suffix? s1 s2 [start1 end1 start2 end2] -;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2] -;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; These are all simple derivatives of the previous counting funs. - -(define (string-prefix? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix? s1 s2 maybe-starts+ends - (%string-prefix? s1 start1 end1 s2 start2 end2))) - -(define (string-suffix? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix? s1 s2 maybe-starts+ends - (%string-suffix? s1 start1 end1 s2 start2 end2))) - -(define (string-prefix-ci? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-ci? s1 s2 maybe-starts+ends - (%string-prefix-ci? s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-ci? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-ci? s1 s2 maybe-starts+ends - (%string-suffix-ci? s1 start1 end1 s2 start2 end2))) - - -;;; Here are the internal routines that do the real work. - -(define (%string-prefix? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= (%string-prefix-length s1 start1 end1 - s2 start2 end2) - len1)))) - -(define (%string-suffix? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-suffix-length s1 start1 end1 - s2 start2 end2))))) - -(define (%string-prefix-ci? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-prefix-length-ci s1 start1 end1 - s2 start2 end2))))) - -(define (%string-suffix-ci? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-suffix-length-ci s1 start1 end1 - s2 start2 end2))))) - - -;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2] -;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Primitive string-comparison functions. -;;; Continuation order is different from MIT Scheme. -;;; Continuations are applied to s1's mismatch index; -;;; in the case of equality, this is END1. - -(define (%string-compare s1 start1 end1 s2 start2 end2 - proc< proc= proc>) - (let ((size1 (- end1 start1)) - (size2 (- end2 start2))) - (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2))) - (if (= match size1) - ((if (= match size2) proc= proc<) end1) - ((if (= match size2) - proc> - (if (char)) - (+ match start1)))))) - -(define (%string-compare-ci s1 start1 end1 s2 start2 end2 - proc< proc= proc>) - (let ((size1 (- end1 start1)) - (size2 (- end2 start2))) - (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) - (if (= match size1) - ((if (= match size2) proc= proc<) end1) - ((if (= match size2) proc> - (if (char-ci)) - (+ start1 match)))))) - -(define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends) - (check-arg procedure? proc< string-compare) - (check-arg procedure? proc= string-compare) - (check-arg procedure? proc> string-compare) - (let-string-start+end2 (start1 end1 start2 end2) - string-compare s1 s2 maybe-starts+ends - (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>))) - -(define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends) - (check-arg procedure? proc< string-compare-ci) - (check-arg procedure? proc= string-compare-ci) - (check-arg procedure? proc> string-compare-ci) - (let-string-start+end2 (start1 end1 start2 end2) - string-compare-ci s1 s2 maybe-starts+ends - (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>))) - - - -;;; string= string<> string-ci= string-ci<> -;;; string< string> string-ci< string-ci> -;;; string<= string>= string-ci<= string-ci>= -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Simple definitions in terms of the previous comparison funs. -;;; I sure hope the %STRING-COMPARE calls get integrated. - -(define (string= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string= s1 s2 maybe-starts+ends - (and (= (- end1 start1) (- end2 start2)) ; Quick filter - (or (and (eq? s1 s2) (= start1 start2)) ; Fast path - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - values - (lambda (i) #f)))))) - -(define (string<> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string<> s1 s2 maybe-starts+ends - (or (not (= (- end1 start1) (- end2 start2))) ; Fast path - (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - values - (lambda (i) #f) - values))))) - -(define (string< s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string< s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (< end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - values - (lambda (i) #f) - (lambda (i) #f))))) - -(define (string> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string> s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (> end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) #f) - values)))) - -(define (string<= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string<= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (<= end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - values - values - (lambda (i) #f))))) - -(define (string>= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string>= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (>= end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - values - values)))) - -(define (string-ci= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci= s1 s2 maybe-starts+ends - (and (= (- end1 start1) (- end2 start2)) ; Quick filter - (or (and (eq? s1 s2) (= start1 start2)) ; Fast path - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - values - (lambda (i) #f)))))) - -(define (string-ci<> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci<> s1 s2 maybe-starts+ends - (or (not (= (- end1 start1) (- end2 start2))) ; Fast path - (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - values - (lambda (i) #f) - values))))) - -(define (string-ci< s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci< s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (< end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - values - (lambda (i) #f) - (lambda (i) #f))))) - -(define (string-ci> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci> s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (> end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) #f) - values)))) - -(define (string-ci<= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci<= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (<= end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - values - values - (lambda (i) #f))))) - -(define (string-ci>= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci>= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (>= end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - values - values)))) - - -;;; Hash -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in -;;; to keep the intermediate values small. (We do the calculation with just -;;; enough bits to represent BOUND, masking off high bits at each step in -;;; calculation. If this screws up any important properties of the hash -;;; function I'd like to hear about it. -Olin) -;;; -;;; If you keep BOUND small enough, the intermediate calculations will -;;; always be fixnums. How small is dependent on the underlying Scheme system; -;;; we use a default BOUND of 2^22 = 4194304, which should hack it in -;;; Schemes that give you at least 29 signed bits for fixnums. The core -;;; calculation that you don't want to overflow is, worst case, -;;; (+ 65535 (* 37 (- bound 1))) -;;; where 65535 is the max character code. Choose the default BOUND to be the -;;; biggest power of two that won't cause this expression to fixnum overflow, -;;; and everything will be copacetic. - -(define (%string-hash s char->int bound start end) - (let ((iref (lambda (s i) (char->int (string-ref s i)))) - ;; Compute a 111...1 mask that will cover BOUND-1: - (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? - (if (>= i bound) (- i 1) (lp (+ i i)))))) - (let lp ((i start) (ans 0)) - (if (>= i end) (modulo ans bound) - (lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i)))))))) - -(define (string-hash s . maybe-bound+start+end) - (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) - (exact? bound) - (<= 0 bound))) - rest) - (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. - (let-string-start+end (start end) string-hash s rest - (%string-hash s char->integer bound start end))))) - -(define (string-hash-ci s . maybe-bound+start+end) - (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) - (exact? bound) - (<= 0 bound))) - rest) - (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. - (let-string-start+end (start end) string-hash-ci s rest - (%string-hash s (lambda (c) (char->integer (char-downcase c))) - bound start end))))) - -;;; Case hacking -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-upcase s [start end] -;;; string-upcase! s [start end] -;;; string-downcase s [start end] -;;; string-downcase! s [start end] -;;; -;;; string-titlecase s [start end] -;;; string-titlecase! s [start end] -;;; Capitalize every contiguous alpha sequence: capitalise -;;; first char, lowercase rest. - -(define (string-upcase s . maybe-start+end) - (let-string-start+end (start end) string-upcase s maybe-start+end - (%string-map char-upcase s start end))) - -(define (string-upcase! s . maybe-start+end) - (let-string-start+end (start end) string-upcase! s maybe-start+end - (%string-map! char-upcase s start end))) - -(define (string-downcase s . maybe-start+end) - (let-string-start+end (start end) string-downcase s maybe-start+end - (%string-map char-downcase s start end))) - -(define (string-downcase! s . maybe-start+end) - (let-string-start+end (start end) string-downcase! s maybe-start+end - (%string-map! char-downcase s start end))) - -(define (%string-titlecase! s start end) - (let lp ((i start)) - (cond ((string-index s char-cased? i end) => - (lambda (i) - (string-set! s i (char-titlecase (string-ref s i))) - (let ((i1 (+ i 1))) - (cond ((string-skip s char-cased? i1 end) => - (lambda (j) - (string-downcase! s i1 j) - (lp (+ j 1)))) - (else (string-downcase! s i1 end))))))))) - -(define (string-titlecase! s . maybe-start+end) - (let-string-start+end (start end) string-titlecase! s maybe-start+end - (%string-titlecase! s start end))) - -(define (string-titlecase s . maybe-start+end) - (let-string-start+end (start end) string-titlecase! s maybe-start+end - (let ((ans (substring s start end))) - (%string-titlecase! ans 0 (- end start)) - ans))) - - -;;; Cutting & pasting strings -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-take string nchars -;;; string-drop string nchars -;;; -;;; string-take-right string nchars -;;; string-drop-right string nchars -;;; -;;; string-pad string k [char start end] -;;; string-pad-right string k [char start end] -;;; -;;; string-trim string [char/char-set/pred start end] -;;; string-trim-right string [char/char-set/pred start end] -;;; string-trim-both string [char/char-set/pred start end] -;;; -;;; These trimmers invert the char-set meaning from MIT Scheme -- you -;;; say what you want to trim. - -(define (string-take s n) - (check-arg string? s string-take) - (check-arg (lambda (val) (and (integer? n) (exact? n) - (<= 0 n (string-length s)))) - n string-take) - (%substring/shared s 0 n)) - -(define (string-take-right s n) - (check-arg string? s string-take-right) - (let ((len (string-length s))) - (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) - n string-take-right) - (%substring/shared s (- len n) len))) - -(define (string-drop s n) - (check-arg string? s string-drop) - (let ((len (string-length s))) - (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) - n string-drop) - (%substring/shared s n len))) - -(define (string-drop-right s n) - (check-arg string? s string-drop-right) - (let ((len (string-length s))) - (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) - n string-drop-right) - (%substring/shared s 0 (- len n)))) - - -(define (string-trim s . criterion+start+end) - (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) - (let-string-start+end (start end) string-trim s rest - (cond ((string-skip s criterion start end) => - (lambda (i) (%substring/shared s i end))) - (else ""))))) - -(define (string-trim-right s . criterion+start+end) - (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) - (let-string-start+end (start end) string-trim-right s rest - (cond ((string-skip-right s criterion start end) => - (lambda (i) (%substring/shared s 0 (+ 1 i)))) - (else ""))))) - -(define (string-trim-both s . criterion+start+end) - (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) - (let-string-start+end (start end) string-trim-both s rest - (cond ((string-skip s criterion start end) => - (lambda (i) - (%substring/shared s i (+ 1 (string-skip-right s criterion i end))))) - (else ""))))) - - -(define (string-pad-right s n . char+start+end) - (let-optionals* char+start+end ((char #\space (char? char)) rest) - (let-string-start+end (start end) string-pad-right s rest - (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) - n string-pad-right) - (let ((len (- end start))) - (if (<= n len) - (%substring/shared s start (+ start n)) - (let ((ans (make-string n char))) - (%string-copy! ans 0 s start end) - ans)))))) - -(define (string-pad s n . char+start+end) - (let-optionals* char+start+end ((char #\space (char? char)) rest) - (let-string-start+end (start end) string-pad s rest - (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) - n string-pad) - (let ((len (- end start))) - (if (<= n len) - (%substring/shared s (- end n) end) - (let ((ans (make-string n char))) - (%string-copy! ans (- n len) s start end) - ans)))))) - - - -;;; Filtering strings -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-delete char/char-set/pred string [start end] -;;; string-filter char/char-set/pred string [start end] -;;; -;;; If the criterion is a char or char-set, we scan the string twice with -;;; string-fold -- once to determine the length of the result string, -;;; and once to do the filtered copy. -;;; If the criterion is a predicate, we don't do this double-scan strategy, -;;; because the predicate might have side-effects or be very expensive to -;;; compute. So we preallocate a temp buffer pessimistically, and only do -;;; one scan over S. This is likely to be faster and more space-efficient -;;; than consing a list. - -(define (string-delete criterion s . maybe-start+end) - (let-string-start+end (start end) string-delete s maybe-start+end - (if (procedure? criterion) - (let* ((slen (- end start)) - (temp (make-string slen)) - (ans-len (string-fold (lambda (c i) - (if (criterion c) i - (begin (string-set! temp i c) - (+ i 1)))) - 0 s start end))) - (if (= ans-len slen) temp (substring temp 0 ans-len))) - - (let* ((cset (cond ((char-set? criterion) criterion) - ((char? criterion) (char-set criterion)) - (else (error "string-delete criterion not predicate, char or char-set" criterion)))) - (len (string-fold (lambda (c i) (if (char-set-contains? cset c) - i - (+ i 1))) - 0 s start end)) - (ans (make-string len))) - (string-fold (lambda (c i) (if (char-set-contains? cset c) - i - (begin (string-set! ans i c) - (+ i 1)))) - 0 s start end) - ans)))) - -(define (string-filter criterion s . maybe-start+end) - (let-string-start+end (start end) string-filter s maybe-start+end - (if (procedure? criterion) - (let* ((slen (- end start)) - (temp (make-string slen)) - (ans-len (string-fold (lambda (c i) - (if (criterion c) - (begin (string-set! temp i c) - (+ i 1)) - i)) - 0 s start end))) - (if (= ans-len slen) temp (substring temp 0 ans-len))) - - (let* ((cset (cond ((char-set? criterion) criterion) - ((char? criterion) (char-set criterion)) - (else (error "string-delete criterion not predicate, char or char-set" criterion)))) - - (len (string-fold (lambda (c i) (if (char-set-contains? cset c) - (+ i 1) - i)) - 0 s start end)) - (ans (make-string len))) - (string-fold (lambda (c i) (if (char-set-contains? cset c) - (begin (string-set! ans i c) - (+ i 1)) - i)) - 0 s start end) - ans)))) - - -;;; String search -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-index string char/char-set/pred [start end] -;;; string-index-right string char/char-set/pred [start end] -;;; string-skip string char/char-set/pred [start end] -;;; string-skip-right string char/char-set/pred [start end] -;;; string-count string char/char-set/pred [start end] -;;; There's a lot of replicated code here for efficiency. -;;; For example, the char/char-set/pred discrimination has -;;; been lifted above the inner loop of each proc. - -(define (string-index str criterion . maybe-start+end) - (let-string-start+end (start end) string-index str maybe-start+end - (cond ((char? criterion) - (let lp ((i start)) - (and (< i end) - (if (char=? criterion (string-ref str i)) i - (lp (+ i 1)))))) - ((char-set? criterion) - (let lp ((i start)) - (and (< i end) - (if (char-set-contains? criterion (string-ref str i)) i - (lp (+ i 1)))))) - ((procedure? criterion) - (let lp ((i start)) - (and (< i end) - (if (criterion (string-ref str i)) i - (lp (+ i 1)))))) - (else (error "Second param is neither char-set, char, or predicate procedure." - string-index criterion))))) - -(define (string-index-right str criterion . maybe-start+end) - (let-string-start+end (start end) string-index-right str maybe-start+end - (cond ((char? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (char=? criterion (string-ref str i)) i - (lp (- i 1)))))) - ((char-set? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (char-set-contains? criterion (string-ref str i)) i - (lp (- i 1)))))) - ((procedure? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (criterion (string-ref str i)) i - (lp (- i 1)))))) - (else (error "Second param is neither char-set, char, or predicate procedure." - string-index-right criterion))))) - -(define (string-skip str criterion . maybe-start+end) - (let-string-start+end (start end) string-skip str maybe-start+end - (cond ((char? criterion) - (let lp ((i start)) - (and (< i end) - (if (char=? criterion (string-ref str i)) - (lp (+ i 1)) - i)))) - ((char-set? criterion) - (let lp ((i start)) - (and (< i end) - (if (char-set-contains? criterion (string-ref str i)) - (lp (+ i 1)) - i)))) - ((procedure? criterion) - (let lp ((i start)) - (and (< i end) - (if (criterion (string-ref str i)) (lp (+ i 1)) - i)))) - (else (error "Second param is neither char-set, char, or predicate procedure." - string-skip criterion))))) - -(define (string-skip-right str criterion . maybe-start+end) - (let-string-start+end (start end) string-skip-right str maybe-start+end - (cond ((char? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (char=? criterion (string-ref str i)) - (lp (- i 1)) - i)))) - ((char-set? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (char-set-contains? criterion (string-ref str i)) - (lp (- i 1)) - i)))) - ((procedure? criterion) - (let lp ((i (- end 1))) - (and (>= i 0) - (if (criterion (string-ref str i)) (lp (- i 1)) - i)))) - (else (error "CRITERION param is neither char-set or char." - string-skip-right criterion))))) - - -(define (string-count s criterion . maybe-start+end) - (let-string-start+end (start end) string-count s maybe-start+end - (cond ((char? criterion) - (do ((i start (+ i 1)) - (count 0 (if (char=? criterion (string-ref s i)) - (+ count 1) - count))) - ((>= i end) count))) - - ((char-set? criterion) - (do ((i start (+ i 1)) - (count 0 (if (char-set-contains? criterion (string-ref s i)) - (+ count 1) - count))) - ((>= i end) count))) - - ((procedure? criterion) - (do ((i start (+ i 1)) - (count 0 (if (criterion (string-ref s i)) (+ count 1) count))) - ((>= i end) count))) - - (else (error "CRITERION param is neither char-set or char." - string-count criterion))))) - - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-fill! string char [start end] -;;; -;;; string-copy! to tstart from [fstart fend] -;;; Guaranteed to work, even if s1 eq s2. - -(define (string-fill! s char . maybe-start+end) - (check-arg char? char string-fill!) - (let-string-start+end (start end) string-fill! s maybe-start+end - (do ((i (- end 1) (- i 1))) - ((< i start)) - (string-set! s i char)))) - -(define (string-copy! to tstart from . maybe-fstart+fend) - (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend - (check-arg integer? tstart string-copy!) - (check-substring-spec string-copy! to tstart (+ tstart (- fend fstart))) - (%string-copy! to tstart from fstart fend))) - -;;; Library-internal routine -(define (%string-copy! to tstart from fstart fend) - (if (> fstart tstart) - (do ((i fstart (+ i 1)) - (j tstart (+ j 1))) - ((>= i fend)) - (string-set! to j (string-ref from i))) - - (do ((i (- fend 1) (- i 1)) - (j (+ -1 tstart (- fend fstart)) (- j 1))) - ((< i fstart)) - (string-set! to j (string-ref from i))))) - - - -;;; Returns starting-position in STRING or #f if not true. -;;; This implementation is slow & simple. It is useful as a "spec" or for -;;; comparison testing with fancier implementations. -;;; See below for fast KMP version. - -;(define (string-contains string substring . maybe-starts+ends) -; (let-string-start+end2 (start1 end1 start2 end2) -; string-contains string substring maybe-starts+ends -; (let* ((len (- end2 start2)) -; (i-bound (- end1 len))) -; (let lp ((i start1)) -; (and (< i i-bound) -; (if (string= string substring i (+ i len) start2 end2) -; i -; (lp (+ i 1)))))))) - - -;;; Searching for an occurrence of a substring -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (string-contains text pattern . maybe-starts+ends) - (let-string-start+end2 (t-start t-end p-start p-end) - string-contains text pattern maybe-starts+ends - (%kmp-search pattern text char=? p-start p-end t-start t-end))) - -(define (string-contains-ci text pattern . maybe-starts+ends) - (let-string-start+end2 (t-start t-end p-start p-end) - string-contains-ci text pattern maybe-starts+ends - (%kmp-search pattern text char-ci=? p-start p-end t-start t-end))) - - -;;; Knuth-Morris-Pratt string searching -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; See -;;; "Fast pattern matching in strings" -;;; SIAM J. Computing 6(2):323-350 1977 -;;; D. E. Knuth, J. H. Morris and V. R. Pratt -;;; also described in -;;; "Pattern matching in strings" -;;; Alfred V. Aho -;;; Formal Language Theory - Perspectives and Open Problems -;;; Ronald V. Brook (editor) -;;; This algorithm is O(m + n) where m and n are the -;;; lengths of the pattern and string respectively - -;;; KMP search source[start,end) for PATTERN. Return starting index of -;;; leftmost match or #f. - -(define (%kmp-search pattern text c= p-start p-end t-start t-end) - (let ((plen (- p-end p-start)) - (rv (make-kmp-restart-vector pattern c= p-start p-end))) - - ;; The search loop. TJ & PJ are redundant state. - (let lp ((ti t-start) (pi 0) - (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left. - (pj plen)) ; (- plen pi) -- how many chars left. - - (if (= pi plen) (- ti plen) ; Win. - - (and (<= pj tj) ; Lose. - - (if (c= (string-ref text ti) ; Search. - (string-ref pattern (+ p-start pi))) - (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance. - - (let ((pi (vector-ref rv pi))) ; Retreat. - (if (= pi -1) - (lp (+ ti 1) 0 (- tj 1) plen) ; Punt. - (lp ti pi tj (- plen pi)))))))))) - -;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Compute the KMP restart vector RV for string PATTERN. If -;;; we have matched chars 0..i-1 of PATTERN against a search string S, and -;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to -;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to -;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k]. -;;; -;;; In other words, if you have matched the first i chars of PATTERN, but -;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest -;;; prefix of PATTERN is that you have matched. -;;; -;;; - C= (default CHAR=?) is used to compare characters for equality. -;;; Pass in CHAR-CI=? for case-folded string search. -;;; -;;; - START & END restrict the pattern to the indicated substring; the -;;; returned vector will be of length END - START. The numbers stored -;;; in the vector will be values in the range [0,END-START) -- that is, -;;; they are valid indices into the restart vector; you have to add START -;;; to them to use them as indices into PATTERN. -;;; -;;; I've split this out as a separate function in case other constant-string -;;; searchers might want to use it. -;;; -;;; E.g.: -;;; a b d a b x -;;; #(-1 0 0 -1 1 2) - -(define (make-kmp-restart-vector pattern . maybe-c=+start+end) - (let-optionals* maybe-c=+start+end - ((c= char=? (procedure? c=)) - ((start end) (lambda (args) - (string-parse-start+end make-kmp-restart-vector - pattern args)))) - (let* ((rvlen (- end start)) - (rv (make-vector rvlen -1))) - (if (> rvlen 0) - (let ((rvlen-1 (- rvlen 1)) - (c0 (string-ref pattern start))) - - ;; Here's the main loop. We have set rv[0] ... rv[i]. - ;; K = I + START -- it is the corresponding index into PATTERN. - (let lp1 ((i 0) (j -1) (k start)) - (if (< i rvlen-1) - - (let ((ck (string-ref pattern k))) - ;; lp2 invariant: - ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1] - ;; or j = -1. - (let lp2 ((j j)) - - (cond ((= j -1) - (let ((i1 (+ i 1))) - (vector-set! rv i1 (if (c= ck c0) -1 0)) - (lp1 i1 0 (+ k 1)))) - - ;; pat[(k-j) .. k] matches pat[start..start+j]. - ((c= ck (string-ref pattern (+ j start))) - (let* ((i1 (+ 1 i)) - (j1 (+ 1 j))) - (vector-set! rv i1 j1) - (lp1 i1 j1 (+ k 1)))) - - (else (lp2 (vector-ref rv j)))))))))) - rv))) - - -;;; We've matched I chars from PAT. C is the next char from the search string. -;;; Return the new I after handling C. -;;; -;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START -;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched -;;; are -;;; PAT[PAT-START .. PAT-START + I]. -;;; -;;; It's *not* an oversight that there is no friendly error checking or -;;; defaulting of arguments. This is a low-level, inner-loop procedure -;;; that we want integrated/inlined into the point of call. - -(define (kmp-step pat rv c i c= p-start) - (let lp ((i i)) - (if (c= c (string-ref pat (+ i p-start))) ; Match => - (+ i 1) ; Done. - (let ((i (vector-ref rv i))) ; Back up in PAT. - (if (= i -1) 0 ; Can't back up further. - (lp i)))))) ; Keep trying for match. - -;;; Zip through S[start,end), looking for a match of PAT. Assume we've -;;; already matched the first I chars of PAT when we commence at S[start]. -;;; - <0: If we find a match *ending* at index J, return -J. -;;; - >=0: If we get to the end of the S[start,end) span without finding -;;; a complete match, return the number of chars from PAT we'd matched -;;; when we ran off the end. -;;; -;;; This is useful for searching *across* buffers -- that is, when your -;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop -;;; for speed. - -(define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end) - (check-arg vector? rv string-kmp-partial-search) - (let-optionals* c=+p-start+s-start+s-end - ((c= char=? (procedure? c=)) - (p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start))) - ((s-start s-end) (lambda (args) - (string-parse-start+end string-kmp-partial-search - s args)))) - (let ((patlen (vector-length rv))) - (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen))) - i string-kmp-partial-search) - - ;; Enough prelude. Here's the actual code. - (let lp ((si s-start) ; An index into S. - (vi i)) ; An index into RV. - (cond ((= vi patlen) (- si)) ; Win. - ((= si s-end) vi) ; Ran off the end. - (else ; Match s[si] & loop. - (let ((c (string-ref s si))) - (lp (+ si 1) - (let lp2 ((vi vi)) ; This is just KMP-STEP. - (if (c= c (string-ref pat (+ vi p-start))) - (+ vi 1) - (let ((vi (vector-ref rv vi))) - (if (= vi -1) 0 - (lp2 vi))))))))))))) - - -;;; Misc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (string-null? s) -;;; (string-reverse s [start end]) -;;; (string-reverse! s [start end]) -;;; (reverse-list->string clist) -;;; (string->list s [start end]) - -(define (string-null? s) (zero? (string-length s))) - -(define (string-reverse s . maybe-start+end) - (let-string-start+end (start end) string-reverse s maybe-start+end - (let* ((len (- end start)) - (ans (make-string len))) - (do ((i start (+ i 1)) - (j (- len 1) (- j 1))) - ((< j 0)) - (string-set! ans j (string-ref s i))) - ans))) - -(define (string-reverse! s . maybe-start+end) - (let-string-start+end (start end) string-reverse! s maybe-start+end - (do ((i (- end 1) (- i 1)) - (j start (+ j 1))) - ((<= i j)) - (let ((ci (string-ref s i))) - (string-set! s i (string-ref s j)) - (string-set! s j ci))))) - - -(define (reverse-list->string clist) - (let* ((len (length clist)) - (s (make-string len))) - (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) - ((not (pair? clist))) - (string-set! s i (car clist))) - s)) - - -;(define (string->list s . maybe-start+end) -; (apply string-fold-right cons '() s maybe-start+end)) - -(define (string->list s . maybe-start+end) - (let-string-start+end (start end) string->list s maybe-start+end - (do ((i (- end 1) (- i 1)) - (ans '() (cons (string-ref s i) ans))) - ((< i start) ans)))) - -;;; Defined by R5RS, so commented out here. -;(define (list->string lis) (string-unfold null? car cdr lis)) - - -;;; string-concatenate string-list -> string -;;; string-concatenate/shared string-list -> string -;;; string-append/shared s ... -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; STRING-APPEND/SHARED has license to return a string that shares storage -;;; with any of its arguments. In particular, if there is only one non-empty -;;; string amongst its parameters, it is permitted to return that string as -;;; its result. STRING-APPEND, by contrast, always allocates new storage. -;;; -;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of -;;; strings, which they concatenate into a result string. STRING-CONCATENATE -;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may -;;; not) return a result that shares storage with any of its arguments. In -;;; particular, if it is applied to a singleton list, it is permitted to -;;; return the car of that list as its value. - -(define (string-append/shared . strings) (string-concatenate/shared strings)) - -(define (string-concatenate/shared strings) - (let lp ((strings strings) (nchars 0) (first #f)) - (cond ((pair? strings) ; Scan the args, add up total - (let* ((string (car strings)) ; length, remember 1st - (tail (cdr strings)) ; non-empty string. - (slen (string-length string))) - (if (zero? slen) - (lp tail nchars first) - (lp tail (+ nchars slen) (or first strings))))) - - ((zero? nchars) "") - - ;; Just one non-empty string! Return it. - ((= nchars (string-length (car first))) (car first)) - - (else (let ((ans (make-string nchars))) - (let lp ((strings first) (i 0)) - (if (pair? strings) - (let* ((s (car strings)) - (slen (string-length s))) - (%string-copy! ans i s 0 slen) - (lp (cdr strings) (+ i slen))))) - ans))))) - - -; Alas, Scheme 48's APPLY blows up if you have many, many arguments. -;(define (string-concatenate strings) (apply string-append strings)) - -;;; Here it is written out. I avoid using REDUCE to add up string lengths -;;; to avoid non-R5RS dependencies. -(define (string-concatenate strings) - (let* ((total (do ((strings strings (cdr strings)) - (i 0 (+ i (string-length (car strings))))) - ((not (pair? strings)) i))) - (ans (make-string total))) - (let lp ((i 0) (strings strings)) - (if (pair? strings) - (let* ((s (car strings)) - (slen (string-length s))) - (%string-copy! ans i s 0 slen) - (lp (+ i slen) (cdr strings))))) - ans)) - - -;;; Defined by R5RS, so commented out here. -;(define (string-append . strings) (string-concatenate strings)) - -;;; string-concatenate-reverse string-list [final-string end] -> string -;;; string-concatenate-reverse/shared string-list [final-string end] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Return -;;; (string-concatenate -;;; (reverse -;;; (cons (substring final-string 0 end) string-list))) - -(define (string-concatenate-reverse string-list . maybe-final+end) - (let-optionals* maybe-final+end ((final "" (string? final)) - (end (string-length final) - (and (integer? end) - (exact? end) - (<= 0 end (string-length final))))) - (let ((len (let lp ((sum 0) (lis string-list)) - (if (pair? lis) - (lp (+ sum (string-length (car lis))) (cdr lis)) - sum)))) - - (%finish-string-concatenate-reverse len string-list final end)))) - -(define (string-concatenate-reverse/shared string-list . maybe-final+end) - (let-optionals* maybe-final+end ((final "" (string? final)) - (end (string-length final) - (and (integer? end) - (exact? end) - (<= 0 end (string-length final))))) - ;; Add up the lengths of all the strings in STRING-LIST; also get a - ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length - ;; string starts. - (let lp ((len 0) (nzlist #f) (lis string-list)) - (if (pair? lis) - (let ((slen (string-length (car string-list)))) - (lp (+ len slen) - (if (or nzlist (zero? slen)) nzlist lis) - (cdr lis))) - - (cond ((zero? len) (substring/shared final 0 end)) - - ;; LEN > 0, so NZLIST is non-empty. - - ((and (zero? end) (= len (string-length (car nzlist)))) - (car nzlist)) - - (else (%finish-string-concatenate-reverse len nzlist final end))))))) - -(define (%finish-string-concatenate-reverse len string-list final end) - (let ((ans (make-string (+ end len)))) - (%string-copy! ans len final 0 end) - (let lp ((i len) (lis string-list)) - (if (pair? lis) - (let* ((s (car lis)) - (lis (cdr lis)) - (slen (string-length s)) - (i (- i slen))) - (%string-copy! ans i s 0 slen) - (lp i lis)))) - ans)) - - - - -;;; string-replace s1 s2 start1 end1 [start2 end2] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Replace S1[START1,END1) with S2[START2,END2). - -(define (string-replace s1 s2 start1 end1 . maybe-start+end) - (check-substring-spec string-replace s1 start1 end1) - (let-string-start+end (start2 end2) string-replace s2 maybe-start+end - (let* ((slen1 (string-length s1)) - (sublen2 (- end2 start2)) - (alen (+ (- slen1 (- end1 start1)) sublen2)) - (ans (make-string alen))) - (%string-copy! ans 0 s1 0 start1) - (%string-copy! ans start1 s2 start2 end2) - (%string-copy! ans (+ start1 sublen2) s1 end1 slen1) - ans))) - - -;;; string-tokenize s [token-set start end] -> list -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Break S up into a list of token strings, where a token is a maximal -;;; non-empty contiguous sequence of chars belonging to TOKEN-SET. -;;; (string-tokenize "hello, world") => ("hello," "world") - -(define (string-tokenize s . token-chars+start+end) - (let-optionals* token-chars+start+end - ((token-chars char-set:graphic (char-set? token-chars)) rest) - (let-string-start+end (start end) string-tokenize s rest - (let lp ((i end) (ans '())) - (cond ((and (< start i) (string-index-right s token-chars start i)) => - (lambda (tend-1) - (let ((tend (+ 1 tend-1))) - (cond ((string-skip-right s token-chars start tend-1) => - (lambda (tstart-1) - (lp tstart-1 - (cons (substring s (+ 1 tstart-1) tend) - ans)))) - (else (cons (substring s start tend) ans)))))) - (else ans)))))) - - -;;; xsubstring s from [to start end] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; S is a string; START and END are optional arguments that demarcate -;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole -;;; string). Replicate this substring up and down index space, in both the -;; positive and negative directions. For example, if S = "abcdefg", START=3, -;;; and END=6, then we have the conceptual bidirectionally-infinite string -;;; ... d e f d e f d e f d e f d e f d e f d e f ... -;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... -;;; XSUBSTRING returns the substring of this string beginning at index FROM, -;;; and ending at TO (which defaults to FROM+(END-START)). -;;; -;;; You can use XSUBSTRING in many ways: -;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" -;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" -;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" -;;; -;;; Note that -;;; - The FROM/TO indices give a half-open range -- the characters from -;;; index FROM up to, but not including index TO. -;;; - The FROM/TO indices are not in terms of the index space for string S. -;;; They are in terms of the replicated index space of the substring -;;; defined by S, START, and END. -;;; -;;; It is an error if START=END -- although this is allowed by special -;;; dispensation when FROM=TO. - -(define (xsubstring s from . maybe-to+start+end) - (check-arg (lambda (val) (and (integer? val) (exact? val))) - from xsubstring) - (receive (to start end) - (if (pair? maybe-to+start+end) - (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end) - (let ((to (car maybe-to+start+end))) - (check-arg (lambda (val) (and (integer? val) - (exact? val) - (<= from val))) - to xsubstring) - (values to start end))) - (let ((slen (string-length (check-arg string? s xsubstring)))) - (values (+ from slen) 0 slen))) - (let ((slen (- end start)) - (anslen (- to from))) - (cond ((zero? anslen) "") - ((zero? slen) (error "Cannot replicate empty (sub)string" - xsubstring s from to start end)) - - ((= 1 slen) ; Fast path for 1-char replication. - (make-string anslen (string-ref s start))) - - ;; Selected text falls entirely within one span. - ((= (floor (/ from slen)) (floor (/ to slen))) - (substring s (+ start (modulo from slen)) - (+ start (modulo to slen)))) - - ;; Selected text requires multiple spans. - (else (let ((ans (make-string anslen))) - (%multispan-repcopy! ans 0 s from to start end) - ans)))))) - - -;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Exactly the same as xsubstring, but the extracted text is written -;;; into the string TARGET starting at index TSTART. -;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy -;;; a string on top of itself. - -(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) - (check-arg (lambda (val) (and (integer? val) (exact? val))) - sfrom string-xcopy!) - (receive (sto start end) - (if (pair? maybe-sto+start+end) - (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) - (let ((sto (car maybe-sto+start+end))) - (check-arg (lambda (val) (and (integer? val) (exact? val))) - sto string-xcopy!) - (values sto start end))) - (let ((slen (string-length s))) - (values (+ sfrom slen) 0 slen))) - - (let* ((tocopy (- sto sfrom)) - (tend (+ tstart tocopy)) - (slen (- end start))) - (check-substring-spec string-xcopy! target tstart tend) - (cond ((zero? tocopy)) - ((zero? slen) (error "Cannot replicate empty (sub)string" - string-xcopy! - target tstart s sfrom sto start end)) - - ((= 1 slen) ; Fast path for 1-char replication. - (string-fill! target (string-ref s start) tstart tend)) - - ;; Selected text falls entirely within one span. - ((= (floor (/ sfrom slen)) (floor (/ sto slen))) - (%string-copy! target tstart s - (+ start (modulo sfrom slen)) - (+ start (modulo sto slen)))) - - ;; Multi-span copy. - (else (%multispan-repcopy! target tstart s sfrom sto start end)))))) - -;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! -;;; Internal -- not exported, no careful arg checking. -(define (%multispan-repcopy! target tstart s sfrom sto start end) - (let* ((slen (- end start)) - (i0 (+ start (modulo sfrom slen))) - (total-chars (- sto sfrom))) - - ;; Copy the partial span @ the beginning - (%string-copy! target tstart s i0 end) - - (let* ((ncopied (- end i0)) ; We've copied this many. - (nleft (- total-chars ncopied)) ; # chars left to copy. - (nspans (quotient nleft slen))) ; # whole spans to copy - - ;; Copy the whole spans in the middle. - (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. - (nspans nspans (- nspans 1))) ; # spans to copy - ((zero? nspans) - ;; Copy the partial-span @ the end & we're done. - (%string-copy! target i s start (+ start (- total-chars (- i tstart))))) - - (%string-copy! target i s start end))))); Copy a whole span. - - - -;;; (string-join string-list [delimiter grammar]) => string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Paste strings together using the delimiter string. -;;; -;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" -;;; -;;; DELIMITER defaults to a single space " " -;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix} -;;; and defaults to 'infix. -;;; -;;; I could rewrite this more efficiently -- precompute the length of the -;;; answer string, then allocate & fill it in iteratively. Using -;;; STRING-CONCATENATE is less efficient. - -(define (string-join strings . delim+grammar) - (let-optionals* delim+grammar ((delim " " (string? delim)) - (grammar 'infix)) - (let ((buildit (lambda (lis final) - (let recur ((lis lis)) - (if (pair? lis) - (cons delim (cons (car lis) (recur (cdr lis)))) - final))))) - - (cond ((pair? strings) - (string-concatenate - (case grammar - - ((infix strict-infix) - (cons (car strings) (buildit (cdr strings) '()))) - - ((prefix) (buildit strings '())) - - ((suffix) - (cons (car strings) (buildit (cdr strings) (list delim)))) - - (else (error "Illegal join grammar" - grammar string-join))))) - - ((not (null? strings)) - (error "STRINGS parameter not list." strings string-join)) - - ;; STRINGS is () - - ((eq? grammar 'strict-infix) - (error "Empty list cannot be joined with STRICT-INFIX grammar." - string-join)) - - (else ""))))) ; Special-cased for infix grammar. - - -;;; Porting & performance-tuning notes -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; See the section at the beginning of this file on external dependencies. -;;; -;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro. -;;; There are many, many optional arguments in this library; the complexity -;;; of parsing, defaulting & type-testing these parameters is handled with the -;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can -;;; rewrite the uses, port the hairy macro definition (which is implemented -;;; using a Clinger-Rees low-level explicit-renaming macro system), or port -;;; the simple, high-level definition, which is less efficient. -;;; -;;; There is a fair amount of argument checking. This is, strictly speaking, -;;; unnecessary -- the actual body of the procedures will blow up if, say, a -;;; START/END index is improper. However, the error message will not be as -;;; good as if the error were caught at the "higher level." Also, a very, very -;;; smart Scheme compiler may be able to exploit having the type checks done -;;; early, so that the actual body of the procedures can assume proper values. -;;; This isn't likely; this kind of compiler technology isn't common any -;;; longer. -;;; -;;; The overhead of optional-argument parsing is irritating. The optional -;;; arguments must be consed into a rest list on entry, and then parsed out. -;;; Function call should be a matter of a few register moves and a jump; it -;;; should not involve heap allocation! Your Scheme system may have a superior -;;; non-R5RS optional-argument system that can eliminate this overhead. If so, -;;; then this is a prime candidate for optimising these procedures, -;;; *especially* the many optional START/END index parameters. -;;; -;;; Note that optional arguments are also a barrier to procedure integration. -;;; If your Scheme system permits you to specify alternate entry points -;;; for a call when the number of optional arguments is known in a manner -;;; that enables inlining/integration, this can provide performance -;;; improvements. -;;; -;;; There is enough *explicit* error checking that *all* string-index -;;; operations should *never* produce a bounds error. Period. Feel like -;;; living dangerously? *Big* performance win to be had by replacing -;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops. -;;; Similarly, fixnum-specific operators can speed up the arithmetic done on -;;; the index values in the inner loops. The only arguments that are not -;;; completely error checked are -;;; - string lists (complete checking requires time proportional to the -;;; length of the list) -;;; - procedure arguments, such as char->char maps & predicates. -;;; There is no way to check the range & domain of procedures in Scheme. -;;; Procedures that take these parameters cannot fully check their -;;; arguments. But all other types to all other procedures are fully -;;; checked. -;;; -;;; This does open up the alternate possibility of simply *removing* these -;;; checks, and letting the safe primitives raise the errors. On a dumb -;;; Scheme system, this would provide speed (by eliminating the redundant -;;; error checks) at the cost of error-message clarity. -;;; -;;; See the comments preceding the hash function code for notes on tuning -;;; the default bound so that the code never overflows your implementation's -;;; fixnum size into bignum calculation. -;;; -;;; In an interpreted Scheme, some of these procedures, or the internal -;;; routines with % prefixes, are excellent candidates for being rewritten -;;; in C. Consider STRING-HASH, %STRING-COMPARE, the -;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX & -;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED, -;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!. -;;; -;;; It would also be nice to have the ability to mark some of these -;;; routines as candidates for inlining/integration. -;;; -;;; All the %-prefixed routines in this source code are written -;;; to be called internally to this library. They do *not* perform -;;; friendly error checks on the inputs; they assume everything is -;;; proper. They also do not take optional arguments. These two properties -;;; save calling overhead and enable procedure integration -- but they -;;; are not appropriate for exported routines. - - -;;; Copyright details -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; The prefix/suffix and comparison routines in this code had (extremely -;;; distant) origins in MIT Scheme's string lib, and was substantially -;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is -;;; covered by MIT Scheme's open source copyright. See below for details. -;;; -;;; The KMP string-search code was influenced by implementations written -;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this -;;; version was written from scratch by myself. -;;; -;;; The remainder of this code was written from scratch by myself for scsh. -;;; The scsh copyright is a BSD-style open source copyright. See below for -;;; details. -;;; -Olin Shivers - -;;; MIT Scheme copyright terms -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; This material was developed by the Scheme project at the Massachusetts -;;; Institute of Technology, Department of Electrical Engineering and -;;; Computer Science. Permission to copy and modify this software, to -;;; redistribute either the original software or a modified version, and -;;; to use this software for any purpose is granted, subject to the -;;; following restrictions and understandings. -;;; -;;; 1. Any copy made of this software must include this copyright notice -;;; in full. -;;; -;;; 2. Users of this software agree to make their best efforts (a) to -;;; return to the MIT Scheme project any improvements or extensions that -;;; they make, so that these may be included in future releases; and (b) -;;; to inform MIT of noteworthy uses of this software. -;;; -;;; 3. All materials developed as a consequence of the use of this -;;; software shall duly acknowledge such use, in accordance with the usual -;;; standards of acknowledging credit in academic research. -;;; -;;; 4. MIT has made no warrantee or representation that the operation of -;;; this software will be error-free, and MIT is under no obligation to -;;; provide any services, by way of maintenance, update, or otherwise. -;;; -;;; 5. In conjunction with products arising from the use of this material, -;;; there shall be no use of the name of the Massachusetts Institute of -;;; Technology nor of any adaptation thereof in any advertising, -;;; promotional, or sales literature without prior written consent from -;;; MIT in each case. - -;;; Scsh copyright terms -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; All rights reserved. -;;; -;;; Redistribution and use in source and binary forms, with or without -;;; modification, are permitted provided that the following conditions -;;; are met: -;;; 1. Redistributions of source code must retain the above copyright -;;; notice, this list of conditions and the following disclaimer. -;;; 2. Redistributions in binary form must reproduce the above copyright -;;; notice, this list of conditions and the following disclaimer in the -;;; documentation and/or other materials provided with the distribution. -;;; 3. The name of the authors may not be used to endorse or promote products -;;; derived from this software without specific prior written permission. -;;; -;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR -;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, -;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/scsh/lib/string-pack.scm b/scsh/lib/string-pack.scm deleted file mode 100644 index b59beeb..0000000 --- a/scsh/lib/string-pack.scm +++ /dev/null @@ -1,315 +0,0 @@ -;;; string-lib -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-map string-map! -;;; string-fold string-fold-right -;;; string-unfold string-tabulate -;;; string-for-each string-iter -;;; string-every string-any -;;; string-compare string-compare-ci -;;; substring-compare substring-compare-ci -;;; string= string< string> string<= string>= string<> -;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> -;;; substring= substring<> substring-ci= substring-ci<> -;;; substring< substring> substring-ci< substring-ci> -;;; substring<= substring>= substring-ci<= substring-ci>= -;;; string-upper-case? string-lower-case? -;;; capitalize-string capitalize-words string-downcase string-upcase -;;; capitalize-string! capitalize-words! string-downcase! string-upcase! -;;; string-take string-drop -;;; string-pad string-pad-right -;;; string-trim string-trim-right string-trim-both -;;; string-filter string-delete -;;; string-index string-index-right string-skip string-skip-right -;;; string-prefix-count string-prefix-count-ci -;;; string-suffix-count string-suffix-count-ci -;;; substring-prefix-count substring-prefix-count-ci -;;; substring-suffix-count substring-suffix-count-ci -;;; string-prefix? string-prefix-ci? -;;; string-suffix? string-suffix-ci? -;;; substring-prefix? substring-prefix-ci? -;;; substring-suffix? substring-suffix-ci? -;;; substring? substring-ci? -;;; string-fill! string-copy! string-copy substring -;;; string-reverse string-reverse! reverse-list->string -;;; string->list -;;; string-concat string-concat/shared string-append/shared -;;; xsubstring string-xcopy! -;;; string-null? -;;; join-strings -;;; -;;; string? make-string string string-length string-ref string-set! -;;; string-append list->string - -(define-interface string-lib-interface - (export - ;; string-map proc s [start end] -> s - (string-map (proc ((proc (:char) :char) - :string - &opt :exact-integer :exact-integer) - :string)) - - ;; string-map! proc s [start end] -> unspecific - (string-map! (proc ((proc (:char) :values) - :string - &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-fold kons knil s [start end] -> value - ;; string-fold-right kons knil s [start end] -> value - ((string-fold string-fold-right) - (proc ((proc (:char :value) :value) - :value :string - &opt :exact-integer :exact-integer) - :value)) - - ;; string-unfold p f g seed -> string - (string-unfold (proc ((proc (:value) :boolean) - (proc (:value) :char) - (proc (:value) :value) - :value) - :string)) - -; Enough is enough. -; ;; string-unfoldn p f g seed ... -> string -; (string-unfoldn (proc ((procedure :values :boolean) -; (procedure :values :char) -; (procedure :values :values) -; &rest :value) -; :string)) - - ;; string-tabulate proc len -> string - (string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer) - :string)) - - ;; string-for-each proc s [start end] -> unspecific - ;; string-iter proc s [start end] -> unspecific - ((string-for-each string-iter) - (proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-every pred s [start end] - ;; string-any pred s [start end] - (string-every - (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) - :boolean)) - (string-any - (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) - :value)) - - ;; string-compare string1 string2 lt-proc eq-proc gt-proc - ;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc - ((string-compare string-compare-ci) - (proc (:string :string (proc (:exact-integer) :values) - (proc (:exact-integer) :values) - (proc (:exact-integer) :values)) - :values)) - - ;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt - ;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt - ((substring-compare substring-compare-ci) - (proc (:string :exact-integer :exact-integer - :string :exact-integer :exact-integer - (proc (:exact-integer) :values) - (proc (:exact-integer) :values) - (proc (:exact-integer) :values)) - :values)) - - ;; string< string1 string2 - ((string= string< string> string<= string>= string<> - string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>) - (proc (&rest :string) :value)) - - ;; substring< string1 start1 end1 string2 start2 end2 - ((substring= substring<> substring-ci= substring-ci<> - substring< substring> substring-ci< substring-ci> - substring<= substring>= substring-ci<= substring-ci>=) - (proc (:string :exact-integer :exact-integer - :string :exact-integer :exact-integer) - :value)) - - ;; string-upper-case? string [start end] - ;; string-lower-case? string [start end] - ((string-upper-case? string-lower-case?) - (proc (:string &opt :exact-integer :exact-integer) :boolean)) - - ;; capitalize-string string [start end] - ;; capitalize-words string [start end] - ;; string-downcase string [start end] - ;; string-upcase string [start end] - ;; capitalize-string! string [start end] - ;; capitalize-words! string [start end] - ;; string-downcase! string [start end] - ;; string-upcase! string [start end] - ((capitalize-string capitalize-words string-downcase string-upcase) - (proc (:string &opt :exact-integer :exact-integer) :string)) - ((capitalize-string! capitalize-words! string-downcase! string-upcase!) - (proc (:string &opt :exact-integer :exact-integer) :unspecific)) - - ;; string-take string nchars - ;; string-drop string nchars - ((string-take string-drop) (proc (:string :exact-integer) :string)) - - ;; string-pad string k [char start end] - ;; string-pad-right string k [char start end] - ((string-pad string-pad-right) - (proc (:string :exact-integer &opt :char :exact-integer :exact-integer) - :string)) - - ;; string-trim string [char/char-set/pred start end] - ;; string-trim-right string [char/char-set/pred start end] - ;; string-trim-both string [char/char-set/pred start end] - ((string-trim string-trim-right string-trim-both) - (proc (:string &opt :value :exact-integer :exact-integer) - :string)) - - ;; string-filter char/char-set/pred string [start end] - ;; string-delete char/char-set/pred string [start end] - ((string-filter string-delete) - (proc (:value :string &opt :exact-integer :exact-integer) :string)) - - ;; string-index string char/char-set/pred [start end] - ;; string-index-right string char/char-set/pred [end start] - ;; string-skip string char/char-set/pred [start end] - ;; string-skip-right string char/char-set/pred [end start] - ((string-index string-index-right string-skip string-skip-right) - (proc (:string :value &opt :exact-integer :exact-integer) - :value)) - - ;; string-prefix-count string1 string2 - ;; string-suffix-count string1 string2 - ;; string-prefix-count-ci string1 string2 - ;; string-suffix-count-ci string1 string2 - ((string-prefix-count string-prefix-count-ci - string-suffix-count string-suffix-count-ci) - (proc (:string :string) :exact-integer)) - - ;; substring-prefix-count string1 start1 end1 string2 start2 end2 - ;; substring-suffix-count string1 start1 end1 string2 start2 end2 - ;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2 - ;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2 - ((substring-prefix-count substring-prefix-count-ci - substring-suffix-count substring-suffix-count-ci) - (proc (:string :exact-integer :exact-integer - :string :exact-integer :exact-integer) - :exact-integer)) - - - ;; string-prefix? string1 string2 - ;; string-suffix? string1 string2 - ;; string-prefix-ci? string1 string2 - ;; string-suffix-ci? string1 string2 - ((string-prefix? string-prefix-ci? - string-suffix? string-suffix-ci?) - (proc (:string :string) :boolean)) - - ;; substring-prefix? string1 start1 end1 string2 start2 end2 - ;; substring-suffix? string1 start1 end1 string2 start2 end2 - ;; substring-prefix-ci? string1 start1 end1 string2 start2 end2 - ;; substring-suffix-ci? string1 start1 end1 string2 start2 end2 - ((substring-prefix? substring-prefix-ci? - substring-suffix? substring-suffix-ci?) - (proc (:string :exact-integer :exact-integer - :string :exact-integer :exact-integer) - :boolean)) - - ;; substring? pattern string [start end] - ;; substring-ci? pattern string [start end] - ((substring? substring-ci?) - (proc (:string :string &opt :exact-integer :exact-integer) - :value)) - - ;; string-fill! string char [start end] - (string-fill! (proc (:string :char &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-copy! to tstart from [fstart fend] - (string-copy! (proc (:string :exact-integer :string - &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-copy s [start end] -> string - ;; substring s start [end] -> string - (string-copy (proc (:string &opt :exact-integer :exact-integer) :string)) - (substring (proc (:string :exact-integer &opt :exact-integer) :string)) - - ;; string-reverse s [start end] - ;; string-reverse! s [start end] - (string-reverse (proc (:string &opt :exact-integer :exact-integer) :string)) - (string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific)) - - ;; reverse-list->string char-list - ;; string->list s [start end] - ;; string-concat string-list - ;; string-concat/shared string-list - ;; string-append/shared s ... - (reverse-list->string (proc (:value) :string)) - (string->list (proc (:string &opt :exact-integer :exact-integer) :value)) - ((string-concat string-concat/shared) (proc (:value) :string)) - (string-append/shared (proc (&rest :string) :string)) - - ;; xsubstring s from [to start end] - ;; string-xcopy! target tstart s from [to start end] - (xsubstring (proc (:string :exact-integer &opt - :exact-integer :exact-integer :exact-integer) - :string)) - (string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt - :exact-integer :exact-integer :exact-integer) - :unspecific)) - - ;; string-null? s - (string-null? (proc (:string) :boolean)) - - (join-strings (proc (:value &opt :string :symbol) :string)) - - ;; Here are the R4RS procs - (string? (proc (:value) :boolean)) - (make-string (proc (:exact-integer &opt :char) :string)) - (string (proc (&rest :char) :string)) - (string-length (proc (:string) :exact-integer)) - (string-ref (proc (:string :exact-integer) :char)) - (string-set! (proc (:string :exact-integer :char) :unspecific)) - - ; Not provided by string-lib. - ;((string=? string-ci=? string? string-ci>? string<=? string-ci<=? - ; string>=? string-ci>=?) (proc (:string :string) :boolean)) - - ;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!, - ;; and STRING->LIST. The string-lib types are different -- extended. - ;(substring (proc (:string :exact-integer :exact-integer) :string)) - ;(string-copy (proc (:string) :string)) - ;(string-fill! (proc (:string :char) :unspecific)) - ;(string->list (proc (:string) :value)) - - (string-append (proc (&rest :string) :string)) - (list->string (proc (:value) :string)) - )) - - -;;; make-kmp-restart-vector -;;; parse-final-start+end -;;; parse-start+end -;;; check-substring-spec - -(define-interface string-lib-internals-interface - (export - (parse-final-start+end (proc ((procedure :values :values) :string :value) - (some-values :exact-integer :exact-integer))) - (parse-start+end (proc ((procedure :values :values) :string :value) - (some-values :exact-integer :exact-integer :value))) - (check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer) - :unspecific)) - (make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean)) - :vector)))) - - -(define-structures ((string-lib string-lib-interface) - (string-lib-internals string-lib-internals-interface)) - (access scheme) ; Get at R5RS SUBSTRING - (open receiving ; RECEIVE - char-set-package; Various - error-package ; ERROR - let-opt ; LET-OPTIONALS :OPTIONAL - structure-refs ; STRUCTURE-REF - scheme) - (files string-lib)) diff --git a/scsh/lib/string-package.scm b/scsh/lib/string-package.scm deleted file mode 100644 index e29dc50..0000000 --- a/scsh/lib/string-package.scm +++ /dev/null @@ -1,350 +0,0 @@ -;;; Complete interface spec for the SRFI-13 string-lib and -*- Scheme -*- -;;; string-lib-internals libraries in the Scheme48 interface -;;; and module language. The interfaces are fully typed, in -;;; the Scheme48 type notation. The structure definitions also -;;; provide a formal description of the external dependencies -;;; of the source code. - -;;; string-lib -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-map string-map! -;;; string-fold string-unfold -;;; string-fold-right string-unfold-right -;;; string-tabulate string-for-each string-for-each-index -;;; string-every string-any -;;; string-hash string-hash-ci -;;; string-compare string-compare-ci -;;; string= string< string> string<= string>= string<> -;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> -;;; string-downcase string-upcase string-titlecase -;;; string-downcase! string-upcase! string-titlecase! -;;; string-take string-take-right -;;; string-drop string-drop-right -;;; string-pad string-pad-right -;;; string-trim string-trim-right string-trim-both -;;; string-filter string-delete -;;; string-index string-index-right -;;; string-skip string-skip-right -;;; string-count -;;; string-prefix-length string-prefix-length-ci -;;; string-suffix-length string-suffix-length-ci -;;; string-prefix? string-prefix-ci? -;;; string-suffix? string-suffix-ci? -;;; string-contains string-contains-ci -;;; string-fill! string-copy! -;;; string-copy substring/shared -;;; string-reverse string-reverse! reverse-list->string -;;; string->list -;;; string-concatenate string-concatenate/shared -;;; string-concatenate-reverse string-concatenate-reverse/shared -;;; string-append/shared -;;; xsubstring string-xcopy! -;;; string-null? -;;; string-join -;;; string-tokenize -;;; string-replace -;;; -;;; string? make-string string string-length string-ref string-set! -;;; string-append list->string -;;; -;;; make-kmp-restart-vector string-kmp-partial-search kmp-step -;;; string-parse-start+end -;;; string-parse-final-start+end -;;; let-string-start+end -;;; check-substring-spec -;;; substring-spec-ok? - -(define-interface string-lib-interface - (export - ;; string-map proc s [start end] -> s - (string-map (proc ((proc (:char) :char) - :string - &opt :exact-integer :exact-integer) - :string)) - - ;; string-map! proc s [start end] -> unspecific - (string-map! (proc ((proc (:char) :values) - :string - &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-fold kons knil s [start end] -> value - ;; string-fold-right kons knil s [start end] -> value - ((string-fold string-fold-right) - (proc ((proc (:char :value) :value) - :value :string - &opt :exact-integer :exact-integer) - :value)) - - ;; string-unfold p f g seed [base make-final] -> string - ;; string-unfold-right p f g seed [base make-final] -> string - ((string-unfold string-unfold) - (proc ((proc (:value) :boolean) - (proc (:value) :char) - (proc (:value) :value) - :value - &opt :string (proc (:value) :string)) - :string)) - -; Enough is enough. -; ;; string-unfoldn p f g seed ... -> string -; (string-unfoldn (proc ((procedure :values :boolean) -; (procedure :values :char) -; (procedure :values :values) -; &rest :value) -; :string)) - - ;; string-tabulate proc len -> string - (string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer) - :string)) - - ;; string-for-each proc s [start end] -> unspecific - ;; string-for-each-index proc s [start end] -> unspecific - ((string-for-each string-for-each-index) - (proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-every pred s [start end] - ;; string-any pred s [start end] - (string-every - (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) - :boolean)) - (string-any - (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) - :value)) - - ;; string-hash s [bound start end] - ;; string-hash-ci s [bound start end] - ((string-hash string-hash-ci) - (proc (:string &opt :exact-integer :exact-integer :exact-integer) - :exact-integer)) - - ;; string-compare string1 string2 lt-proc eq-proc gt-proc [start end] - ;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc [start end] - ((string-compare string-compare-ci) - (proc (:string :string (proc (:exact-integer) :values) - (proc (:exact-integer) :values) - (proc (:exact-integer) :values) - &opt :exact-integer :exact-integer) - :values)) - - ;; string< string1 string2 [start1 end1 start2 end2] - ((string= string< string> string<= string>= string<> - string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>) - (proc (:string :string &opt :exact-integer :exact-integer - :exact-integer :exact-integer) - :boolean)) - - ;; string-titlecase string [start end] - ;; string-upcase string [start end] - ;; string-downcase string [start end] - ;; string-titlecase! string [start end] - ;; string-upcase! string [start end] - ;; string-downcase! string [start end] - ((string-titlecase string-upcase string-downcase) - (proc (:string &opt :exact-integer :exact-integer) :string)) - ((string-titlecase! string-upcase! string-downcase!) - (proc (:string &opt :exact-integer :exact-integer) :unspecific)) - - ;; string-take string nchars - ;; string-drop string nchars - ;; string-take-right string nchars - ;; string-drop-right string nchars - ((string-take string-drop string-take-right string-drop-right) - (proc (:string :exact-integer) :string)) - - ;; string-pad string k [char start end] - ;; string-pad-right string k [char start end] - ((string-pad string-pad-right) - (proc (:string :exact-integer &opt :char :exact-integer :exact-integer) - :string)) - - ;; string-trim string [char/char-set/pred start end] - ;; string-trim-right string [char/char-set/pred start end] - ;; string-trim-both string [char/char-set/pred start end] - ((string-trim string-trim-right string-trim-both) - (proc (:string &opt :value :exact-integer :exact-integer) - :string)) - - ;; string-filter char/char-set/pred string [start end] - ;; string-delete char/char-set/pred string [start end] - ((string-filter string-delete) - (proc (:value :string &opt :exact-integer :exact-integer) :string)) - - ;; string-index string char/char-set/pred [start end] - ;; string-index-right string char/char-set/pred [end start] - ;; string-skip string char/char-set/pred [start end] - ;; string-skip-right string char/char-set/pred [end start] - ((string-index string-index-right string-skip string-skip-right) - (proc (:string :value &opt :exact-integer :exact-integer) - :value)) - - ;; string-count string char/char-set/pred [start end] - (string-count (proc (:string :value &opt :exact-integer :exact-integer) - :exact-integer)) - - ;; string-prefix-length string1 string2 [start1 end1 start2 end2] - ;; string-suffix-length string1 string2 [start1 end1 start2 end2] - ;; string-prefix-length-ci string1 string2 [start1 end1 start2 end2] - ;; string-suffix-length-ci string1 string2 [start1 end1 start2 end2] - ((string-prefix-length string-prefix-length-ci - string-suffix-length string-suffix-length-ci) - (proc (:string :string &opt - :exact-integer :exact-integer :exact-integer :exact-integer) - :exact-integer)) - - ;; string-prefix? string1 string2 [start1 end1 start2 end2] - ;; string-suffix? string1 string2 [start1 end1 start2 end2] - ;; string-prefix-ci? string1 string2 [start1 end1 start2 end2] - ;; string-suffix-ci? string1 string2 [start1 end1 start2 end2] - ((string-prefix? string-prefix-ci? - string-suffix? string-suffix-ci?) - (proc (:string :string &opt - :exact-integer :exact-integer :exact-integer :exact-integer) - :boolean)) - - ;; string-contains string pattern [s-start s-end p-start p-end] - ;; string-contains-ci string pattern [s-start s-end p-start p-end] - ((string-contains string-contains-ci) - (proc (:string :string &opt :exact-integer :exact-integer - :exact-integer :exact-integer) - :value)) - - ;; string-fill! string char [start end] - (string-fill! (proc (:string :char &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-copy! to tstart from [fstart fend] - (string-copy! (proc (:string :exact-integer :string - &opt :exact-integer :exact-integer) - :unspecific)) - - ;; string-copy s [start end] -> string - ;; substring/shared s start [end] -> string - (string-copy (proc (:string &opt :exact-integer :exact-integer) :string)) - (substring/shared (proc (:string :exact-integer &opt :exact-integer) :string)) - - ;; string-reverse s [start end] - ;; string-reverse! s [start end] - (string-reverse (proc (:string &opt :exact-integer :exact-integer) :string)) - (string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific)) - - ;; reverse-list->string char-list - ;; string->list s [start end] - ;; string-concatenate string-list - ;; string-concatenate/shared string-list - ;; string-append/shared s ... - (reverse-list->string (proc (:value) :string)) - (string->list (proc (:string &opt :exact-integer :exact-integer) :value)) - ((string-concatenate string-concatenate/shared) (proc (:value) :string)) - (string-append/shared (proc (&rest :string) :string)) - - ;; string-concatenate-reverse string-list [final-string end] - ;; string-concatenate-reverse/shared string-list [final-string end] - ((string-concatenate-reverse string-concatenate-reverse/shared) - (proc (:value &opt :string :exact-integer) :string)) - - ;; xsubstring s from [to start end] - ;; string-xcopy! target tstart s from [to start end] - (xsubstring (proc (:string :exact-integer &opt - :exact-integer :exact-integer :exact-integer) - :string)) - (string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt - :exact-integer :exact-integer :exact-integer) - :unspecific)) - - ;; string-null? s - (string-null? (proc (:string) :boolean)) - - ;; string-join string-list [delim grammar] - (string-join (proc (:value &opt :string :symbol) :string)) - - ;; string-tokenize string [token-chars start end] - (string-tokenize (proc (:string &opt :value :exact-integer :exact-integer) - :value)) - - ;; string-replace s1 s2 start1 end1 [start2 end2] - (string-replace (proc (:string :string :exact-integer :exact-integer - &opt :exact-integer :exact-integer) - :string)) - - ;; Here are the R4RS/R5RS procs - (string? (proc (:value) :boolean)) - (make-string (proc (:exact-integer &opt :char) :string)) - (string (proc (&rest :char) :string)) - (string-length (proc (:string) :exact-integer)) - (string-ref (proc (:string :exact-integer) :char)) - (string-set! (proc (:string :exact-integer :char) :unspecific)) - (string-append (proc (&rest :string) :string)) - (list->string (proc (:value) :string)) - - ;; These are the R4RS types for STRING-COPY, STRING-FILL!, and - ;; STRING->LIST. The string-lib types are different -- extended. - ;(string-copy (proc (:string) :string)) - ;(string-fill! (proc (:string :char) :unspecific)) - ;(string->list (proc (:string) :value)) - - )) - - -;;; make-kmp-restart-vector -;;; string-kmp-partial-search -;;; kmp-step -;;; string-parse-start+end -;;; string-parse-final-start+end -;;; let-string-start+end -;;; check-substring-spec -;;; substring-spec-ok? - -(define-interface string-lib-internals-interface - (export - (let-string-start+end :syntax) - (string-parse-start+end (proc ((procedure :values :values) :string :value) - (some-values :exact-integer :exact-integer :value))) - (string-parse-final-start+end (proc ((procedure :values :values) :string :value) - (some-values :exact-integer :exact-integer))) - (check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer) - :unspecific)) - (substring-spec-ok? (proc ((procedure :values :values) :string :exact-integer :exact-integer) - :boolean)) - - ;; string-kmp-partial-search pat rv s i [c= p-start s-start s-end] -> integer - (string-kmp-partial-search (proc (:string :vector :string :exact-integer - &opt (proc (:char :char) :boolean) - :exact-integer :exact-integer :exact-integer) - :exact-integer)) - - ;; make-kmp-restart-vector s [c= start end] -> vector - (make-kmp-restart-vector (proc (:string &opt (proc (:char :char) :boolean) - :exact-integer :exact-integer) - :vector)) - - ;; kmp-step pat rv c i c= p-start -> integer - (kmp-step (proc (:string :vector :char :exact-integer - (proc (:char :char) :boolean) - :exact-integer) - :exact-integer)) - )) - - -(define-structures ((string-lib string-lib-interface) - (string-lib-internals string-lib-internals-interface)) - (access scheme) ; Get at R5RS SUBSTRING - (open receiving ; RECEIVE - char-set-lib ; Various - bitwise ; BITWISE-AND for hashing - error-package ; ERROR - let-opt ; LET-OPTIONALS* :OPTIONAL - scheme) - - ;; A few cheesy S48/scsh definitions for string-lib dependencies: - (begin (define (check-arg pred val caller) - (let lp ((val val)) - (if (pred val) val (lp (error "Bad argument" val pred caller))))) - - ;; These two internal procedures are correctly defined for ASCII or - ;; Latin-1. They are *not* correct for Unicode. - (define (char-cased? c) (char-set-contains? char-set:letter c)) - (define (char-titlecase c) (char-upcase c))) - - (files string-lib)) diff --git a/scsh/rdelim.scm b/scsh/rdelim.scm index 475862a..d7ae751 100644 --- a/scsh/rdelim.scm +++ b/scsh/rdelim.scm @@ -33,7 +33,7 @@ (let ((substr (lambda (s end) ; Smart substring. (if (= end (string-length s)) s (substring s 0 end)))) - (delims (->char-set delims)) + (delims (x->char-set delims)) (gobble? (not (eq? delim-action 'peek)))) ;; BUFLEN is total amount of buffer space allocated to date. (let lp ((strs '()) (buflen 80) (buf (make-string 80))) @@ -127,7 +127,7 @@ (if (char? last) (string-set! buf (+ start num-read) last)) (and (or (eof-object? last) - (char-set-contains? (->char-set delims) + (char-set-contains? (x->char-set delims) last)) (+ num-read 1))))))))) @@ -192,15 +192,14 @@ (start 0) (end (string-length buf))) - (let* ((delims (->char-set delims)) - (sdelims (char-set:s delims))) + (let ((delims (x->char-set delims))) (let lp ((start start) (total 0)) - (receive (terminator num-read) - (port-buffer-read-delimited delims buf gobble? port start end) - (if (not (eq? terminator 'port-buffer-exhausted)) - (values terminator (+ num-read total)) - (begin (peek-char port) ; kludge to fill the buffer - (lp (+ start num-read) (+ total num-read))))))))) + (receive (terminator num-read) + (port-buffer-read-delimited delims buf gobble? port start end) + (if (not (eq? terminator 'port-buffer-exhausted)) + (values terminator (+ num-read total)) + (begin (peek-char port) ; kludge to fill the buffer + (lp (+ start num-read) (+ total num-read))))))))) @@ -211,16 +210,15 @@ (define (skip-char-set skip-chars . maybe-port) - (let* ((port (:optional maybe-port (current-input-port))) - (cset (->char-set skip-chars)) - (scset (char-set:s cset))) + (let ((port (:optional maybe-port (current-input-port))) + (cset (x->char-set skip-chars))) (let lp ((total 0)) (receive (succ num-read) (buffer-skip-char-set cset port) - (if (not succ) - (+ total num-read) ; eof - (begin (peek-char port); kludge to fill the buffer - (lp (+ total num-read)))))))) + (if (not succ) + (+ total num-read) ; eof + (begin (peek-char port) ; kludge to fill the buffer + (lp (+ total num-read)))))))) (define (buffer-skip-char-set cset port) (let ((the-port-limit (port-limit port))) diff --git a/scsh/rx/loadem.scm b/scsh/rx/loadem.scm deleted file mode 100644 index e33dc74..0000000 --- a/scsh/rx/loadem.scm +++ /dev/null @@ -1,7 +0,0 @@ -;;; ,exec ,load loadem.scm - -(config '(load "packages2.scm")) -(config '(load "cond-package.scm")) -;(map load-package '(rx-lib re-basics re-low-exports re-high-tools -; sre-parser-package re-posix-parsers sre-syntax-tools -; rx-syntax)) diff --git a/scsh/rx/modules.scm b/scsh/rx/modules.scm deleted file mode 100644 index fdb2735..0000000 --- a/scsh/rx/modules.scm +++ /dev/null @@ -1,26 +0,0 @@ -(define-structure re-package (export) - (open scsh - formats - define-record-types ; re - defrec-package ; re - scsh-utilities ; - define-foreign-syntax ; re-low - weak ; re-low - let-opt ; re - sort ; posixstr - receiving ; all of them - scheme) - - (files "/usr/home/shivers/src/scm/conditionals.scm" - re - re-low - simp - re-high - parse - posixstr - spencer - ;re-syntax - ) - - (optimize auto-integrate) - ) diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index 50884e5..5e65c43 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -228,20 +228,20 @@ (open scsh-utilities defrec-package weak - ;re-posix-parsers ; regexp->posix-string + ;; re-posix-parsers ; regexp->posix-string let-opt - sort ; Posix renderer + sort ; Posix renderer conditionals define-record-types defrec-package receiving - char-set-lib + srfi-14 error-package ascii - primitives ; JMG add-finalizer! - define-record-types ; JMG debugging + primitives ; JMG add-finalizer! + define-record-types ; JMG debugging external-calls - string-lib ; string-fold + srfi-13 ; string-fold posix-regexps scheme) @@ -275,7 +275,7 @@ (open re-internals conditionals re-level-0 - char-set-lib + srfi-14 scsh-utilities ; fold error-package ascii @@ -291,7 +291,7 @@ (define-structure rx-syntax rx-syntax-interface (open re-level-0 - char-set-lib + srfi-14 rx-lib standard-char-sets scheme) @@ -332,7 +332,7 @@ posix-regexps scsh-utilities ; fold & some string utilities that need to be moved. scsh-level-0 ; write-string - string-lib ; string-copy! + srfi-13 ; string-copy! scheme) (files re-subst) ; (optimize auto-integrate) diff --git a/scsh/rx/regexp.scm b/scsh/rx/regexp.scm index f772df7..c394b45 100644 --- a/scsh/rx/regexp.scm +++ b/scsh/rx/regexp.scm @@ -107,7 +107,9 @@ (regexp-submatches? regexp) (regexp-newline? regexp)))) (error (if message - (string-append "Posix regexp: " message) + (string-append "Posix regexp (" + (regexp-pattern regexp) + ") : " message) "inconsistent results from Posix regexp compiler") regexp)))))) diff --git a/scsh/scsh-package.scm b/scsh/scsh-package.scm index 2cd2fc8..7f94e49 100644 --- a/scsh/scsh-package.scm +++ b/scsh/scsh-package.scm @@ -46,6 +46,11 @@ tables) (files weaktables)) +(define list-lib srfi-1) +(define string-lib srfi-13) +(define char-set-lib srfi-14) + + ;;; This guy goes into the FOR-SYNTAX part of scsh's syntax exports. (define-structure scsh-syntax-helpers (export transcribe-extended-process-form) @@ -125,7 +130,8 @@ scsh-sockets-interface ; new in 0.3 tty-interface ; new in 0.4 scsh-version-interface - char-set-interface + (interface-of char-set-lib) + (export ->char-set) signal-handler-interface ;; This stuff would probably be better off kept ;; in separate modules, but we'll toss it in for now. @@ -167,10 +173,9 @@ fluids thread-fluids weak-tables - scsh-char-set-low-level-lib ; rdelim.scm needs it. + srfi-14 ; scsh-regexp-package ; scsh-regexp-internals - char-set-lib scsh-version tty-flags scsh-internal-tty-flags ; Not exported @@ -184,7 +189,7 @@ re-level-0 rx-syntax - string-lib + srfi-13 thread-fluids ; For exec-path-list loopholes ; For my bogus CALL-TERMINALLY implementation. @@ -243,6 +248,9 @@ rdelim ) ; (optimize auto-integrate) + (begin + ;; work around for SRFI 14 naming fuckage + (define ->char-set x->char-set)) ) (define-structure defrec-package (export (define-record :syntax)) @@ -325,7 +333,11 @@ ; with-current-output-port exit scsh-level-0-internals ; set-command-line-args! init-scsh-vars threads - list-lib ; any + (subset srfi-1 (any)) + (subset srfi-14 (char-set + char-set-complement! + char-set-contains? + string->char-set)) root-scheduler ; scheme-exit-now scheme) (files top meta-arg)) @@ -335,7 +347,12 @@ (open receiving ; receive scsh-utilities ; deprecated-proc error-package ; error - string-lib ; string-join for obsolete join-strings + (subset srfi-13 (string-join)) + (subset srfi-14 (char-set? + char-set:whitespace + char-set + x->char-set + char-set-complement)) scsh-level-0 ; delimited readers ; scsh-regexp-package re-exports @@ -345,7 +362,7 @@ (files fr) ;; Handle a little bit of backwards compatibility. (begin (define join-strings (deprecated-proc string-join 'join-strings - "Use STRING-LIB STRING-JOIN."))) + "Use SRFI-13 STRING-JOIN."))) ) @@ -392,12 +409,10 @@ (export repl) awk-interface char-predicates-interface; Urk -- Some of this is R5RS! - obsolete-char-set-interface dot-locking-interface ) (open structure-refs - obsolete-char-set-lib scsh-level-0 scsh-level-0-internals re-exports @@ -421,6 +436,7 @@ receiving scsh ; Just need the delimited readers. features ; make-immutable! + (subset srfi-14 (char-set)) scheme) (files here))