diff --git a/scsh/fr.scm b/scsh/fr.scm index a50782f..452fb74 100644 --- a/scsh/fr.scm +++ b/scsh/fr.scm @@ -307,7 +307,7 @@ (let-optionals args ((delims default-record-delims) (elide? #f) (handle-delim 'trim)) - (let ((delims (->char-set delims))) + (let ((delims (x->char-set delims))) (case handle-delim ((trim) ; TRIM-delimiter reader. diff --git a/scsh/lib/ccp-pack.scm b/scsh/lib/ccp-pack.scm index 50f4d1c..13e6be0 100644 --- a/scsh/lib/ccp-pack.scm +++ b/scsh/lib/ccp-pack.scm @@ -93,13 +93,13 @@ )) (define-structure ccp-lib ccp-lib-interface - (open char-set-lib + (open srfi-14 ascii defrec-package - string-lib + srfi-13 let-opt receiving - list-lib ; EVERY + (subset srfi-1 (every fold)) error-package scheme) (files ccp) diff --git a/scsh/lib/char-package.scm b/scsh/lib/char-package.scm index 318aafd..47722cb 100644 --- a/scsh/lib/char-package.scm +++ b/scsh/lib/char-package.scm @@ -27,7 +27,7 @@ (define-structure char-predicates-lib char-predicates-interface (open error-package ; ERROR scsh-utilities ; DEPRECATED-PROC - char-set-lib + srfi-14 scheme) (begin diff --git a/scsh/lib/cset-lib.html b/scsh/lib/cset-lib.html deleted file mode 100644 index b7eb80f..0000000 --- a/scsh/lib/cset-lib.html +++ /dev/null @@ -1,2016 +0,0 @@ - - - - - -
- - -- -The ability to efficiently represent and manipulate sets of characters is an -unglamorous but very useful capability for text-processing code -- one that -tends to pop up in the definitions of other libraries. Hence it is useful to -specify a general substrate for this functionality early. This SRFI defines a -general library that provides this functionality. - -It is accompanied by a reference implementation for the spec. The reference -implementation is fairly efficient, straightforwardly portable, and has a -"free software" copyright. The implementation is tuned for "small" 7 or 8 -bit character types, such as ASCII or Latin-1; the data structures and -algorithms would have to be altered for larger 16 or 32 bit character types -such as Unicode -- however, the specs have been carefully designed with these -larger character types in mind. - -Several forthcoming SRFIs can be defined in terms of this one: -
read-line
)
- -Here is the complete set of bindings -- procedural and otherwise -- -exported by this library. In a Scheme system that has a module or package -system, these procedures should be contained in a module named "char-set-lib". - -
-char-set? char-set= char-set<= char-set-hash -- -
-char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? -char-set-fold char-set-unfold char-set-unfold! -char-set-for-each char-set-map -- -
-char-set-copy char-set - -list->char-set string->char-set -list->char-set! string->char-set! - -char-set-filter ucs-range->char-set -char-set-filter! ucs-range->char-set! - -->char-set -- -
-char-set->list char-set->string -char-set-size char-set-count char-set-contains? -char-set-every char-set-any -- -
-char-set-adjoin char-set-delete -char-set-adjoin! char-set-delete! - -char-set-complement char-set-union char-set-intersection -char-set-complement! char-set-union! char-set-intersection! - -char-set-difference char-set-xor char-set-diff+intersection -char-set-difference! char-set-xor! char-set-diff+intersection! -- -
-char-set:lower-case char-set:upper-case char-set:title-case -char-set:letter char-set:digit char-set:letter+digit -char-set:graphic char-set:printing char-set:whitespace -char-set:iso-control char-set:punctuation char-set:symbol -char-set:hex-digit char-set:blank char-set:ascii -char-set:empty char-set:full -- -
-The ability to efficiently manipulate sets of characters is quite -useful for text-processing code. Encapsulating this functionality in -a general, efficiently implemented library can assist all such code. -This library defines a new data structure to represent these sets, called -a "char-set." The char-set type is distinct from all other types. - -
-This library is designed to be portable across implementations that use
-different character types and representations, especially ASCII, Latin-1
-and Unicode. Some effort has been made to preserve compatibility with Java
-in the Unicode case (see the definition of char-set:whitespace
for the
-single real deviation).
-
-
-
-The procedures of this SRFI, by default, are "pure functional" -- they do not -alter their parameters. However, this SRFI defines a set of "linear-update" -procedures which have a hybrid pure-functional/side-effecting semantics: they -are allowed, but not required, to side-effect one of their parameters in order -to construct their result. An implementation may legally implement these -procedures as pure, side-effect-free functions, or it may implement them using -side effects, depending upon the details of what is the most efficient or -simple to implement in terms of the underlying representation. - -
-The linear-update routines all have names ending with "!". - -
-Clients of these procedures may not rely upon these procedures working by -side effect. For example, this is not guaranteed to work: -
-(let* ((cs1 (char-set #\a #\b #\c)) ; cs1 = {a,b,c}. - (cs2 (char-set-adjoin! cs1 #\d))) ; Add d to {a,b,c}. - cs1) ; Could be either {a,b,c} or {a,b,c,d}. --
-However, this is well-defined: -
-(let ((cs (char-set #\a #\b #\c))) - (char-set-adjoin! cs #\d)) ; Add d to {a,b,c}. -- -
-So clients of these procedures write in a functional style, but must -additionally be sure that, when the procedure is called, there are no other -live pointers to the potentially-modified character set (hence the term -"linear update"). - -
-There are two benefits to this convention: -
-Note that pure functional representations are the right thing for -ASCII- or Latin-1-based Scheme implementations, since a char-set can -be represented in an ASCII Scheme with 4 32-bit words. Pure set-algebra -operations on such a representation are very fast and efficient. Programmers -who code using linear-update operations are guaranteed the system will -provide the best implementation across multiple platforms. - -
-In practice, these procedures are most useful for efficiently constructing -character sets in a side-effecting manner, in some limited local context, -before passing the character set outside the local construction scope to be -used in a functional manner. - -
-Scheme provides no assistance in checking the linearity of the potentially -side-effected parameters passed to these functions --- there's no linear -type checker or run-time mechanism for detecting violations. (But -sophisticated programming environments, such as DrScheme, might help.) - - -
-Users are cautioned that the R5RS predicates -
-char-alphabetic?
-char-numeric?
-char-whitespace?
-char-upper-case?
-char-lower-case?
-
--may or may not be in agreement with the SRFI 14 base character sets -
-char-set:letter
-char-set:digit
-char-set:whitespace
-char-set:upper-case
-char-set:lower-case
-
--Implementors are strongly encouraged to bring these predicates into -agreement with the base character sets of this SRFI; not to do so risks -major confusion. - - - -
-In the following procedure specifications: -
-Passing values to procedures with these parameters that do not satisfy these -types is an error. - -
-Unless otherwise noted in the specification of a procedure, procedures
-always return character sets that are distinct (from the point of view
-of the linear-update operations) from the parameter character sets. For
-example, char-set-adjoin
is guaranteed to provide a fresh character set,
-even if it is not given any character parameters.
-
-
-Parameters given in square brackets are optional. Unless otherwise noted in the -text describing the procedure, any prefix of these optional parameters may -be supplied, from zero arguments to the full list. When a procedure returns -multiple values, this is shown by listing the return values in square -brackets, as well. So, for example, the procedure with signature -
-halts? f [x init-store] -> [boolean integer] --would take one (f), two (f, x) -or three (f, x, init-store) input parameters, -and return two values, a boolean and an integer. - -
-A parameter followed by "...
" means zero-or-more elements.
-So the procedure with the signature
-
-sum-squares x ... -> number --takes zero or more arguments (x ...), -while the procedure with signature -
-spell-check doc dict1 dict2 ... -> string-list --takes two required parameters -(doc and dict1) -and zero or more optional parameters (dict2 ...). - - - -
char-set?
obj -> boolean
-char-set=
cs1 ... -> boolean
-- Boundary cases: -
-(char-set=) => true -(char-set= cs) => true -- -
- Rationale: transitive binary relations are generally extended to n-ary - relations in Scheme, which enables clearer, more concise code to be - written. While the zero-argument and one-argument cases will almost - certainly not arise in first-order uses of such relations, they may well - arise in higher-order cases or macro-generated code. - E.g., consider -
-(apply char-set= cset-list) --
- This is well-defined if the list is empty or a singleton list. Hence - we extend these relations to any number of arguments. Implementors - have reported actual uses of n-ary relations in higher-order cases - allowing for fewer than two arguments. The way of Scheme is to handle the - general case; we provide the fully general extension. -
- A counter-argument to this extension is that
- R5RS's
- transitive binary arithmetic relations
- (=
, <
, etc.)
- require at least two arguments, hence
- this decision is a break with the prior convention -- although it is
- at least one that is backwards-compatible.
-
-
-
char-set<=
cs1 ... -> boolean
--Boundary cases: -
-(char-set<=) => true -(char-set<= cs) => true --
-Rationale: See char-set=
for discussion of zero- and one-argument
-applications. Consider testing a list of char-sets for monotonicity
-with
-
-(apply char-set<= cset-list) -- - -
char-set-hash
cs [bound] -> integer
-- If bound is either zero or not given, the implementation may use - an implementation-specific default value, chosen to be as large as - is efficiently practical. For instance, the default range might be chosen - for a given implementation to map all character sets into the range of - integers that can be represented with a single machine word. - - -
- Invariant: -
-(char-set= cs1 cs2) => (= (char-set-hash cs1 b) (char-set-hash cs2 b)) -- -
- A legal but nonetheless discouraged implementation: -
-(define (char-set-hash cs . maybe-bound) 1) -- -
- Rationale: allowing the user to specify an explicit bound simplifies user - code by removing the mod operation that typically accompanies every hash - computation, and also may allow the implementation of the hash function to - exploit a reduced range to efficiently compute the hash value. - E.g., for - small bounds, the hash function may be computed in a fashion such that - intermediate values never overflow into bignum integers, allowing the - implementor to provide a fixnum-specific "fast path" for computing the - common cases very rapidly. - -
char-set-cursor
cset -> cursor
-char-set-ref
cset cursor -> char
-char-set-cursor-next
cset cursor -> cursor
-end-of-char-set?
cursor -> boolean
-char-set-cursor
produces a new cursor for a given char set.
- The set element indexed by the cursor is fetched with
- char-set-ref
.
- A cursor index is incremented with char-set-cursor-next
;
- in this way, code can step through every character in a char set.
- Stepping a cursor "past the end" of a char set produces a cursor that
- answers true to end-of-char-set?
.
- It is an error to pass such a cursor to char-set-ref
or to
- char-set-cursor-next
.
-
-
- A cursor value may not be used in conjunction with a different character
- set; if it is passed to char-set-ref
or
- char-set-cursor-next
with
- a character set other than the one used to create it, the results and
- effects are undefined.
-
-
- Cursor values are not necessarily distinct from other types. - They may be - integers, linked lists, records, procedures or other values. This license - is granted to allow cursors to be very "lightweight" values suitable for - tight iteration, even in fairly simple implementations. - -
- Note that these primitives are necessary to export an iteration facility - for char sets to loop macros. - -
- Example: -
-(define cs (char-set #\G #\a #\T #\e #\c #\h)) - -;; Collect elts of CS into a list. -(let lp ((cur (char-set-cursor cs)) (ans '())) - (if (end-of-char-set? cur) ans - (lp (char-set-cursor-next cs cur) - (cons (char-set-ref cs cur) ans)))) - => (#\G #\T #\a #\c #\e #\h) - -;; Equivalently, using a list unfold (from SRFI 1): -(unfold-right end-of-char-set? - (curry char-set-ref cs) - (curry char-set-cursor-next cs) - (char-set-cursor cs)) - => (#\G #\T #\a #\c #\e #\h) -- -
- Rationale: Note that the cursor API's four functions "fit" the functional
- protocol used by the unfolders provided by the list, string and char-set
- SRFIs (see the example above). By way of contrast, here is a simpler,
- two-function API that was rejected for failing this criterion. Besides
- char-set-cursor
, it provided a single
- function that mapped a cursor and a character set to two values, the
- indexed character and the next cursor. If the cursor had exhausted the
- character set, then this function returned false instead of the character
- value, and another end-of-char-set cursor. In this way, the other three
- functions of the current API were combined together.
-
-
-
char-set-fold
kons knil cs -> object
--(char-set-fold kons (kons c knil) cs') --
- Examples: -
-;; CHAR-SET-MEMBERS -(lambda (cs) (char-set-fold cons '() cs)) - -;; CHAR-SET-SIZE -(lambda (cs) (char-set-fold (lambda (c i) (+ i 1)) 0 cs)) - -;; How many vowels in the char set? -(lambda (cs) - (char-set-fold (lambda (c i) (if (vowel? c) (+ i 1) i)) - 0 cs)) -- - -
char-set-unfold
p f g seed [base-cs] -> char-set
-char-set-unfold!
p f g seed base-cs -> char-set
-char-set-unfold!
adds the characters to base-cs in a
- linear-update -- it is allowed, but not required, to side-effect
- and use base-cs's storage to construct the result.
-- More precisely, the following definitions hold, ignoring the - optional-argument issues: - -
-(define (char-set-unfold p f g seed base-cs) - (char-set-unfold! p f g seed (char-set-copy base-cs))) - -(define (char-set-unfold! p f g seed base-cs) - (let lp ((seed seed) (cs base-cs)) - (if (p seed) cs ; P says we are done. - (lp (g seed) ; Loop on (G SEED). - (char-set-adjoin! cs (f seed)))))) ; Add (F SEED) to set. -- - (Note that the actual implementation may be more efficient.) - -
- Examples: -
-(port->char-set p) = (char-set-unfold eof-object? values - (lambda (x) (read-char p)) - (read-char p)) - -(list->char-set lis) = (char-set-unfold null? car cdr lis) -- -
char-set-for-each
proc cs -> unspecified
-
- Nothing at all is specified about the value returned by this procedure; it
- is not even required to be consistent from call to call. It is simply
- required to be a value (or values) that may be passed to a command
- continuation, e.g. as the value of an expression appearing as a
- non-terminal subform of a begin
expression.
- Note that in
- R5RS,
- this restricts the procedure to returning a single value;
- non-R5RS systems may not even provide this restriction.
-
-
-
char-set-map
proc cs -> char-set
-- Essentially lifts proc from a char->char procedure to a char-set -> - char-set procedure. - -
- Example: -
-(char-set-map char-downcase cset) --
char-set-copy
cs -> char-set
-
- A system that provides pure-functional implementations of the
- linear-operator suite could implement this procedure as the identity
- function -- so copies are not guaranteed to be distinct by eq?
.
-
-
-
char-set
char1 ... -> char-set
-list->char-set
char-list [base-cs] -> char-set
-list->char-set!
char-list base-cs -> char-set
-
- If character set base-cs is provided, the characters from char-list
- are added to it. list->char-set!
is allowed, but not required,
- to side-effect and reuse the storage in base-cs;
- list->char-set
produces a fresh character set.
-
-
-
string->char-set
s [base-cs] -> char-set
-string->char-set!
s base-cs -> char-set
-
- If character set base-cs is provided, the characters from s are added to
- it. string->char-set!
is allowed, but not required, to side-effect and
- reuse the storage in base-cs; string->char-set
produces a fresh character
- set.
-
-
-
char-set-filter
pred cs [base-cs] -> char-set
-char-set-filter!
pred cs base-cs -> char-set
-(pred c)
- returns true.
-
-
- If character set base-cs is provided, the characters specified
- by pred are added to it.
- char-set-filter!
is allowed, but not required,
- to side-effect and reuse the storage in base-cs;
- char-set-filter
produces a fresh character set.
-
-
- An implementation may not save away a reference to pred and
- invoke it after char-set-filter
or
- char-set-filter!
returns -- that is, "lazy,"
- on-demand implementations are not allowed, as pred may have
- external dependencies on mutable data or have other side-effects.
-
-
- Rationale: This procedure provides a means of converting a character
- predicate into its equivalent character set; the cs parameter
- allows the programmer to bound the predicate's domain. Programmers should
- be aware that filtering a character set such as char-set:full
- could be a very expensive operation in an implementation that provided an
- extremely large character type, such as 32-bit Unicode. An earlier draft
- of this library provided a simple predicate->char-set
- procedure, which was rejected in favor of char-set-filter
for
- this reason.
-
-
-
-
ucs-range->char-set
lower upper [error? base-cs] -> char-set
-ucs-range->char-set!
lower upper error? base-cs -> char-set
-- Returns a character set containing every character whose ISO/IEC 10646 - UCS-4 code lies in the half-open range [lower,upper). - -
- If character set base-cs is provided, the characters specified by the
- range are added to it. ucs-range->char-set!
is allowed, but not required,
- to side-effect and reuse the storage in base-cs;
- ucs-range->char-set
produces a fresh character set.
-
-
- Note that ASCII codes are a subset of the Latin-1 codes, which are in turn - a subset of the 16-bit Unicode codes, which are themselves a subset of the - 32-bit UCS-4 codes. We commit to a specific encoding in this routine, - regardless of the underlying representation of characters, so that client - code using this library will be portable. I.e., a conformant Scheme - implementation may use EBCDIC or SHIFT-JIS to encode characters; it must - simply map the UCS characters from the given range into the native - representation when possible, and report errors when not possible. - - -
->char-set
x -> char-set
-char-set-size
cs -> integer
-char-set-count
pred cs -> integer
-char-set->list
cs -> character-list
-char-set->string
cs -> string
-char-set-contains?
cs char -> boolean
-- The MIT Scheme character-set package called this procedure - char-set-member?, but the argument order isn't consistent with the name. - - -
char-set-every
pred cs -> boolean
-char-set-any
pred cs -> boolean
-char-set-every
procedure returns true if predicate pred
- returns true of every character in the character set cs.
- Likewise, char-set-any
applies pred to every character in
- character set cs, and returns the first true value it finds.
- If no character produces a true value, it returns false.
- The order in which these procedures sequence through the elements of
- cs is not specified.
-
-
- Note that if you need to determine the actual character on which a
- predicate returns true, use char-set-any
and arrange for the predicate
- to return the character parameter as its true value, e.g.
-
-(char-set-any (lambda (c) (and (char-upper-case? c) c)) - cs) --
char-set-adjoin
cs char1 ... -> char-set
-char-set-delete
cs char1 ... -> char-set
-char-set-adjoin!
cs char1 ... -> char-set
-char-set-delete!
cs char1 ... -> char-set
-char-set-complement
cs -> char-set
-char-set-union
cs1 ... -> char-set
-char-set-intersection
cs1 ... -> char-set
-char-set-difference
cs1 cs2 ... -> char-set
-char-set-xor
cs1 ... -> char-set
-char-set-diff+intersection
cs1 cs2 ... -> [char-set char-set]
-- Boundary cases: -
-(char-set-union) => char-set:empty -(char-set-intersection) => char-set:full -(char-set-xor) => char-set:empty -(char-set-difference cs) => cs -- -
- char-set-diff+intersection
returns both the difference and the
- intersection of the arguments -- it partitions its first parameter.
- It is equivalent to
-
-(values (char-set-difference cs1 cs2 ...) - (char-set-intersection cs1 (char-set-union cs2 ...))) -- but can be implemented more efficiently. - -
- Programmers should be aware that char-set-complement
could potentially
- be a very expensive operation in Scheme implementations that provide
- a very large character type, such as 32-bit Unicode. If this is a
- possibility, sets can be complemented with respect to a smaller
- universe using char-set-difference
.
-
-
-
-
char-set-complement!
cs -> char-set
-char-set-union!
cs1 cs2 ... -> char-set
-char-set-intersection!
cs1 cs2 ... -> char-set
-char-set-difference!
cs1 cs2 ... -> char-set
-char-set-xor!
cs1 cs2 ... -> char-set
-char-set-diff+intersection!
cs1 cs2 cs3 ... -> [char-set char-set]
-
- char-set-diff+intersection!
is allowed to side-effect both
- of its two required parameters, cs1
- and cs2.
-
-Several character sets are predefined for convenience: - - - - - - - - - - - - - - - - - - -
char-set:lower-case | Lower-case letters |
char-set:upper-case | Upper-case letters |
char-set:title-case | Title-case letters |
char-set:letter | Letters |
char-set:digit | Digits |
char-set:letter+digit | Letters and digits |
char-set:graphic | Printing characters except spaces |
char-set:printing | Printing characters including spaces |
char-set:whitespace | Whitespace characters |
char-set:iso-control | The ISO control characters |
char-set:punctuation | Punctuation characters |
char-set:symbol | Symbol characters |
char-set:hex-digit | A hexadecimal digit: 0-9, A-F, a-f |
char-set:blank | Blank characters -- horizontal whitespace |
char-set:ascii | All characters in the ASCII set. |
char-set:empty | Empty set |
char-set:full | All characters |
-Note that there may be characters in char-set:letter
that are neither upper nor
-lower case---this might occur in implementations that use a character type
-richer than ASCII, such as Unicode. A "graphic character" is one that would
-put ink on your page. While the exact composition of these sets may vary
-depending upon the character type provided by the underlying Scheme system,
-here are the definitions for some of the sets in an ASCII implementation:
-
char-set:lower-case | a-z |
char-set:upper-case | A-Z |
char-set:letter | A-Z and a-z |
char-set:digit | 0123456789 |
char-set:punctuation | !"#%&'()*,-./:;?@[\]_{} |
char-set:symbol | $+<=>^`|~ |
char-set:whitespace | Space, newline, tab, form feed, |
vertical tab, carriage return | |
char-set:blank | Space and tab |
char-set:graphic | letter + digit + punctuation + symbol |
char-set:printing | graphic + whitespace |
char-set:iso-control | ASCII 0-31 and 127 |
-Note that the existence of the char-set:ascii
set implies that the underlying
-character set is required to be at least as rich as ASCII (including
-ASCII's control characters).
-
-
-Rationale: The name choices reflect a shift from the older "alphabetic/numeric" -terms found in -R5RS -and Posix to newer, Unicode-influenced "letter/digit" lexemes. - - -
-In Unicode Scheme implementations, the base character sets are compatible with -Java's Unicode specifications. For ASCII or Latin-1, we simply restrict the -Unicode set specifications to their first 128 or 256 codes, respectively. -Scheme implementations that are not based on ASCII, Latin-1 or Unicode should -attempt to preserve the sense or spirit of these definitions. - -
-The following descriptions frequently make reference to the "Unicode character -database." This is a file, available at URL -
--Each line contains a description of a Unicode character. The first -semicolon-delimited field of the line gives the hex value of the character's -code; the second field gives the name of the character, and the third field -gives a two-letter category. Other fields give simple 1-1 case-mappings for -the character and other information; see -
-for further description of the file's format. Note in particular the -two-letter category specified in the third field, which is referenced -frequently in the descriptions below. - - -
-For Unicode, we follow Java's specification: a character is lowercase if -
-The lower-case ASCII characters are -
-Latin-1 adds another 33 lower-case characters to the ASCII set: -
00B5 | MICRO SIGN |
00DF | LATIN SMALL LETTER SHARP S |
00E0 | LATIN SMALL LETTER A WITH GRAVE |
00E1 | LATIN SMALL LETTER A WITH ACUTE |
00E2 | LATIN SMALL LETTER A WITH CIRCUMFLEX |
00E3 | LATIN SMALL LETTER A WITH TILDE |
00E4 | LATIN SMALL LETTER A WITH DIAERESIS |
00E5 | LATIN SMALL LETTER A WITH RING ABOVE |
00E6 | LATIN SMALL LETTER AE |
00E7 | LATIN SMALL LETTER C WITH CEDILLA |
00E8 | LATIN SMALL LETTER E WITH GRAVE |
00E9 | LATIN SMALL LETTER E WITH ACUTE |
00EA | LATIN SMALL LETTER E WITH CIRCUMFLEX |
00EB | LATIN SMALL LETTER E WITH DIAERESIS |
00EC | LATIN SMALL LETTER I WITH GRAVE |
00ED | LATIN SMALL LETTER I WITH ACUTE |
00EE | LATIN SMALL LETTER I WITH CIRCUMFLEX |
00EF | LATIN SMALL LETTER I WITH DIAERESIS |
00F0 | LATIN SMALL LETTER ETH |
00F1 | LATIN SMALL LETTER N WITH TILDE |
00F2 | LATIN SMALL LETTER O WITH GRAVE |
00F3 | LATIN SMALL LETTER O WITH ACUTE |
00F4 | LATIN SMALL LETTER O WITH CIRCUMFLEX |
00F5 | LATIN SMALL LETTER O WITH TILDE |
00F6 | LATIN SMALL LETTER O WITH DIAERESIS |
00F8 | LATIN SMALL LETTER O WITH STROKE |
00F9 | LATIN SMALL LETTER U WITH GRAVE |
00FA | LATIN SMALL LETTER U WITH ACUTE |
00FB | LATIN SMALL LETTER U WITH CIRCUMFLEX |
00FC | LATIN SMALL LETTER U WITH DIAERESIS |
00FD | LATIN SMALL LETTER Y WITH ACUTE |
00FE | LATIN SMALL LETTER THORN |
00FF | LATIN SMALL LETTER Y WITH DIAERESIS |
-Note that three of these have no corresponding Latin-1 upper-case character: -
00B5 | MICRO SIGN |
00DF | LATIN SMALL LETTER SHARP S |
00FF | LATIN SMALL LETTER Y WITH DIAERESIS |
-(The compatibility micro character uppercases to the non-Latin-1 Greek capital -mu; the German sharp s character uppercases to the pair of characters "SS," -and the capital y-with-diaeresis is non-Latin-1.) - -
-(Note that the Java spec for lowercase characters given at -
--is inconsistent. U+00B5 MICRO SIGN fulfills the requirements for a lower-case -character (as of Unicode 3.0), but is not given in the numeric list of -lower-case character codes.) - -
-(Note that the Java spec for isLowerCase()
given at
-
-gives three mutually inconsistent definitions of "lower case." The first is -the definition used in this SRFI. Following text says "A character is -considered to be lowercase if and only if it is specified to be lowercase by -the Unicode 2.0 standard (category Ll in the Unicode specification data -file)." The former spec excludes U+00AA FEMININE ORDINAL INDICATOR and -U+00BA MASCULINE ORDINAL INDICATOR; the latter spec includes them. Finally, -the spec enumerates a list of characters in the Latin-1 subset; this list -excludes U+00B5 MICRO SIGN, which is included in both of the previous specs.) - - -
-For Unicode, we follow Java's specification: a character is uppercase if -
-The upper-case ASCII characters are -
-Latin-1 adds another 30 upper-case characters to the ASCII set: -
00C0 | LATIN CAPITAL LETTER A WITH GRAVE |
00C1 | LATIN CAPITAL LETTER A WITH ACUTE |
00C2 | LATIN CAPITAL LETTER A WITH CIRCUMFLEX |
00C3 | LATIN CAPITAL LETTER A WITH TILDE |
00C4 | LATIN CAPITAL LETTER A WITH DIAERESIS |
00C5 | LATIN CAPITAL LETTER A WITH RING ABOVE |
00C6 | LATIN CAPITAL LETTER AE |
00C7 | LATIN CAPITAL LETTER C WITH CEDILLA |
00C8 | LATIN CAPITAL LETTER E WITH GRAVE |
00C9 | LATIN CAPITAL LETTER E WITH ACUTE |
00CA | LATIN CAPITAL LETTER E WITH CIRCUMFLEX |
00CB | LATIN CAPITAL LETTER E WITH DIAERESIS |
00CC | LATIN CAPITAL LETTER I WITH GRAVE |
00CD | LATIN CAPITAL LETTER I WITH ACUTE |
00CE | LATIN CAPITAL LETTER I WITH CIRCUMFLEX |
00CF | LATIN CAPITAL LETTER I WITH DIAERESIS |
00D0 | LATIN CAPITAL LETTER ETH |
00D1 | LATIN CAPITAL LETTER N WITH TILDE |
00D2 | LATIN CAPITAL LETTER O WITH GRAVE |
00D3 | LATIN CAPITAL LETTER O WITH ACUTE |
00D4 | LATIN CAPITAL LETTER O WITH CIRCUMFLEX |
00D5 | LATIN CAPITAL LETTER O WITH TILDE |
00D6 | LATIN CAPITAL LETTER O WITH DIAERESIS |
00D8 | LATIN CAPITAL LETTER O WITH STROKE |
00D9 | LATIN CAPITAL LETTER U WITH GRAVE |
00DA | LATIN CAPITAL LETTER U WITH ACUTE |
00DB | LATIN CAPITAL LETTER U WITH CIRCUMFLEX |
00DC | LATIN CAPITAL LETTER U WITH DIAERESIS |
00DD | LATIN CAPITAL LETTER Y WITH ACUTE |
00DE | LATIN CAPITAL LETTER THORN |
-In Unicode, a character is titlecase if it has the category Lt in -the character attribute database. There are very few of these characters; -here is the entire 31-character list as of Unicode 3.0: -
01C5 | LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON - |
01C8 | LATIN CAPITAL LETTER L WITH SMALL LETTER J - |
01CB | LATIN CAPITAL LETTER N WITH SMALL LETTER J - |
01F2 | LATIN CAPITAL LETTER D WITH SMALL LETTER Z - |
1F88 | GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - |
1F89 | GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI - |
1F8A | GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI - |
1F8B | GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI - |
1F8C | GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI - |
1F8D | GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI - |
1F8E | GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - |
1F8F | GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - |
1F98 | GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI - |
1F99 | GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI - |
1F9A | GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI - |
1F9B | GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI - |
1F9C | GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI - |
1F9D | GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI - |
1F9E | GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - |
1F9F | GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - |
1FA8 | GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI - |
1FA9 | GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI - |
1FAA | GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI - |
1FAB | GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI - |
1FAC | GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI - |
1FAD | GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI - |
1FAE | GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - |
1FAF | GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - |
1FBC | GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI - |
1FCC | GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI - |
1FFC | GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI - |
-There are no ASCII or Latin-1 titlecase characters. - - - -
-In Unicode, a letter is any character with one of the letter categories -(Lu, Ll, Lt, Lm, Lo) in the Unicode character database. - -
-There are 52 ASCII letters -
-There are 117 Latin-1 letters. These are the 115 characters that are
-members of the Latin-1 char-set:lower-case
and char-set:upper-case
sets,
-plus
-
00AA | FEMININE ORDINAL INDICATOR |
00BA | MASCULINE ORDINAL INDICATOR |
-(These two letters are considered lower-case by Unicode, but not by -Java or SRFI 14.) - - -
-In Unicode, a character is a digit if it has the category Nd in -the character attribute database. In Latin-1 and ASCII, the only -such characters are 0123456789. In Unicode, there are other digit -characters in other code blocks, such as Gujarati digits and Tibetan -digits. - - - -
-The only hex digits are 0123456789abcdefABCDEF. - - - -
-The union of char-set:letter
and char-set:digit.
-
-
-
-A graphic character is one that would put ink on paper. The ASCII and Latin-1 -graphic characters are the members of -
char-set:letter |
char-set:digit |
char-set:punctuation |
char-set:symbol |
-A printing character is one that would occupy space when printed, i.e.,
-a graphic character or a space character. char-set:printing
is the union
-of char-set:whitespace
and char-set:graphic.
-
-
-
-In Unicode, a whitespace character is either -
-There are 24 whitespace characters in Unicode 3.0: -
0009 | HORIZONTAL TABULATION | \t control-I |
000A | LINE FEED | \n control-J |
000B | VERTICAL TABULATION | \v control-K |
000C | FORM FEED | \f control-L |
000D | CARRIAGE RETURN | \r control-M |
0020 | SPACE | Zs |
00A0 | NO-BREAK SPACE | Zs |
1680 | OGHAM SPACE MARK | Zs |
2000 | EN QUAD | Zs |
2001 | EM QUAD | Zs |
2002 | EN SPACE | Zs |
2003 | EM SPACE | Zs |
2004 | THREE-PER-EM SPACE | Zs |
2005 | FOUR-PER-EM SPACE | Zs |
2006 | SIX-PER-EM SPACE | Zs |
2007 | FIGURE SPACE | Zs |
2008 | PUNCTUATION SPACE | Zs |
2009 | THIN SPACE | Zs |
200A | HAIR SPACE | Zs |
200B | ZERO WIDTH SPACE | Zs |
2028 | LINE SEPARATOR | Zl |
2029 | PARAGRAPH SEPARATOR | Zp |
202F | NARROW NO-BREAK SPACE | Zs |
3000 | IDEOGRAPHIC SPACE | Zs |
-The ASCII whitespace characters are the first six characters in the above list
--- line feed, horizontal tabulation, vertical tabulation, form feed, carriage
-return, and space. These are also exactly the characters recognised by the
-Posix isspace()
procedure. Latin-1 adds the no-break space.
-
-
-Note: Java's isWhitespace()
method is incompatible, including
-
0009 | HORIZONTAL TABULATION | (\t control-I) |
001C | FILE SEPARATOR | (control-\) |
001D | GROUP SEPARATOR | (control-]) |
001E | RECORD SEPARATOR | (control-^) |
001F | UNIT SEPARATOR | (control-_) |
-and excluding -
00A0 | NO-BREAK SPACE |
-Java's excluding the no-break space means that tokenizers can simply break
-character streams at "whitespace" boundaries. However, the exclusion introduces
-exceptions in other places, e.g. char-set:printing
is no longer simply the
-union of char-set:graphic
and char-set:whitespace.
-
-
-
-
-The ISO control characters are the Unicode/Latin-1 characters in the ranges -[U+0000,U+001F] and [U+007F,U+009F]. - -
-ASCII restricts this set to the characters in the range [U+0000,U+001F] -plus the character U+007F. - -
-Note that Unicode defines other control characters which do not belong to this
-set (hence the qualifying prefix "iso-" in the name). This restriction is
-compatible with the Java IsISOControl()
method.
-
-
-
-
-In Unicode, a punctuation character is any character that has one of the -punctuation categories in the Unicode character database (Pc, Pd, Ps, -Pe, Pi, Pf, or Po.) - -
-ASCII has 23 punctuation characters: -
-!"#%&'()*,-./:;?@[\]_{} --
-Latin-1 adds six more: -
00A1 | INVERTED EXCLAMATION MARK - |
00AB | LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - |
00AD | SOFT HYPHEN - |
00B7 | MIDDLE DOT - |
00BB | RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - |
00BF | INVERTED QUESTION MARK - |
-Note that the nine ASCII characters $+<=>^`|~
are not
-punctuation. They are "symbols."
-
-
-
-
-In Unicode, a symbol is any character that has one of the symbol categories -in the Unicode character database (Sm, Sc, Sk, or So). There -are nine ASCII symbol characters: -
-$+<=>^`|~ --
-Latin-1 adds 18 more: -
00A2 | CENT SIGN |
00A3 | POUND SIGN |
00A4 | CURRENCY SIGN |
00A5 | YEN SIGN |
00A6 | BROKEN BAR |
00A7 | SECTION SIGN |
00A8 | DIAERESIS |
00A9 | COPYRIGHT SIGN |
00AC | NOT SIGN |
00AE | REGISTERED SIGN |
00AF | MACRON |
00B0 | DEGREE SIGN |
00B1 | PLUS-MINUS SIGN |
00B4 | ACUTE ACCENT |
00B6 | PILCROW SIGN |
00B8 | CEDILLA |
00D7 | MULTIPLICATION SIGN |
00F7 | DIVISION SIGN |
-Blank chars are horizontal whitespace. In Unicode, a blank character is either -
-There are eighteen blank characters in Unicode 3.0: -
0009 | HORIZONTAL TABULATION | \t control-I |
0020 | SPACE | Zs |
00A0 | NO-BREAK SPACE | Zs |
1680 | OGHAM SPACE MARK | Zs |
2000 | EN QUAD | Zs |
2001 | EM QUAD | Zs |
2002 | EN SPACE | Zs |
2003 | EM SPACE | Zs |
2004 | THREE-PER-EM SPACE | Zs |
2005 | FOUR-PER-EM SPACE | Zs |
2006 | SIX-PER-EM SPACE | Zs |
2007 | FIGURE SPACE | Zs |
2008 | PUNCTUATION SPACE | Zs |
2009 | THIN SPACE | Zs |
200A | HAIR SPACE | Zs |
200B | ZERO WIDTH SPACE | Zs |
202F | NARROW NO-BREAK SPACE | Zs |
3000 | IDEOGRAPHIC SPACE | Zs |
-The ASCII blank characters are the first two characters above -- -horizontal tab and space. Latin-1 adds the no-break space. - -
-Java doesn't have the concept of "blank" characters, so there are no -compatibility issues. - - - -
-This SRFI comes with a reference implementation. It resides at: -
--I have placed this source on the Net with an unencumbered, "open" copyright. -Some of the code in the reference implementation bears a distant family -relation to the MIT Scheme implementation, and being derived from that code, -is covered by the MIT Scheme copyright (which is a generic BSD-style -open-source copyright -- see the source file for details). The remainder of -the code was written by myself for scsh or for this SRFI; I have placed this -code under the scsh copyright, which is also a generic BSD-style open-source -copyright. - -
-The code is written for portability and should be simple to port to -any Scheme. It has only the following deviations from R4RS, clearly -discussed in the comments: -
error
procedure;
- values
procedure for producing multiple return values;
- check-arg
procedure for argument checking;
- let-optionals*
and :optional
macros for for parsing, checking and defaulting
- optional arguments from rest lists;
- define-record-type
form;
- bitwise-and
for the hash function;
- %latin1->char
and %char->latin1
.
--The library is written for clarity and well-commented; the current source is -about 375 lines of source code and 375 lines of comments and white space. -It is also written for efficiency. Fast paths are provided for common cases. - -
-This is not to say that the implementation can't be tuned up for -a specific Scheme implementation. There are notes in comments addressing -ways implementors can tune the reference implementation for performance. - -
-In short, I've written the reference implementation to make it as painless -as possible for an implementor -- or a regular programmer -- to adopt this -library and get good results with it. - -
-The code uses a rather simple-minded, inefficient representation for -ASCII/Latin-1 char-sets -- a 256-character string. The character whose code is -i is in the set if s[i] = ASCII 1 (soh, or ^a); -not in the set if s[i] = ASCII 0 (nul). -A much faster and denser representation would be 16 or 32 bytes worth -of bit string. A portable implementation using bit sets awaits standards for -bitwise logical-ops and byte vectors. - -
-"Large" character types, such as Unicode, should use a sparse representation, -taking care that the Latin-1 subset continues to be represented with a -dense 32-byte bit set. - - - -
-The design of this library benefited greatly from the feedback provided during -the SRFI discussion phase. Among those contributing thoughtful commentary and -suggestions, both on the mailing list and by private discussion, were Paolo -Amoroso, Lars Arvestad, Alan Bawden, Jim Bender, Dan Bornstein, Per Bothner, -Will Clinger, Brian Denheyer, Kent Dybvig, Sergei Egorov, Marc Feeley, -Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Arthur A. Gleckler, Ben -Goetter, Sven Hartrumpf, Erik Hilsdale, Shiro Kawai, Richard Kelsey, Oleg -Kiselyov, Bengt Kleberg, Donovan Kolbly, Bruce Korb, Shriram Krishnamurthi, -Bruce Lewis, Tom Lord, Brad Lucier, Dave Mason, David Rush, Klaus Schilling, -Jonathan Sobel, Mike Sperber, Mikael Staldal, Vladimir Tsyshevsky, Donald -Welsh, and Mike Wilson. I am grateful to them for their assistance. - -
-I am also grateful the authors, implementors and documentors of all the -systems mentioned in the introduction. Aubrey Jaffer should be noted for his -work in producing Web-accessible versions of the R5RS spec, which was a -tremendous aid. - -
-This is not to imply that these individuals necessarily endorse the final -results, of course. - -
-During this document's long development period, great patience was exhibited -by Mike Sperber, who is the editor for the SRFI, and by Hillary Sullivan, -who is not. - - -
-Certain portions of this document -- the specific, marked segments of text -describing the R5RS procedures -- were adapted with permission from the R5RS -report. - -
-All other text is copyright (C) Olin Shivers (1998, 1999, 2000). -All Rights Reserved. - -
-This document and translations of it may be copied and furnished to others, -and derivative works that comment on or otherwise explain it or assist in its -implementation may be prepared, copied, published and distributed, in whole or -in part, without restriction of any kind, provided that the above copyright -notice and this paragraph are included on all such copies and derivative -works. However, this document itself may not be modified in any way, such as -by removing the copyright notice or references to the Scheme Request For -Implementation process or editors, except as needed for the purpose of -developing SRFIs in which case the procedures for copyrights defined in the -SRFI process must be followed, or as required to translate it into languages -other than English. - -
-The limited permissions granted above are perpetual and will not be revoked by -the authors or their successors or assigns. - -
-This document and the information contained herein is provided on an
-"as is" basis and the authors and the SRFI editors
-disclaim all warranties, express or implied, including but not limited to any
-warranty that the use of the information herein will not infringe any rights
-or any implied warranties of merchantability or fitness for a particular
-purpose.
-
-
-
-
diff --git a/scsh/lib/cset-lib.scm b/scsh/lib/cset-lib.scm
deleted file mode 100644
index 2effd4b..0000000
--- a/scsh/lib/cset-lib.scm
+++ /dev/null
@@ -1,804 +0,0 @@
-;;; SRFI-14 character-sets library -*- Scheme -*-
-;;;
-;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom.
-;;; - Massively rehacked & extended by Olin Shivers 6/98.
-;;; - Massively redesigned and rehacked 5/2000 during SRFI process.
-;;; At this point, the code bears the following relationship to the
-;;; MIT Scheme code: "This is my grandfather's axe. My father replaced
-;;; the head, and I have replaced the handle." Nonetheless, we preserve
-;;; the MIT Scheme copyright:
-;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
-;;; The MIT Scheme license is a "free software" license. See the end of
-;;; this file for the tedious details.
-
-;;; Exports:
-;;; char-set? char-set= char-set<=
-;;; char-set-hash
-;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set?
-;;; char-set-fold char-set-unfold char-set-unfold!
-;;; char-set-for-each char-set-map
-;;; char-set-copy char-set
-;;;
-;;; list->char-set string->char-set
-;;; list->char-set! string->char-set!
-;;;
-;;; filterchar-set ucs-range->char-set ->char-set
-;;; filterchar-set! ucs-range->char-set!
-;;;
-;;; char-set->list char-set->string
-;;;
-;;; char-set-size char-set-count char-set-contains?
-;;; char-set-every char-set-any
-;;;
-;;; char-set-adjoin char-set-delete
-;;; char-set-adjoin! char-set-delete!
-;;;
-
-;;; char-set-complement char-set-union char-set-intersection
-;;; char-set-complement! char-set-union! char-set-intersection!
-;;;
-;;; char-set-difference char-set-xor char-set-diff+intersection
-;;; char-set-difference! char-set-xor! char-set-diff+intersection!
-;;;
-;;; char-set:lower-case char-set:upper-case char-set:title-case
-;;; char-set:letter char-set:digit char-set:letter+digit
-;;; char-set:graphic char-set:printing char-set:whitespace
-;;; char-set:iso-control char-set:punctuation char-set:symbol
-;;; char-set:hex-digit char-set:blank char-set:ascii
-;;; char-set:empty char-set:full
-
-;;; Imports
-;;; This code has the following non-R5RS dependencies:
-;;; - ERROR
-;;; - %LATIN1->CHAR %CHAR->LATIN1
-;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting
-;;; optional arguments from rest lists.
-;;; - BITWISE-AND for CHAR-SET-HASH
-;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro
-;;; - A simple CHECK-ARG procedure:
-;;; (lambda (pred val caller) (if (not (pred val)) (error val caller)))
-
-;;; This is simple code, not great code. Char sets are represented as 256-char
-;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I
-;;; is ASCII/Latin-1 1, then it is in the set.
-;;; - Should be rewritten to use bit strings or byte vecs.
-;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode.
-
-;;; See the end of the file for porting and performance-tuning notes.
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define-record-type :char-set
- (make-char-set s)
- char-set?
- (s char-set:s))
-
-
-(define (%string-copy s) (substring s 0 (string-length s)))
-
-;;; Parse, type-check & default a final optional BASE-CS parameter from
-;;; a rest argument. Return a *fresh copy* of the underlying string.
-;;; The default is the empty set. The PROC argument is to help us
-;;; generate informative error exceptions.
-
-(define (%default-base maybe-base proc)
- (if (pair? maybe-base)
- (let ((bcs (car maybe-base))
- (tail (cdr maybe-base)))
- (if (null? tail)
- (if (char-set? bcs) (%string-copy (char-set:s bcs))
- (error "BASE-CS parameter not a char-set" proc bcs))
- (error "Expected final base char set -- too many parameters"
- proc maybe-base)))
- (make-string 256 (%latin1->char 0))))
-
-;;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on
-;;; behalf of our caller, PROC. This procedure exists basically to provide
-;;; explicit error-checking & reporting.
-
-(define (%char-set:s/check cs proc)
- (let lp ((cs cs))
- (if (char-set? cs) (char-set:s cs)
- (lp (error "Not a char-set" cs proc)))))
-
-
-
-;;; These internal functions hide a lot of the dependency on the
-;;; underlying string representation of char sets. They should be
-;;; inlined if possible.
-
-(define (si=0? s i) (zero? (%char->latin1 (string-ref s i))))
-(define (si=1? s i) (not (si=0? s i)))
-(define c0 (%latin1->char 0))
-(define c1 (%latin1->char 1))
-(define (si s i) (%char->latin1 (string-ref s i)))
-(define (%set0! s i) (string-set! s i c0))
-(define (%set1! s i) (string-set! s i c1))
-
-;;; These do various "s[i] := s[i] op val" operations -- see
-;;; %CHAR-SET-ALGEBRA. They are used to implement the various
-;;; set-algebra procedures.
-(define (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value.
-(define (%not! s i v) (setv! s i (- 1 v)))
-(define (%and! s i v) (if (zero? v) (%set0! s i)))
-(define (%or! s i v) (if (not (zero? v)) (%set1! s i)))
-(define (%minus! s i v) (if (not (zero? v)) (%set0! s i)))
-(define (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i)))))
-
-
-(define (char-set-copy cs)
- (make-char-set (%string-copy (%char-set:s/check cs char-set-copy))))
-
-(define (char-set= . rest)
- (or (null? rest)
- (let* ((cs1 (car rest))
- (rest (cdr rest))
- (s1 (%char-set:s/check cs1 char-set=)))
- (let lp ((rest rest))
- (or (not (pair? rest))
- (and (string=? s1 (%char-set:s/check (car rest) char-set=))
- (lp (cdr rest))))))))
-
-(define (char-set<= . rest)
- (or (null? rest)
- (let ((cs1 (car rest))
- (rest (cdr rest)))
- (let lp ((s1 (%char-set:s/check cs1 char-set<=)) (rest rest))
- (or (not (pair? rest))
- (let ((s2 (%char-set:s/check (car rest) char-set<=))
- (rest (cdr rest)))
- (if (eq? s1 s2) (lp s2 rest) ; Fast path
- (let lp2 ((i 255)) ; Real test
- (if (< i 0) (lp s2 rest)
- (and (<= (si s1 i) (si s2 i))
- (lp2 (- i 1))))))))))))
-
-;;; Hash
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
-;;; to keep the intermediate values small. (We do the calculation with just
-;;; enough bits to represent BOUND, masking off high bits at each step in
-;;; calculation. If this screws up any important properties of the hash
-;;; function I'd like to hear about it. -Olin)
-;;;
-;;; If you keep BOUND small enough, the intermediate calculations will
-;;; always be fixnums. How small is dependent on the underlying Scheme system;
-;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
-;;; Schemes that give you at least 29 signed bits for fixnums. The core
-;;; calculation that you don't want to overflow is, worst case,
-;;; (+ 65535 (* 37 (- bound 1)))
-;;; where 65535 is the max character code. Choose the default BOUND to be the
-;;; biggest power of two that won't cause this expression to fixnum overflow,
-;;; and everything will be copacetic.
-
-(define (char-set-hash cs . maybe-bound)
- (let* ((bound (:optional maybe-bound 4194304 (lambda (n) (and (integer? n)
- (exact? n)
- (<= 0 n)))))
- (bound (if (zero? bound) 4194304 bound)) ; 0 means default.
- (s (%char-set:s/check cs char-set-hash))
- ;; Compute a 111...1 mask that will cover BOUND-1:
- (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
- (if (>= i bound) (- i 1) (lp (+ i i))))))
-
- (let lp ((i 255) (ans 0))
- (if (< i 0) (modulo ans bound)
- (lp (- i 1)
- (if (si=0? s i) ans
- (bitwise-and mask (+ (* 37 ans) i))))))))
-
-
-(define (char-set-contains? cs char)
- (si=1? (%char-set:s/check cs char-set-contains?)
- (%char->latin1 (check-arg char? char char-set-contains?))))
-
-(define (char-set-size cs)
- (let ((s (%char-set:s/check cs char-set-size)))
- (let lp ((i 255) (size 0))
- (if (< i 0) size
- (lp (- i 1) (+ size (si s i)))))))
-
-(define (char-set-count pred cset)
- (check-arg procedure? pred char-set-count)
- (let ((s (%char-set:s/check cset char-set-count)))
- (let lp ((i 255) (count 0))
- (if (< i 0) count
- (lp (- i 1)
- (if (and (si=1? s i) (pred (%latin1->char i)))
- (+ count 1)
- count))))))
-
-
-;;; -- Adjoin & delete
-
-(define (%set-char-set set proc cs chars)
- (let ((s (%string-copy (%char-set:s/check cs proc))))
- (for-each (lambda (c) (set s (%char->latin1 c)))
- chars)
- (make-char-set s)))
-
-(define (%set-char-set! set proc cs chars)
- (let ((s (%char-set:s/check cs proc)))
- (for-each (lambda (c) (set s (%char->latin1 c)))
- chars))
- cs)
-
-(define (char-set-adjoin cs . chars)
- (%set-char-set %set1! char-set-adjoin cs chars))
-(define (char-set-adjoin! cs . chars)
- (%set-char-set! %set1! char-set-adjoin! cs chars))
-(define (char-set-delete cs . chars)
- (%set-char-set %set0! char-set-delete cs chars))
-(define (char-set-delete! cs . chars)
- (%set-char-set! %set0! char-set-delete! cs chars))
-
-
-;;; Cursors
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Simple implementation. A cursors is an integer index into the
-;;; mark vector, and -1 for the end-of-char-set cursor.
-;;;
-;;; If we represented char sets as a bit set, we could do the following
-;;; trick to pick the lowest bit out of the set:
-;;; (count-bits (xor (- cset 1) cset))
-;;; (But first mask out the bits already scanned by the cursor first.)
-
-(define (char-set-cursor cset)
- (%char-set-cursor-next cset 256 char-set-cursor))
-
-(define (end-of-char-set? cursor) (< cursor 0))
-
-(define (char-set-ref cset cursor) (%latin1->char cursor))
-
-(define (char-set-cursor-next cset cursor)
- (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i 255))) cursor
- char-set-cursor-next)
- (%char-set-cursor-next cset cursor char-set-cursor-next))
-
-(define (%char-set-cursor-next cset cursor proc) ; Internal
- (let ((s (%char-set:s/check cset proc)))
- (let lp ((cur cursor))
- (let ((cur (- cur 1)))
- (if (or (< cur 0) (si=1? s cur)) cur
- (lp cur))))))
-
-
-;;; -- for-each map fold unfold every any
-
-(define (char-set-for-each proc cs)
- (check-arg procedure? proc char-set-for-each)
- (let ((s (%char-set:s/check cs char-set-for-each)))
- (let lp ((i 255))
- (cond ((>= i 0)
- (if (si=1? s i) (proc (%latin1->char i)))
- (lp (- i 1)))))))
-
-(define (char-set-map proc cs)
- (check-arg procedure? proc char-set-map)
- (let ((s (%char-set:s/check cs char-set-map))
- (ans (make-string 256 c0)))
- (let lp ((i 255))
- (cond ((>= i 0)
- (if (si=1? s i)
- (%set1! ans (%char->latin1 (proc (%latin1->char i)))))
- (lp (- i 1)))))
- (make-char-set ans)))
-
-(define (char-set-fold kons knil cs)
- (check-arg procedure? kons char-set-fold)
- (let ((s (%char-set:s/check cs char-set-fold)))
- (let lp ((i 255) (ans knil))
- (if (< i 0) ans
- (lp (- i 1)
- (if (si=0? s i) ans
- (kons (%latin1->char i) ans)))))))
-
-(define (char-set-every pred cs)
- (check-arg procedure? pred char-set-every)
- (let ((s (%char-set:s/check cs char-set-every)))
- (let lp ((i 255))
- (or (< i 0)
- (and (or (si=0? s i) (pred (%latin1->char i)))
- (lp (- i 1)))))))
-
-(define (char-set-any pred cs)
- (check-arg procedure? pred char-set-any)
- (let ((s (%char-set:s/check cs char-set-any)))
- (let lp ((i 255))
- (and (>= i 0)
- (or (and (si=1? s i) (pred (%latin1->char i)))
- (lp (- i 1)))))))
-
-
-(define (%char-set-unfold! proc p f g s seed)
- (check-arg procedure? p proc)
- (check-arg procedure? f proc)
- (check-arg procedure? g proc)
- (let lp ((seed seed))
- (cond ((not (p seed)) ; P says we are done.
- (%set1! s (%char->latin1 (f seed))) ; Add (F SEED) to set.
- (lp (g seed)))))) ; Loop on (G SEED).
-
-(define (char-set-unfold p f g seed . maybe-base)
- (let ((bs (%default-base maybe-base char-set-unfold)))
- (%char-set-unfold! char-set-unfold p f g bs seed)
- (make-char-set bs)))
-
-(define (char-set-unfold! p f g seed base-cset)
- (%char-set-unfold! char-set-unfold! p f g
- (%char-set:s/check base-cset char-set-unfold!)
- seed)
- base-cset)
-
-
-
-;;; list <--> char-set
-
-(define (%list->char-set! chars s)
- (for-each (lambda (char) (%set1! s (%char->latin1 char)))
- chars))
-
-(define (char-set . chars)
- (let ((s (make-string 256 c0)))
- (%list->char-set! chars s)
- (make-char-set s)))
-
-(define (list->char-set chars . maybe-base)
- (let ((bs (%default-base maybe-base list->char-set)))
- (%list->char-set! chars bs)
- (make-char-set bs)))
-
-(define (list->char-set! chars base-cs)
- (%list->char-set! chars (%char-set:s/check base-cs list->char-set!))
- base-cs)
-
-
-(define (char-set->list cs)
- (let ((s (%char-set:s/check cs char-set->list)))
- (let lp ((i 255) (ans '()))
- (if (< i 0) ans
- (lp (- i 1)
- (if (si=0? s i) ans
- (cons (%latin1->char i) ans)))))))
-
-
-
-;;; string <--> char-set
-
-(define (%string->char-set! str bs proc)
- (check-arg string? str proc)
- (do ((i (- (string-length str) 1) (- i 1)))
- ((< i 0))
- (%set1! bs (%char->latin1 (string-ref str i)))))
-
-(define (string->char-set str . maybe-base)
- (let ((bs (%default-base maybe-base string->char-set)))
- (%string->char-set! str bs string->char-set)
- (make-char-set bs)))
-
-(define (string->char-set! str base-cs)
- (%string->char-set! str (%char-set:s/check base-cs string->char-set!)
- string->char-set!)
- base-cs)
-
-
-(define (char-set->string cs)
- (let* ((s (%char-set:s/check cs char-set->string))
- (ans (make-string (char-set-size cs))))
- (let lp ((i 255) (j 0))
- (if (< i 0) ans
- (let ((j (if (si=0? s i) j
- (begin (string-set! ans j (%latin1->char i))
- (+ j 1)))))
- (lp (- i 1) j))))))
-
-
-;;; -- UCS-range -> char-set
-
-(define (%ucs-range->char-set! lower upper error? bs proc)
- (check-arg (lambda (x) (and (integer? x) (exact? x) (<= 0 x))) lower proc)
- (check-arg (lambda (x) (and (integer? x) (exact? x) (<= lower x))) upper proc)
-
- (if (and (< lower upper) (< 256 upper) error?)
- (error "Requested UCS range contains unavailable characters -- this implementation only supports Latin-1"
- proc lower upper))
-
- (let lp ((i (- (min upper 256) 1)))
- (cond ((<= lower i) (%set1! bs i) (lp (- i 1))))))
-
-(define (ucs-range->char-set lower upper . rest)
- (let-optionals* rest ((error? #f) rest)
- (let ((bs (%default-base rest ucs-range->char-set)))
- (%ucs-range->char-set! lower upper error? bs ucs-range->char-set)
- (make-char-set bs))))
-
-(define (ucs-range->char-set! lower upper error? base-cs)
- (%ucs-range->char-set! lower upper error?
- (%char-set:s/check base-cs ucs-range->char-set!)
- ucs-range->char-set)
- base-cs)
-
-
-;;; -- predicate -> char-set
-
-(define (%char-set-filter! pred ds bs proc)
- (check-arg procedure? pred proc)
- (let lp ((i 255))
- (cond ((>= i 0)
- (if (and (si=1? ds i) (pred (%latin1->char i)))
- (%set1! bs i))
- (lp (- i 1))))))
-
-(define (char-set-filter predicate domain . maybe-base)
- (let ((bs (%default-base maybe-base char-set-filter)))
- (%char-set-filter! predicate
- (%char-set:s/check domain char-set-filter!)
- bs
- char-set-filter)
- (make-char-set bs)))
-
-(define (char-set-filter! predicate domain base-cs)
- (%char-set-filter! predicate
- (%char-set:s/check domain char-set-filter!)
- (%char-set:s/check base-cs char-set-filter!)
- char-set-filter!)
- base-cs)
-
-
-;;; {string, char, char-set, char predicate} -> char-set
-
-(define (->char-set x)
- (cond ((char-set? x) x)
- ((string? x) (string->char-set x))
- ((char? x) (char-set x))
- (else (error "->char-set: Not a charset, string or char." x))))
-
-
-
-;;; Set algebra
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; The exported ! procs are "linear update" -- allowed, but not required, to
-;;; side-effect their first argument when computing their result. In other
-;;; words, you must use them as if they were completely functional, just like
-;;; their non-! counterparts, and you must additionally ensure that their
-;;; first arguments are "dead" at the point of call. In return, we promise a
-;;; more efficient result, plus allowing you to always assume char-sets are
-;;; unchangeable values.
-
-;;; Apply P to each index and its char code in S: (P I VAL).
-;;; Used by the set-algebra ops.
-
-(define (%string-iter p s)
- (let lp ((i (- (string-length s) 1)))
- (cond ((>= i 0)
- (p i (%char->latin1 (string-ref s i)))
- (lp (- i 1))))))
-
-;;; String S represents some initial char-set. (OP s i val) does some
-;;; kind of s[i] := s[i] op val update. Do
-;;; S := S OP CSETi
-;;; for all the char-sets in the list CSETS. The n-ary set-algebra ops
-;;; all use this internal proc.
-
-(define (%char-set-algebra s csets op proc)
- (for-each (lambda (cset)
- (let ((s2 (%char-set:s/check cset proc)))
- (let lp ((i 255))
- (cond ((>= i 0)
- (op s i (si s2 i))
- (lp (- i 1)))))))
- csets))
-
-
-;;; -- Complement
-
-(define (char-set-complement cs)
- (let ((s (%char-set:s/check cs char-set-complement))
- (ans (make-string 256)))
- (%string-iter (lambda (i v) (%not! ans i v)) s)
- (make-char-set ans)))
-
-(define (char-set-complement! cset)
- (let ((s (%char-set:s/check cset char-set-complement!)))
- (%string-iter (lambda (i v) (%not! s i v)) s))
- cset)
-
-
-;;; -- Union
-
-(define (char-set-union! cset1 . csets)
- (%char-set-algebra (%char-set:s/check cset1 char-set-union!)
- csets %or! char-set-union!)
- cset1)
-
-(define (char-set-union . csets)
- (if (pair? csets)
- (let ((s (%string-copy (%char-set:s/check (car csets) char-set-union))))
- (%char-set-algebra s (cdr csets) %or! char-set-union)
- (make-char-set s))
- (char-set-copy char-set:empty)))
-
-
-;;; -- Intersection
-
-(define (char-set-intersection! cset1 . csets)
- (%char-set-algebra (%char-set:s/check cset1 char-set-intersection!)
- csets %and! char-set-intersection!)
- cset1)
-
-(define (char-set-intersection . csets)
- (if (pair? csets)
- (let ((s (%string-copy (%char-set:s/check (car csets) char-set-intersection))))
- (%char-set-algebra s (cdr csets) %and! char-set-intersection)
- (make-char-set s))
- (char-set-copy char-set:full)))
-
-
-;;; -- Difference
-
-(define (char-set-difference! cset1 . csets)
- (%char-set-algebra (%char-set:s/check cset1 char-set-difference!)
- csets %minus! char-set-difference!)
- cset1)
-
-(define (char-set-difference cs1 . csets)
- (if (pair? csets)
- (let ((s (%string-copy (%char-set:s/check cs1 char-set-difference))))
- (%char-set-algebra s csets %minus! char-set-difference)
- (make-char-set s))
- (char-set-copy cs1)))
-
-
-;;; -- Xor
-
-(define (char-set-xor! cset1 . csets)
- (%char-set-algebra (%char-set:s/check cset1 char-set-xor!)
- csets %xor! char-set-xor!)
- cset1)
-
-(define (char-set-xor . csets)
- (if (pair? csets)
- (let ((s (%string-copy (%char-set:s/check (car csets) char-set-xor))))
- (%char-set-algebra s (cdr csets) %xor! char-set-xor)
- (make-char-set s))
- (char-set-copy char-set:empty)))
-
-
-;;; -- Difference & intersection
-
-(define (%char-set-diff+intersection! diff int csets proc)
- (for-each (lambda (cs)
- (%string-iter (lambda (i v)
- (if (not (zero? v))
- (cond ((si=1? diff i)
- (%set0! diff i)
- (%set1! int i)))))
- (%char-set:s/check cs proc)))
- csets))
-
-(define (char-set-diff+intersection! cs1 cs2 . csets)
- (let ((s1 (%char-set:s/check cs1 char-set-diff+intersection!))
- (s2 (%char-set:s/check cs2 char-set-diff+intersection!)))
- (%string-iter (lambda (i v) (if (zero? v)
- (%set0! s2 i)
- (if (si=1? s2 i) (%set0! s1 i))))
- s1)
- (%char-set-diff+intersection! s1 s2 csets char-set-diff+intersection!))
- (values cs1 cs2))
-
-(define (char-set-diff+intersection cs1 . csets)
- (let ((diff (string-copy (%char-set:s/check cs1 char-set-diff+intersection)))
- (int (make-string 256 c0)))
- (%char-set-diff+intersection! diff int csets char-set-diff+intersection)
- (values (make-char-set diff) (make-char-set int))))
-
-
-;;;; System character sets
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; These definitions are for Latin-1.
-;;;
-;;; If your Scheme implementation allows you to mark the underlying strings
-;;; as immutable, you should do so -- it would be very, very bad if a client's
-;;; buggy code corrupted these constants.
-
-(define char-set:empty (char-set))
-(define char-set:full (char-set-complement char-set:empty))
-
-(define char-set:lower-case
- (let* ((a-z (ucs-range->char-set #x61 #x7B))
- (latin1 (ucs-range->char-set! #xdf #xf7 #t a-z))
- (latin2 (ucs-range->char-set! #xf8 #x100 #t latin1)))
- (char-set-adjoin! latin2 (%latin1->char #xb5))))
-
-(define char-set:upper-case
- (let ((A-Z (ucs-range->char-set #x41 #x5B)))
- ;; Add in the Latin-1 upper-case chars.
- (ucs-range->char-set! #xd8 #xdf #t
- (ucs-range->char-set! #xc0 #xd7 #t A-Z))))
-
-(define char-set:title-case char-set:empty)
-
-(define char-set:letter
- (let ((u/l (char-set-union char-set:upper-case char-set:lower-case)))
- (char-set-adjoin! u/l
- (%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR
- (%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR
-
-(define char-set:digit (string->char-set "0123456789"))
-(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF"))
-
-(define char-set:letter+digit
- (char-set-union char-set:letter char-set:digit))
-
-(define char-set:punctuation
- (let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}"))
- (latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK
- #xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- #xAD ; SOFT HYPHEN
- #xB7 ; MIDDLE DOT
- #xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- #xBF)))) ; INVERTED QUESTION MARK
- (list->char-set! latin-1-chars ascii)))
-
-(define char-set:symbol ; ASCII symbols plus eighteen Latin-1 codes
- (let ((ascii (string->char-set "$+<=>^`|~"))
- (latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN
- #x00A3 ; POUND SIGN
- #x00A4 ; CURRENCY SIGN
- #x00A5 ; YEN SIGN
- #x00A6 ; BROKEN BAR
- #x00A7 ; SECTION SIGN
- #x00A8 ; DIAERESIS
- #x00A9 ; COPYRIGHT SIGN
- #x00AC ; NOT SIGN
- #x00AE ; REGISTERED SIGN
- #x00AF ; MACRON
- #x00B0 ; DEGREE SIGN
- #x00B1 ; PLUS-MINUS SIGN
- #x00B4 ; ACUTE ACCENT
- #x00B6 ; PILCROW SIGN
- #x00B8 ; CEDILLA
- #x00D7 ; MULTIPLICATION SIGN
- #x00F7)))) ; DIVISION SIGN
- (list->char-set! latin-1-chars ascii))) ; add the Latin-1 chars to the ASCII set built above
-
-
-(define char-set:graphic ; union of the three sets named below
- (char-set-union char-set:letter+digit char-set:punctuation char-set:symbol))
-
-(define char-set:whitespace
- (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
- #x0A ; LINE FEED
- #x0B ; VERTICAL TABULATION
- #x0C ; FORM FEED
- #x0D ; CARRIAGE RETURN
- #x20 ; SPACE
- #xA0)))) ; NO-BREAK SPACE
-
-(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; whitespace + graphic
-
-(define char-set:blank ; three of the codes that also appear in CHAR-SET:WHITESPACE
- (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION
- #x20 ; SPACE
- #xA0)))) ; NO-BREAK SPACE
-
-
-(define char-set:iso-control ; codes 0-31 plus #x7F-#x9F (upper bounds are exclusive)
- (ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32)))
-
-(define char-set:ascii (ucs-range->char-set 0 128)) ; codes 0-127; the upper bound is exclusive
-
-
-;;; Porting & performance-tuning notes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; See the section at the beginning of this file on external dependencies.
-;;;
-;;; First and foremost, rewrite this code to use bit vectors of some sort.
-;;; This will give a big speedup and memory savings.
-;;;
-;;; - LET-OPTIONALS* macro.
-;;; This is only used once. You can rewrite the use, port the hairy macro
-;;; definition (which is implemented using a Clinger-Rees low-level
-;;; explicit-renaming macro system), or port the simple, high-level
-;;; definition, which is less efficient.
-;;;
-;;; - :OPTIONAL macro
-;;; Very simply defined using an R5RS high-level macro.
-;;;
-;;; Implementations that can arrange for the base char sets to be immutable
-;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable,
-;;; which can be used to protect the underlying strings.) It would be very,
-;;; very bad if a client's buggy code corrupted these constants.
-;;;
-;;; There is a fair amount of argument checking. This is, strictly speaking,
-;;; unnecessary -- the actual body of the procedures will blow up if an
-;;; illegal value is passed in. However, the error message will not be as good
-;;; as if the error were caught at the "higher level." Also, a very, very
-;;; smart Scheme compiler may be able to exploit having the type checks done
-;;; early, so that the actual body of the procedures can assume proper values.
-;;; This isn't likely; this kind of compiler technology isn't common any
-;;; longer.
-;;;
-;;; The overhead of optional-argument parsing is irritating. The optional
-;;; arguments must be consed into a rest list on entry, and then parsed out.
-;;; Function call should be a matter of a few register moves and a jump; it
-;;; should not involve heap allocation! Your Scheme system may have a superior
-;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
-;;; then this is a prime candidate for optimising these procedures,
-;;; *especially* the many optional BASE-CS parameters.
-;;;
-;;; Note that optional arguments are also a barrier to procedure integration.
-;;; If your Scheme system permits you to specify alternate entry points
-;;; for a call when the number of optional arguments is known in a manner
-;;; that enables inlining/integration, this can provide performance
-;;; improvements.
-;;;
-;;; There is enough *explicit* error checking that *all* internal operations
-;;; should *never* produce a type or index-range error. Period. Feel like
-;;; living dangerously? *Big* performance win to be had by replacing string
-;;; and record-field accessors and setters with unsafe equivalents in the
-;;; code. Similarly, fixnum-specific operators can speed up the arithmetic
-;;; done on the index values in the inner loops. The only arguments that are
-;;; not completely error checked are
-;;; - string lists (complete checking requires time proportional to the
-;;; length of the list)
-;;; - procedure arguments, such as char->char maps & predicates.
-;;; There is no way to check the range & domain of procedures in Scheme.
-;;; Procedures that take these parameters cannot fully check their
-;;; arguments. But all other types to all other procedures are fully
-;;; checked.
-;;;
-;;; This does open up the alternate possibility of simply *removing* these
-;;; checks, and letting the safe primitives raise the errors. On a dumb
-;;; Scheme system, this would provide speed (by eliminating the redundant
-;;; error checks) at the cost of error-message clarity.
-;;;
-;;; In an interpreted Scheme, some of these procedures, or the internal
-;;; routines with % prefixes, are excellent candidates for being rewritten
-;;; in C.
-;;;
-;;; It would also be nice to have the ability to mark some of these
-;;; routines as candidates for inlining/integration.
-;;;
-;;; See the comments preceding the hash function code for notes on tuning
-;;; the default bound so that the code never overflows your implementation's
-;;; fixnum size into bignum calculation.
-;;;
-;;; All the %-prefixed routines in this source code are written
-;;; to be called internally to this library. They do *not* perform
-;;; friendly error checks on the inputs; they assume everything is
-;;; proper. They also do not take optional arguments. These two properties
-;;; save calling overhead and enable procedure integration -- but they
-;;; are not appropriate for exported routines.
-
-;;; Copyright notice
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology
-;;;
-;;; This material was developed by the Scheme project at the Massachusetts
-;;; Institute of Technology, Department of Electrical Engineering and
-;;; Computer Science. Permission to copy and modify this software, to
-;;; redistribute either the original software or a modified version, and
-;;; to use this software for any purpose is granted, subject to the
-;;; following restrictions and understandings.
-;;;
-;;; 1. Any copy made of this software must include this copyright notice
-;;; in full.
-;;;
-;;; 2. Users of this software agree to make their best efforts (a) to
-;;; return to the MIT Scheme project any improvements or extensions that
-;;; they make, so that these may be included in future releases; and (b)
-;;; to inform MIT of noteworthy uses of this software.
-;;;
-;;; 3. All materials developed as a consequence of the use of this
-;;; software shall duly acknowledge such use, in accordance with the usual
-;;; standards of acknowledging credit in academic research.
-;;;
-;;; 4. MIT has made no warrantee or representation that the operation of
-;;; this software will be error-free, and MIT is under no obligation to
-;;; provide any services, by way of maintenance, update, or otherwise.
-;;;
-;;; 5. In conjunction with products arising from the use of this material,
-;;; there shall be no use of the name of the Massachusetts Institute of
-;;; Technology nor of any adaptation thereof in any advertising,
-;;; promotional, or sales literature without prior written consent from
-;;; MIT in each case.
diff --git a/scsh/lib/cset-lib.txt b/scsh/lib/cset-lib.txt
deleted file mode 100644
index 75a77d5..0000000
--- a/scsh/lib/cset-lib.txt
+++ /dev/null
@@ -1,1271 +0,0 @@
-The SRFI 14 character-set library -*- outline -*-
-Olin Shivers
-98/11/8
-Last Update: 2000/7/4
-
-Emacs should display this document in outline mode. Say c-h m for
-instructions on how to move through it by sections (e.g., c-c c-n, c-c c-p).
-
-* Table of contents
--------------------
-Abstract
-Variable index
-Rationale
- Linear-update operations
- Extra-SRFI recommendations
-Specification
- General procedures
- Iterating over character sets
- Creating character sets
- Querying character sets
- Character-set algebra
- Standard character sets
-Unicode, Latin-1 and ASCII definitions of the standard character sets
-Reference implementation
-Acknowledgements
-References & links
-Copyright
-
-
--------------------------------------------------------------------------------
-* Abstract
-----------
-
-The ability to efficiently represent and manipulate sets of characters is an
-unglamorous but very useful capability for text-processing code -- one that
-tends to pop up in the definitions of other libraries. Hence it is useful to
-specify a general substrate for this functionality early. This SRFI defines a
-general library that provides this functionality.
-
-It is accompanied by a reference implementation for the spec. The reference
-implementation is fairly efficient, straightforwardly portable, and has a
-"free software" copyright. The implementation is tuned for "small" 7 or 8
-bit character types, such as ASCII or Latin-1; the data structures and
-algorithms would have to be altered for larger 16 or 32 bit character types
-such as Unicode -- however, the specs have been carefully designed with these
-larger character types in mind.
-
-Several forthcoming SRFIs can be defined in terms of this one:
- - string library
- - delimited input procedures (e.g., READ-LINE)
- - regular expressions
-
-
--------------------------------------------------------------------------------
-* Variable index
------------------
-Here is the complete set of bindings -- procedural and otherwise --
-exported by this library. In a Scheme system that has a module or package
-system, these procedures should be contained in a module named "char-set-lib".
-
-char-set? char-set= char-set<=
-
-char-set-hash
-
-char-set-cursor char-set-ref char-set-cursor-next end-of-char-set?
-char-set-fold char-set-unfold char-set-unfold!
-char-set-for-each char-set-map
-
-char-set-copy char-set
-
-list->char-set string->char-set
-list->char-set! string->char-set!
-
-char-set-filter ucs-range->char-set
-char-set-filter! ucs-range->char-set!
-
-->char-set
-
-char-set->list char-set->string
-
-char-set-size char-set-count char-set-contains?
-
-char-set-every char-set-any
-
-char-set-adjoin char-set-delete
-char-set-adjoin! char-set-delete!
-
-char-set-complement char-set-union char-set-intersection
-char-set-complement! char-set-union! char-set-intersection!
-
-char-set-difference char-set-xor char-set-diff+intersection
-char-set-difference! char-set-xor! char-set-diff+intersection!
-
-char-set:lower-case char-set:upper-case char-set:title-case
-char-set:letter char-set:digit char-set:letter+digit
-char-set:graphic char-set:printing char-set:whitespace
-char-set:iso-control char-set:punctuation char-set:symbol
-char-set:hex-digit char-set:blank char-set:ascii
-char-set:empty char-set:full
-
-
--------------------------------------------------------------------------------
-* Rationale
------------
-
-The ability to efficiently manipulate sets of characters is quite
-useful for text-processing code. Encapsulating this functionality in
-a general, efficiently implemented library can assist all such code.
-This library defines a new data structure to represent these sets, called
-a "char-set." The char-set type is distinct from all other types.
-
-This library is designed to be portable across implementations that use
-different character types and representations, especially ASCII, Latin-1
-and Unicode. Some effort has been made to preserve compatibility with Java
-in the Unicode case (see the definition of CHAR-SET:WHITESPACE for the
-single real deviation).
-
-
-** Linear-update operations
-===========================
-The procedures of this SRFI, by default, are "pure functional" -- they do not
-alter their parameters. However, this SRFI defines a set of "linear-update"
-procedures which have a hybrid pure-functional/side-effecting semantics: they
-are allowed, but not required, to side-effect one of their parameters in order
-to construct their result. An implementation may legally implement these
-procedures as pure, side-effect-free functions, or it may implement them using
-side effects, depending upon the details of what is the most efficient or
-simple to implement in terms of the underlying representation.
-
-The linear-update routines all have names ending with "!".
-
-Clients of these procedures *may not* rely upon these procedures working by
-side effect. For example, this is not guaranteed to work:
-
- (let* ((cs1 (char-set #\a #\b #\c)) ; cs1 = {a,b,c}.
- (cs2 (char-set-adjoin! cs1 #\d))) ; Add d to {a,b,c}.
- cs1) ; Could be either {a,b,c} or {a,b,c,d}.
-
-However, this is well-defined:
-
- (let ((cs (char-set #\a #\b #\c)))
- (char-set-adjoin! cs #\d)) ; Add d to {a,b,c}.
-
-So clients of these procedures write in a functional style, but must
-additionally be sure that, when the procedure is called, there are no other
-live pointers to the potentially-modified character set (hence the term
-"linear update").
-
-There are two benefits to this convention:
- - Implementations are free to provide the most efficient possible
- implementation, either functional or side-effecting.
- - Programmers may nonetheless continue to assume that character sets
- are purely functional data structures: they may be reliably shared
- without needing to be copied, uniquified, and so forth.
-
-Note that pure functional representations are the right thing for
-ASCII- or Latin-1-based Scheme implementations, since a char-set can
-be represented in an ASCII Scheme with 4 32-bit words. Pure set-algebra
-operations on such a representation are very fast and efficient. Programmers
-who code using linear-update operations are guaranteed the system will
-provide the best implementation across multiple platforms.
-
-In practice, these procedures are most useful for efficiently constructing
-character sets in a side-effecting manner, in some limited local context,
-before passing the character set outside the local construction scope to be
-used in a functional manner.
-
-Scheme provides no assistance in checking the linearity of the potentially
-side-effected parameters passed to these functions --- there's no linear
-type checker or run-time mechanism for detecting violations. (But
-sophisticated programming environments, such as DrScheme, might help.)
-
-** Extra-SRFI recommendations
-=============================
-Users are cautioned that the R5RS predicates
- CHAR-ALPHABETIC?
- CHAR-NUMERIC?
- CHAR-WHITESPACE?
- CHAR-UPPER-CASE?
- CHAR-LOWER-CASE?
-may or may not be in agreement with the SRFI 14 base character sets
- CHAR-SET:LETTER
- CHAR-SET:DIGIT
- CHAR-SET:WHITESPACE
- CHAR-SET:UPPER-CASE
- CHAR-SET:LOWER-CASE
-Implementors are strongly encouraged to bring these predicates into
-agreement with the base character sets of this SRFI; not to do so risks
-major confusion.
-
-
--------------------------------------------------------------------------------
-* Specification
----------------
-
-In the following procedure specifications:
- - A CS parameter is a character set.
-
- - An S parameter is a string.
-
- - A CHAR parameter is a character.
-
- - A CHAR-LIST parameter is a list of characters.
-
- - A PRED parameter is a unary character predicate procedure, returning
- a true/false value when applied to a character.
-
- - An OBJ parameter may be any value at all.
-
-Passing values to procedures with these parameters that do not satisfy these
-types is an error.
-
-Unless otherwise noted in the specification of a procedure, procedures
-always return character sets that are distinct (from the point of view
-of the linear-update operations) from the parameter character sets. For
-example, CHAR-SET-ADJOIN is guaranteed to provide a fresh character set,
-even if it is not given any character parameters.
-
-Parameters given in square brackets are optional. Unless otherwise noted in
-the text describing the procedure, any prefix of these optional parameters may
-be supplied, from zero arguments to the full list. When a procedure returns
-multiple values, this is shown by listing the return values in square
-brackets, as well. So, for example, the procedure with signature
-
- halts? f [x init-store] -> [boolean integer]
-
-would take one (F), two (F, X) or three (F, X, INIT-STORE) input parameters,
-and return two values, a boolean and an integer.
-
-A parameter followed by "..." means zero-or-more elements. So the procedure
-with the signature
- sum-squares x ... -> number
-takes zero or more arguments (X ...), while the procedure with signature
- spell-check doc dict1 dict2 ... -> string-list
-takes two required parameters (DOC and DICT1) and zero or more
-optional parameters (DICT2 ...).
-
-
-** General procedures
-=====================
-char-set? obj -> boolean
- Is the object OBJ a character set?
-
-char-set= cs1 ... -> boolean
- Are the character sets equal?
-
- Boundary cases:
- (char-set=) => true
- (char-set= cs) => true
-
- Rationale: transitive binary relations are generally extended to n-ary
- relations in Scheme, which enables clearer, more concise code to be
- written. While the zero-argument and one-argument cases will almost
- certainly not arise in first-order uses of such relations, they may well
- arise in higher-order cases or macro-generated code. E.g., consider
- (apply char-set= cset-list)
- This is well-defined if the list is empty or a singleton list. Hence
- we extend these relations to any number of arguments. Implementors
- have reported actual uses of n-ary relations in higher-order cases
- allowing for fewer than two arguments. The way of Scheme is to handle the
- general case; we provide the fully general extension.
-
- A counter-argument to this extension is that R5RS's transitive binary
- arithmetic relations (=, <, etc.) require at least two arguments, hence
- this decision is a break with the prior convention -- although it is
- at least one that is backwards-compatible.
-
-char-set<= cs1 ... -> boolean
- Returns true if every character set CSi is a subset of character set CSi+1.
-
- Boundary cases:
- (char-set<=) => true
- (char-set<= cs) => true
-
- Rationale: See CHAR-SET= for discussion of zero- and one-argument
- applications. Consider testing a list of char-sets for monotonicity
- with (APPLY CHAR-SET<= CSET-LIST).
-
-char-set-hash cs [bound] -> integer
- Compute a hash value for the character set CS. BOUND is a non-negative
- exact integer specifying the range of the hash function. A positive
- value restricts the return value to the range [0,BOUND).
-
- If BOUND is either zero or not given, the implementation may use
- an implementation-specific default value, chosen to be as large as
- is efficiently practical. For instance, the default range might be chosen
- for a given implementation to map all character sets into the range of
- integers that can be represented with a single machine word.
-
- Invariant:
- (char-set= cs1 cs2) => (= (char-set-hash cs1 b) (char-set-hash cs2 b))
-
- A legal but nonetheless discouraged implementation:
- (define (char-set-hash cs . maybe-bound) 1)
-
- Rationale: allowing the user to specify an explicit bound simplifies user
- code by removing the mod operation that typically accompanies every hash
- computation, and also may allow the implementation of the hash function to
- exploit a reduced range to efficiently compute the hash value. E.g., for
- small bounds, the hash function may be computed in a fashion such that
- intermediate values never overflow into bignum integers, allowing the
- implementor to provide a fixnum-specific "fast path" for computing the
- common cases very rapidly.
-
-** Iterating over character sets
-===================================
-
-char-set-cursor cset -> cursor
-char-set-ref cset cursor -> char
-char-set-cursor-next cset cursor -> cursor
-end-of-char-set? cursor -> boolean
- Cursors are a low-level facility for iterating over the characters in a
- set. A cursor is a value that indexes a character in a char set.
- CHAR-SET-CURSOR produces a new cursor for a given char set. The set
- element indexed by the cursor is fetched with CHAR-SET-REF. A cursor index
- is incremented with CHAR-SET-CURSOR-NEXT; in this way, code can step
- through every character in a char set. Stepping a cursor "past the end" of
- a char set produces a cursor that answers true to END-OF-CHAR-SET?. It is
- an error to pass such a cursor to CHAR-SET-REF or to CHAR-SET-CURSOR-NEXT.
-
- A cursor value may not be used in conjunction with a different character
- set; if it is passed to CHAR-SET-REF or CHAR-SET-CURSOR-NEXT with
- a character set other than the one used to create it, the results and
- effects are undefined.
-
- Cursor values are *not* necessarily distinct from other types. They may be
- integers, linked lists, records, procedures or other values. This license
- is granted to allow cursors to be very "lightweight" values suitable for
- tight iteration, even in fairly simple implementations.
-
- Note that these primitives are necessary to export an iteration facility
- for char sets to loop macros.
-
- Example:
-
- (define cs (char-set #\G #\a #\T #\e #\c #\h))
-
- ;; Collect elts of CS into a list.
- (let lp ((cur (char-set-cursor cs)) (ans '()))
- (if (end-of-char-set? cur) ans
- (lp (char-set-cursor-next cs cur)
- (cons (char-set-ref cs cur) ans))))
- => (#\G #\T #\a #\c #\e #\h)
-
- ;; Equivalently, using a list unfold (from SRFI 1):
- (unfold-right end-of-char-set?
- (curry char-set-ref cs)
- (curry char-set-cursor-next cs)
- (char-set-cursor cs))
- => (#\G #\T #\a #\c #\e #\h)
-
- Rationale: Note that the cursor API's four functions "fit" the functional
- protocol used by the unfolders provided by the list, string and char-set
- SRFIs (see the example above). By way of contrast, here is a simpler,
- two-function API that was rejected for failing this criterion. Besides
- CHAR-SET-CURSOR, it provided a single function that mapped a cursor and a
- character set to two values, the indexed character and the next cursor. If
- the cursor had exhausted the character set, then this function returned
- false instead of the character value, and another end-of-char-set cursor.
- In this way, the other three functions of the current API were combined
- together.
-
-char-set-fold kons knil cs -> object
- This is the fundamental iterator for character sets. Applies the function
- KONS across the character set CS using initial state value KNIL. That is,
- if CS is the empty set, the procedure returns KNIL. Otherwise, some
- element c of CS is chosen; let cs' be the remaining, unchosen characters.
- The procedure returns
- (char-set-fold KONS (KONS c KNIL) cs')
-
- Examples:
- ;; CHAR-SET-MEMBERS
- (lambda (cs) (char-set-fold cons '() cs))
-
- ;; CHAR-SET-SIZE
- (lambda (cs) (char-set-fold (lambda (c i) (+ i 1)) 0 cs))
-
- ;; How many vowels in the char set?
- (lambda (cs)
- (char-set-fold (lambda (c i) (if (vowel? c) (+ i 1) i))
- 0 cs))
-
-char-set-unfold p f g seed [base-cs] -> char-set
-char-set-unfold! p f g seed base-cs -> char-set
- This is a fundamental constructor for char-sets.
- - G is used to generate a series of "seed" values from the initial seed:
- SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
- - P tells us when to stop -- when it returns true when applied to one
- of these seed values.
- - F maps each seed value to a character. These characters are added
- to the base character set BASE-CS to form the result; BASE-CS defaults to
- the empty set. CHAR-SET-UNFOLD! adds the characters to BASE-CS in a
- linear-update -- it is allowed, but not required, to side-effect
- and use BASE-CS's storage to construct the result.
-
- More precisely, the following definitions hold, ignoring the
- optional-argument issues:
-
- (define (char-set-unfold p f g seed base-cs)
- (char-set-unfold! p f g seed (char-set-copy base-cs)))
-
- (define (char-set-unfold! p f g seed base-cs)
- (let lp ((seed seed) (cs base-cs))
- (if (p seed) cs ; P says we are done.
- (lp (g seed) ; Loop on (G SEED).
- (char-set-adjoin! cs (f seed)))))) ; Add (F SEED) to set.
-
- (Note that the actual implementation may be more efficient.)
-
- Examples:
-
- (port->char-set p) = (char-set-unfold eof-object? values
- (lambda (x) (read-char p))
- (read-char p))
-
- (list->char-set lis) = (char-set-unfold null? car cdr lis)
-
-char-set-for-each proc cs -> unspecified
- Apply procedure PROC to each character in the character set CS.
- Note that the order in which PROC is applied to the characters in the
- set is not specified, and may even change from one procedure application
- to another.
-
- Nothing at all is specified about the value returned by this procedure; it
- is not even required to be consistent from call to call. It is simply
- required to be a value (or values) that may be passed to a command
- continuation, e.g. as the value of an expression appearing as a
- non-terminal subform of a BEGIN expression. Note that in R5RS, this
- restricts the procedure to returning a single value; non-R5RS systems may
- not even provide this restriction.
-
-char-set-map proc cs -> char-set
- PROC is a char->char procedure. Apply it to all the characters in
- the char-set CS, and collect the results into a new character set.
-
- Essentially lifts PROC from a char->char procedure to a char-set ->
- char-set procedure.
-
- Example:
- (char-set-map char-downcase cset)
-
-
-** Creating character sets
-==========================
-char-set-copy cs -> char-set
- Returns a copy of the character set CS. "Copy" means that if either the
- input parameter or the result value of this procedure is passed to one of
- the linear-update procedures described below, the other character set is
- guaranteed not to be altered.
-
- A system that provides pure-functional implementations of the
- linear-operator suite could implement this procedure as the identity
- function -- so copies are *not* guaranteed to be distinct by EQ?.
-
-char-set char1 ... -> char-set
- Return a character set containing the given characters.
-
-list->char-set char-list [base-cs] -> char-set
-list->char-set! char-list base-cs -> char-set
- Return a character set containing the characters in the list of
- characters CHAR-LIST.
-
- If character set BASE-CS is provided, the characters from CHAR-LIST
- are added to it. LIST->CHAR-SET! is allowed, but not required,
- to side-effect and reuse the storage in BASE-CS; LIST->CHAR-SET
- produces a fresh character set.
-
-string->char-set s [base-cs] -> char-set
-string->char-set! s base-cs -> char-set
- Return a character set containing the characters in the string S.
-
- If character set BASE-CS is provided, the characters from S are added to
- it. STRING->CHAR-SET! is allowed, but not required, to side-effect and
- reuse the storage in BASE-CS; STRING->CHAR-SET produces a fresh character
- set.
-
-char-set-filter pred cs [base-cs] -> char-set
-char-set-filter! pred cs base-cs -> char-set
- Returns a character set containing every character c in CS
- such that (PRED c) returns true.
-
- If character set BASE-CS is provided, the characters specified by PRED
- are added to it. CHAR-SET-FILTER! is allowed, but not required,
- to side-effect and reuse the storage in BASE-CS; CHAR-SET-FILTER
- produces a fresh character set.
-
- An implementation may not save away a reference to PRED and invoke it
- after CHAR-SET-FILTER or CHAR-SET-FILTER! returns -- that is, "lazy,"
- on-demand implementations are not allowed, as PRED may have external
- dependencies on mutable data or have other side-effects.
-
- Rationale: This procedure provides a means of converting a character
- predicate into its equivalent character set; the CS parameter allows the
- programmer to bound the predicate's domain. Programmers should be aware
- that filtering a character set such as CHAR-SET:FULL could be a very
- expensive operation in an implementation that provided an extremely large
- character type, such as 32-bit Unicode. An earlier draft of this library
- provided a simple PREDICATE->CHAR-SET procedure, which was rejected in
- favor of CHAR-SET-FILTER for this reason.
-
-ucs-range->char-set lower upper [error? base-cs] -> char-set
-ucs-range->char-set! lower upper error? base-cs -> char-set
- LOWER and UPPER are exact non-negative integers; LOWER <= UPPER.
-
- Returns a character set containing every character whose ISO/IEC 10646
- UCS-4 code lies in the half-open range [LOWER,UPPER).
-
- - If the requested range includes unassigned UCS values, these are
- silently ignored (the current UCS specification has "holes" in the
- space of assigned codes).
-
- - If the requested range includes "private" or "user space" codes, these
- are handled in an implementation-specific manner; however, a UCS- or
- Unicode-based Scheme implementation should pass them through
- transparently.
-
- - If any code from the requested range specifies a valid, assigned
- UCS character that has no corresponding representative in the
- implementation's character type, then (1) an error is raised if ERROR?
- is true, and (2) the code is ignored if ERROR? is false (the default).
- This might happen, for example, if the implementation uses ASCII
- characters, and the requested range includes non-ASCII characters.
-
- If character set BASE-CS is provided, the characters specified by the
- range are added to it. UCS-RANGE->CHAR-SET! is allowed, but not required,
- to side-effect and reuse the storage in BASE-CS; UCS-RANGE->CHAR-SET
- produces a fresh character set.
-
- Note that ASCII codes are a subset of the Latin-1 codes, which are in turn
- a subset of the 16-bit Unicode codes, which are themselves a subset of the
- 32-bit UCS-4 codes. We commit to a specific encoding in this routine,
- regardless of the underlying representation of characters, so that client
- code using this library will be portable. I.e., a conformant Scheme
- implementation may use EBCDIC or SHIFT-JIS to encode characters; it must
- simply map the UCS characters from the given range into the native
- representation when possible, and report errors when not possible.
-
-->char-set x -> char-set
- Coerces X into a char-set. X may be a string, character or char-set. A
- string is converted to the set of its constituent characters; a character
- is converted to a singleton set; a char-set is returned as-is. This
- procedure is intended for use by other procedures that want to provide
- "user-friendly," wide-spectrum interfaces to their clients.
-
-
-** Querying character sets
-==========================
-char-set-size cs -> integer
- Returns the number of elements in character set CS.
-
-char-set-count pred cs -> integer
- Apply PRED to the chars of character set CS, and return the number
- of chars that caused the predicate to return true.
-
-char-set->list cs -> character-list
- This procedure returns a list of the members of character set CS.
- The order in which CS's characters appear in the list is not defined,
- and may be different from one call to another.
-
-char-set->string cs -> string
- This procedure returns a string containing the members of character set CS.
- The order in which CS's characters appear in the string is not defined,
- and may be different from one call to another.
-
-char-set-contains? cs char -> boolean
- This procedure tests CHAR for membership in character set CS.
-
- The MIT Scheme character-set package called this procedure
- CHAR-SET-MEMBER?, but the argument order isn't consistent with the name.
-
-char-set-every pred cs -> boolean
-char-set-any pred cs -> object
- The CHAR-SET-EVERY procedure returns true if predicate PRED
- returns true of every character in the character set CS.
-
- Likewise, CHAR-SET-ANY applies PRED to every character in
- character set CS, and returns the first true value it finds.
- If no character produces a true value, it returns false.
-
- The order in which these procedures sequence through the elements of
- CS is not specified.
-
- Note that if you need to determine the actual character on which a
- predicate returns true, use CHAR-SET-ANY and arrange for the predicate
- to return the character parameter as its true value, e.g.
- (char-set-any (lambda (c) (and (char-upper-case? c) c))
- cs)
-
-
-** Character-set algebra
-========================
-char-set-adjoin cs char1 ... -> char-set
-char-set-delete cs char1 ... -> char-set
- Add/delete the CHARi characters to/from character set CS.
-
-char-set-adjoin! cs char1 ... -> char-set
-char-set-delete! cs char1 ... -> char-set
- Linear-update variants. These procedures are allowed, but not
- required, to side-effect their first parameter.
-
-char-set-complement cs -> char-set
-char-set-union cs1 ... -> char-set
-char-set-intersection cs1 ... -> char-set
-char-set-difference cs1 cs2 ... -> char-set
-char-set-xor cs1 ... -> char-set
-char-set-diff+intersection cs1 cs2 ... -> [char-set char-set]
- These procedures implement set complement, union, intersection,
- difference, and exclusive-or for character sets. The union, intersection
- and xor operations are n-ary. The difference function is also n-ary,
- associates to the left (that is, it computes the difference between
- its first argument and the union of all the other arguments),
- and requires at least one argument.
-
- Boundary cases:
- (char-set-union) => char-set:empty
- (char-set-intersection) => char-set:full
- (char-set-xor) => char-set:empty
- (char-set-difference cs) => cs
-
- CHAR-SET-DIFF+INTERSECTION returns both the difference and the
- intersection of the arguments -- it partitions its first parameter.
- It is equivalent to
- (values (char-set-difference cs1 cs2 ...)
- (char-set-intersection cs1 (char-set-union cs2 ...)))
- but can be implemented more efficiently.
-
- Programmers should be aware that CHAR-SET-COMPLEMENT could potentially
- be a very expensive operation in Scheme implementations that provide
- a very large character type, such as 32-bit Unicode. If this is a
- possibility, sets can be complemented with respect to a smaller
- universe using CHAR-SET-DIFFERENCE.
-
-char-set-complement! cs -> char-set
-char-set-union! cs1 cs2 ... -> char-set
-char-set-intersection! cs1 cs2 ... -> char-set
-char-set-difference! cs1 cs2 ... -> char-set
-char-set-xor! cs1 cs2 ... -> char-set
-char-set-diff+intersection! cs1 cs2 cs3 ... -> [char-set char-set]
- These are linear-update variants of the set-algebra functions.
- They are allowed, but not required, to side-effect their first
- (required) parameter.
-
-
- CHAR-SET-DIFF+INTERSECTION! is allowed to side-effect both of
- its two required parameters, CS1 and CS2.
-
-** Standard character sets
-==========================
-Several character sets are predefined for convenience:
- char-set:lower-case Lower-case letters
- char-set:upper-case Upper-case letters
- char-set:title-case Title-case letters
- char-set:letter Letters
- char-set:digit Digits
- char-set:letter+digit Letters and digits
- char-set:graphic Printing characters except spaces
- char-set:printing Printing characters including spaces
- char-set:whitespace Whitespace characters
- char-set:iso-control The ISO control characters
- char-set:punctuation Punctuation characters
- char-set:symbol Symbol characters
- char-set:hex-digit A hexadecimal digit: 0-9, A-F, a-f
- char-set:blank Blank characters -- horizontal whitespace
- char-set:ascii All characters in the ASCII set.
- char-set:empty Empty set
- char-set:full All characters
-
-Note that there may be characters in CHAR-SET:LETTER that are neither upper nor
-lower case---this might occur in implementations that use a character type
-richer than ASCII, such as Unicode. A "graphic character" is one that would
-put ink on your page. While the exact composition of these sets may vary
-depending upon the character type provided by the underlying Scheme system,
-here are the definitions for some of the sets in an ASCII implementation:
-
- char-set:lower-case a-z
- char-set:upper-case A-Z
- char-set:letter A-Z and a-z
- char-set:digit 0123456789
- char-set:punctuation !"#%&'()*,-./:;?@[\]_{}
- char-set:symbol $+<=>^`|~
- char-set:whitespace Space, newline, tab, form feed,
- vertical tab, carriage return
- char-set:blank Space and tab
- char-set:graphic letter + digit + punctuation + symbol
- char-set:printing graphic + whitespace
- char-set:iso-control ASCII 0-31 and 127
-
-Note that the existence of the CHAR-SET:ASCII set implies that the underlying
-character set is required to be at least as rich as ASCII (including
-ASCII's control characters).
-
-Rationale: The name choices reflect a shift from the older
-"alphabetic/numeric" terms found in R5RS and Posix to newer,
-Unicode-influenced "letter/digit" lexemes.
-
--------------------------------------------------------------------------------
-* Unicode, Latin-1 and ASCII definitions of the standard character sets
------------------------------------------------------------------------
-
-In Unicode Scheme implementations, the base character sets are compatible with
-Java's Unicode specifications. For ASCII or Latin-1, we simply restrict the
-Unicode set specifications to their first 128 or 256 codes, respectively.
-Scheme implementations that are not based on ASCII, Latin-1 or Unicode should
-attempt to preserve the sense or spirit of these definitions.
-
-The following descriptions frequently make reference to the "Unicode character
-database." This is a file, available at URL
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
-Each line contains a description of a Unicode character. The first
-semicolon-delimited field of the line gives the hex value of the character's
-code; the second field gives the name of the character, and the third field
-gives a two-letter category. Other fields give simple 1-1 case-mappings for
-the character and other information; see
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html
-for further description of the file's format. Note in particular the
-two-letter category specified in the third field, which is referenced
-frequently in the descriptions below.
-
-** char-set:lower-case
-======================
-For Unicode, we follow Java's specification: a character is lowercase if
- + it is not in the range [U+2000,U+2FFF], and
- + the Unicode attribute table does not give a lowercase mapping for it, and
- + at least one of the following is true:
- - the Unicode attribute table gives a mapping to uppercase
- for the character, or
- - the name for the character in the Unicode attribute table contains
- the words "SMALL LETTER" or "SMALL LIGATURE".
-
-The lower-case ASCII characters are
- abcdefghijklmnopqrstuvwxyz
-Latin-1 adds another 33 lower-case characters to the ASCII set:
- 00B5 MICRO SIGN
- 00DF LATIN SMALL LETTER SHARP S
- 00E0 LATIN SMALL LETTER A WITH GRAVE
- 00E1 LATIN SMALL LETTER A WITH ACUTE
- 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
- 00E3 LATIN SMALL LETTER A WITH TILDE
- 00E4 LATIN SMALL LETTER A WITH DIAERESIS
- 00E5 LATIN SMALL LETTER A WITH RING ABOVE
- 00E6 LATIN SMALL LETTER AE
- 00E7 LATIN SMALL LETTER C WITH CEDILLA
- 00E8 LATIN SMALL LETTER E WITH GRAVE
- 00E9 LATIN SMALL LETTER E WITH ACUTE
- 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
- 00EB LATIN SMALL LETTER E WITH DIAERESIS
- 00EC LATIN SMALL LETTER I WITH GRAVE
- 00ED LATIN SMALL LETTER I WITH ACUTE
- 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
- 00EF LATIN SMALL LETTER I WITH DIAERESIS
- 00F0 LATIN SMALL LETTER ETH
- 00F1 LATIN SMALL LETTER N WITH TILDE
- 00F2 LATIN SMALL LETTER O WITH GRAVE
- 00F3 LATIN SMALL LETTER O WITH ACUTE
- 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
- 00F5 LATIN SMALL LETTER O WITH TILDE
- 00F6 LATIN SMALL LETTER O WITH DIAERESIS
- 00F8 LATIN SMALL LETTER O WITH STROKE
- 00F9 LATIN SMALL LETTER U WITH GRAVE
- 00FA LATIN SMALL LETTER U WITH ACUTE
- 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
- 00FC LATIN SMALL LETTER U WITH DIAERESIS
- 00FD LATIN SMALL LETTER Y WITH ACUTE
- 00FE LATIN SMALL LETTER THORN
- 00FF LATIN SMALL LETTER Y WITH DIAERESIS
-Note that three of these have no corresponding Latin-1 upper-case character:
- 00B5 MICRO SIGN
- 00DF LATIN SMALL LETTER SHARP S
- 00FF LATIN SMALL LETTER Y WITH DIAERESIS
-(The compatibility micro character uppercases to the non-Latin-1 Greek capital
-mu; the German sharp s character uppercases to the pair of characters "SS,"
-and the capital y-with-diaeresis is non-Latin-1.)
-
-(Note that the Java spec for lowercase characters given at
- http://java.sun.com/docs/books/jls/html/javalang.doc4.html#14345
-is inconsistent. U+00B5 MICRO SIGN fulfills the requirements for a lower-case
-character (as of Unicode 3.0), but is not given in the numeric list of
-lower-case character codes.)
-
-(Note that the Java spec for isLowerCase() given at
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html#isLowerCase(char)
-gives three mutually inconsistent definitions of "lower case." The first is
-the definition used in this SRFI. Following text says "A character is
-considered to be lowercase if and only if it is specified to be lowercase by
-the Unicode 2.0 standard (category Ll in the Unicode specification data
-file)." The former spec excludes U+00AA FEMININE ORDINAL INDICATOR and
-U+00BA MASCULINE ORDINAL INDICATOR; the latter spec includes them. Finally,
-the spec enumerates a list of characters in the Latin-1 subset; this list
-excludes U+00B5 MICRO SIGN, which is included in both of the previous specs.)
-
-
-** char-set:upper-case
-======================
-For Unicode, we follow Java's specification: a character is uppercase if
- + it is not in the range [U+2000,U+2FFF], and
- + the Unicode attribute table does not give an uppercase mapping for it
- (this excludes titlecase characters), and
- + at least one of the following is true:
- - the Unicode attribute table gives a mapping to lowercase
- for the character, or
- - the name for the character in the Unicode attribute table contains
- the words "CAPITAL LETTER" or "CAPITAL LIGATURE".
-
-The upper-case ASCII characters are
- ABCDEFGHIJKLMNOPQRSTUVWXYZ
-Latin-1 adds another 30 upper-case characters to the ASCII set:
- 00C0 LATIN CAPITAL LETTER A WITH GRAVE
- 00C1 LATIN CAPITAL LETTER A WITH ACUTE
- 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
- 00C3 LATIN CAPITAL LETTER A WITH TILDE
- 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
- 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
- 00C6 LATIN CAPITAL LETTER AE
- 00C7 LATIN CAPITAL LETTER C WITH CEDILLA
- 00C8 LATIN CAPITAL LETTER E WITH GRAVE
- 00C9 LATIN CAPITAL LETTER E WITH ACUTE
- 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
- 00CB LATIN CAPITAL LETTER E WITH DIAERESIS
- 00CC LATIN CAPITAL LETTER I WITH GRAVE
- 00CD LATIN CAPITAL LETTER I WITH ACUTE
- 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
- 00CF LATIN CAPITAL LETTER I WITH DIAERESIS
- 00D0 LATIN CAPITAL LETTER ETH
- 00D1 LATIN CAPITAL LETTER N WITH TILDE
- 00D2 LATIN CAPITAL LETTER O WITH GRAVE
- 00D3 LATIN CAPITAL LETTER O WITH ACUTE
- 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
- 00D5 LATIN CAPITAL LETTER O WITH TILDE
- 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
- 00D8 LATIN CAPITAL LETTER O WITH STROKE
- 00D9 LATIN CAPITAL LETTER U WITH GRAVE
- 00DA LATIN CAPITAL LETTER U WITH ACUTE
- 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
- 00DC LATIN CAPITAL LETTER U WITH DIAERESIS
- 00DD LATIN CAPITAL LETTER Y WITH ACUTE
- 00DE LATIN CAPITAL LETTER THORN
-
-
-** char-set:title-case
-======================
-In Unicode, a character is titlecase if it has the category Lt in
-the character attribute database. There are very few of these characters;
-here is the entire 31-character list as of Unicode 3.0:
-
- 01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
- 01C8 LATIN CAPITAL LETTER L WITH SMALL LETTER J
- 01CB LATIN CAPITAL LETTER N WITH SMALL LETTER J
- 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z
- 1F88 GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
- 1F89 GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
- 1F8A GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
- 1F8B GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
- 1F8C GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
- 1F8D GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
- 1F8E GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
- 1F8F GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
- 1F98 GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
- 1F99 GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
- 1F9A GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
- 1F9B GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
- 1F9C GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
- 1F9D GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
- 1F9E GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
- 1F9F GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
- 1FA8 GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
- 1FA9 GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
- 1FAA GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
- 1FAB GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
- 1FAC GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
- 1FAD GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
- 1FAE GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
- 1FAF GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
- 1FBC GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
- 1FCC GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
- 1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
-
-There are no ASCII or Latin-1 titlecase characters.
-
-
-** char-set:letter
-==================
-In Unicode, a letter is any character with one of the letter categories
-(Lu, Ll, Lt, Lm, Lo) in the Unicode character database.
-
-There are 52 ASCII letters
- abcdefghijklmnopqrstuvwxyz
- ABCDEFGHIJKLMNOPQRSTUVWXYZ
-
-There are 117 Latin-1 letters. These are the 115 characters that are
-members of the Latin-1 CHAR-SET:LOWER-CASE and CHAR-SET:UPPER-CASE sets,
-plus
- 00AA FEMININE ORDINAL INDICATOR
- 00BA MASCULINE ORDINAL INDICATOR
-(These two letters are considered lower-case by Unicode, but not by
-Java or SRFI 14.)
-
-** char-set:digit
-=================
-In Unicode, a character is a digit if it has the category Nd in
-the character attribute database. In Latin-1 and ASCII, the only
-such characters are 0123456789. In Unicode, there are other digit
-characters in other code blocks, such as Gujarati digits and Tibetan
-digits.
-
-
-** char-set:hex-digit
-=====================
-The only hex digits are 0123456789abcdefABCDEF.
-
-
-** char-set:letter+digit
-========================
-The union of CHAR-SET:LETTER and CHAR-SET:DIGIT.
-
-
-** char-set:graphic
-===================
-A graphic character is one that would put ink on paper. The ASCII and Latin-1
-graphic characters are the members of
- CHAR-SET:LETTER
- CHAR-SET:DIGIT
- CHAR-SET:PUNCTUATION
- CHAR-SET:SYMBOL
-
-
-** char-set:printing
-====================
-A printing character is one that would occupy space when printed, i.e.,
-a graphic character or a space character. CHAR-SET:PRINTING is the union
-of CHAR-SET:WHITESPACE and CHAR-SET:GRAPHIC.
-
-
-** char-set:whitespace
-======================
-In Unicode, a whitespace character is either
- - a character with one of the space, line, or paragraph separator categories
- (Zs, Zl or Zp) of the Unicode character database.
- - U+0009 Horizontal tabulation (\t control-I)
- - U+000A Line feed (\n control-J)
- - U+000B Vertical tabulation (\v control-K)
- - U+000C Form feed (\f control-L)
- - U+000D Carriage return (\r control-M)
-
-There are 24 whitespace characters in Unicode 3.0:
- 0009 HORIZONTAL TABULATION \t control-I
- 000A LINE FEED \n control-J
- 000B VERTICAL TABULATION \v control-K
- 000C FORM FEED \f control-L
- 000D CARRIAGE RETURN \r control-M
- 0020 SPACE Zs
- 00A0 NO-BREAK SPACE Zs
- 1680 OGHAM SPACE MARK Zs
- 2000 EN QUAD Zs
- 2001 EM QUAD Zs
- 2002 EN SPACE Zs
- 2003 EM SPACE Zs
- 2004 THREE-PER-EM SPACE Zs
- 2005 FOUR-PER-EM SPACE Zs
- 2006 SIX-PER-EM SPACE Zs
- 2007 FIGURE SPACE Zs
- 2008 PUNCTUATION SPACE Zs
- 2009 THIN SPACE Zs
- 200A HAIR SPACE Zs
- 200B ZERO WIDTH SPACE Zs
- 2028 LINE SEPARATOR Zl
- 2029 PARAGRAPH SEPARATOR Zp
- 202F NARROW NO-BREAK SPACE Zs
- 3000 IDEOGRAPHIC SPACE Zs
-
-The ASCII whitespace characters are the first six characters in the above list
--- line feed, horizontal tabulation, vertical tabulation, form feed, carriage
-return, and space. These are also exactly the characters recognised by the
-Posix isspace() procedure. Latin-1 adds the no-break space.
-
-Note: Java's isWhitespace() method is incompatible, including
- 001C FILE SEPARATOR (control-\)
- 001D GROUP SEPARATOR (control-])
- 001E RECORD SEPARATOR (control-^)
- 001F UNIT SEPARATOR (control-_)
-and excluding
- 00A0 NO-BREAK SPACE
-
-Java's excluding the no-break space means that tokenizers can simply break
-character streams at "whitespace" boundaries. However, the exclusion introduces
-exceptions in other places, e.g. CHAR-SET:PRINTING is no longer simply the
-union of CHAR-SET:GRAPHIC and CHAR-SET:WHITESPACE.
-
-
-** char-set:iso-control
-=======================
-The ISO control characters are the Unicode/Latin-1 characters in the ranges
-[U+0000,U+001F] and [U+007F,U+009F].
-
-ASCII restricts this set to the characters in the range [U+0000,U+001F]
-plus the character U+007F.
-
-Note that Unicode defines other control characters which do not belong to this
-set (hence the qualifying prefix "iso-" in the name). This restriction is
-compatible with the Java isISOControl() method.
-
-
-** char-set:punctuation
-=======================
-In Unicode, a punctuation character is any character that has one of the
-punctuation categories in the Unicode character database (Pc, Pd, Ps,
-Pe, Pi, Pf, or Po.)
-
-ASCII has 23 punctuation characters:
- !"#%&'()*,-./:;?@[\]_{}
-
-Latin-1 adds six more:
- 00A1 INVERTED EXCLAMATION MARK
- 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- 00AD SOFT HYPHEN
- 00B7 MIDDLE DOT
- 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- 00BF INVERTED QUESTION MARK
-
-Note that the nine ASCII characters $+<=>^`|~ are *not* punctuation.
-They are "symbols."
-
-
-** char-set:symbol
-==================
-In Unicode, a symbol is any character that has one of the symbol categories
-in the Unicode character database (Sm, Sc, Sk, or So). There are nine ASCII
-symbol characters:
- $+<=>^`|~
-
-Latin-1 adds 18 more:
- 00A2 CENT SIGN
- 00A3 POUND SIGN
- 00A4 CURRENCY SIGN
- 00A5 YEN SIGN
- 00A6 BROKEN BAR
- 00A7 SECTION SIGN
- 00A8 DIAERESIS
- 00A9 COPYRIGHT SIGN
- 00AC NOT SIGN
- 00AE REGISTERED SIGN
- 00AF MACRON
- 00B0 DEGREE SIGN
- 00B1 PLUS-MINUS SIGN
- 00B4 ACUTE ACCENT
- 00B6 PILCROW SIGN
- 00B8 CEDILLA
- 00D7 MULTIPLICATION SIGN
- 00F7 DIVISION SIGN
-
-
-** char-set:blank
-=================
-Blank chars are horizontal whitespace. In Unicode, a blank character is either
- - a character with the space separator category (Zs) in the Unicode
- character database.
- - U+0009 Horizontal tabulation (\t control-I)
-
-There are eighteen blank characters in Unicode 3.0:
- 0009 HORIZONTAL TABULATION \t control-I
- 0020 SPACE Zs
- 00A0 NO-BREAK SPACE Zs
- 1680 OGHAM SPACE MARK Zs
- 2000 EN QUAD Zs
- 2001 EM QUAD Zs
- 2002 EN SPACE Zs
- 2003 EM SPACE Zs
- 2004 THREE-PER-EM SPACE Zs
- 2005 FOUR-PER-EM SPACE Zs
- 2006 SIX-PER-EM SPACE Zs
- 2007 FIGURE SPACE Zs
- 2008 PUNCTUATION SPACE Zs
- 2009 THIN SPACE Zs
- 200A HAIR SPACE Zs
- 200B ZERO WIDTH SPACE Zs
- 202F NARROW NO-BREAK SPACE Zs
- 3000 IDEOGRAPHIC SPACE Zs
-
-The ASCII blank characters are the first two characters above --
-horizontal tab and space. Latin-1 adds the no-break space.
-
-Java doesn't have the concept of "blank" characters, so there are no
-compatibility issues.
-
-
--------------------------------------------------------------------------------
-* Reference implementation
---------------------------
-
-This SRFI comes with a reference implementation. It resides at:
- http://srfi.schemers.org/srfi-14/srfi-14.scm
-I have placed this source on the Net with an unencumbered, "open" copyright.
-Some of the code in the reference implementation bears a distant family
-relation to the MIT Scheme implementation, and being derived from that code,
-is covered by the MIT Scheme copyright (which is a generic BSD-style
-open-source copyright -- see the source file for details). The remainder of
-the code was written by myself for scsh or for this SRFI; I have placed this
-code under the scsh copyright, which is also a generic BSD-style open-source
-copyright.
-
-The code is written for portability and should be simple to port to
-any Scheme. It has only the following deviations from R4RS, clearly
-discussed in the comments:
- - an ERROR procedure;
- - the R5RS VALUES procedure for producing multiple return values;
- - a simple CHECK-ARG procedure for argument checking;
- - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting
- optional arguments from rest lists;
- - The SRFI-9 DEFINE-RECORD-TYPE form;
- - BITWISE-AND for the hash function;
- - %LATIN1->CHAR & %CHAR->LATIN1.
-
-The library is written for clarity and well-commented; the current source is
-about 375 lines of source code and 375 lines of comments and white space.
-It is also written for efficiency. Fast paths are provided for common cases.
-
-This is not to say that the implementation can't be tuned up for
-a specific Scheme implementation. There are notes in comments addressing
-ways implementors can tune the reference implementation for performance.
-
-In short, I've written the reference implementation to make it as painless
-as possible for an implementor -- or a regular programmer -- to adopt this
-library and get good results with it.
-
-The code uses a rather simple-minded, inefficient representation for
-ASCII/Latin-1 char-sets -- a 256-character string. The character whose code is
-I is in the set if S[I] = ASCII 1 (soh, or ^a); not in the set if S[I] = ASCII
-0 (nul). A much faster and denser representation would be 16 or 32 bytes worth
-of bit string. A portable implementation using bit sets awaits standards for
-bitwise logical-ops and byte vectors.
-
-"Large" character types, such as Unicode, should use a sparse representation,
-taking care that the Latin-1 subset continues to be represented with a
-dense 32-byte bit set.
-
-
--------------------------------------------------------------------------------
-* Acknowledgements
-------------------
-
-The design of this library benefited greatly from the feedback provided during
-the SRFI discussion phase. Among those contributing thoughtful commentary and
-suggestions, both on the mailing list and by private discussion, were Paolo
-Amoroso, Lars Arvestad, Alan Bawden, Jim Bender, Dan Bornstein, Per Bothner,
-Will Clinger, Brian Denheyer, Kent Dybvig, Sergei Egorov, Marc Feeley,
-Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Arthur A. Gleckler, Ben
-Goetter, Sven Hartrumpf, Erik Hilsdale, Shiro Kawai, Richard Kelsey, Oleg
-Kiselyov, Bengt Kleberg, Donovan Kolbly, Bruce Korb, Shriram Krishnamurthi,
-Bruce Lewis, Tom Lord, Brad Lucier, Dave Mason, David Rush, Klaus Schilling,
-Jonathan Sobel, Mike Sperber, Mikael Staldal, Vladimir Tsyshevsky, Donald
-Welsh, and Mike Wilson. I am grateful to them for their assistance.
-
-I am also grateful to the authors, implementors and documentors of all the systems
-mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted
-for their work in producing Web-accessible versions of the R5RS and Common
-Lisp spec, which was a tremendous aid.
-
-This is not to imply that these individuals necessarily endorse the final
-results, of course.
-
-During this document's long development period, great patience was exhibited
-by Mike Sperber, who is the editor for the SRFI, and by Hillary Sullivan,
-who is not.
-
--------------------------------------------------------------------------------
-* References & links
---------------------
-
-[Java]
- The following URLs provide documentation on relevant Java classes.
-
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/Character.html
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/String.html
- http://java.sun.com/products/jdk/1.2/docs/api/java/lang/StringBuffer.html
- http://java.sun.com/products/jdk/1.2/docs/api/java/text/Collator.html
- http://java.sun.com/products/jdk/1.2/docs/api/java/text/package-summary.html
-
-[MIT-Scheme]
- http://www.swiss.ai.mit.edu/projects/scheme/
-
-[R5RS]
- Revised^5 report on the algorithmic language Scheme.
- R. Kelsey, W. Clinger, J. Rees (editors).
- Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998.
- and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998.
-
- Available at http://www.schemers.org/Documents/Standards/
-
-[SRFI]
- The SRFI web site.
- http://srfi.schemers.org/
-
-[SRFI-14]
- SRFI-14: Character-set library.
- http://srfi.schemers.org/srfi-14/
-
- This document, in HTML:
- http://srfi.schemers.org/srfi-14/srfi-14.html
- This document, in plain text format:
- http://srfi.schemers.org/srfi-14/srfi-14.txt
- Source code for the reference implementation:
- http://srfi.schemers.org/srfi-14/srfi-14.scm
- Scheme 48 module specification, with typings:
- http://srfi.schemers.org/srfi-14/srfi-14-s48-module.scm
- Regression-test suite:
- http://srfi.schemers.org/srfi-14/srfi-14-tests.scm
-
-[Unicode]
- http://www.unicode.org/
-
-[UnicodeData]
- The Unicode character database.
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html
- ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
-
-
--------------------------------------------------------------------------------
-* Copyright
------------
-
-Certain portions of this document -- the specific, marked segments of text
-describing the R5RS procedures -- were adapted with permission from the R5RS
-report.
-
-All other text is copyright (C) Olin Shivers (1998, 1999).
-All Rights Reserved.
-
-This document and translations of it may be copied and furnished to others,
-and derivative works that comment on or otherwise explain it or assist in its
-implementation may be prepared, copied, published and distributed, in whole or
-in part, without restriction of any kind, provided that the above copyright
-notice and this paragraph are included on all such copies and derivative
-works. However, this document itself may not be modified in any way, such as
-by removing the copyright notice or references to the Scheme Request For
-Implementation process or editors, except as needed for the purpose of
-developing SRFIs in which case the procedures for copyrights defined in the
-SRFI process must be followed, or as required to translate it into languages
-other than English.
-
-The limited permissions granted above are perpetual and will not be revoked by
-the authors or their successors or assigns.
-
-This document and the information contained herein is provided on an "AS IS"
-basis and THE AUTHORS AND THE SRFI EDITORS DISCLAIM ALL WARRANTIES, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
-INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
-
-
--------------------------------------------------------------------------------
-* Ispell "buffer local" dictionary
-----------------------------------
-
-Ispell dumps "buffer local" words here. Please ignore.
-
- LocalWords: SRFI Unicode API RS lib ARG ascii xor diff defs Generalise cs CSi
- LocalWords: kons knil proc upcase cset lp eof lis cdr pred ary CHARi Posix op
- LocalWords: uniquified DrScheme soh nul HTML srfi html txt scm Clinger Rees
- LocalWords: SIGPLAN refs ucs iso CS's downcase IEC conformant JIS ASCII URL
- LocalWords: FFF abcdefghijklmnopqrstuvwxyz DF DIAERESIS AE EA EB EC EE EF ETH
- LocalWords: FA FB FC FD FF SS diaeresis isLowerCase Ll AA BA titlecase CA CB
- LocalWords: CC CD CE CF DA DC DD Lt CARON PSILI PROSGEGRAMMENI DASIA VARIA Lu
- LocalWords: OXIA PERISPOMENI FAA FAB FAC FAE FAF FBC FFC Lm Lo abcdefABCDEF
- LocalWords: Zs Zl Zp OGHAM IDEOGRAPHIC recognised isspace isWhitespace Pc Pd
- LocalWords: tokenizers IsISOControl Ps Pe Pf AB BB BF Sm Sc Sk AC AF MACRON
- LocalWords: PILCROW obj EQ scsh ops UnicodeData Paolo Amoroso Arvestad Bawden
- LocalWords: Bornstein Bothner Denheyer Dybvig Egorov Feeley Matthias Flatt eq
- LocalWords: Felleisen Gleckler Goetter Sven Hartrumpf Hilsdale Shiro Kawai
- LocalWords: Kiselyov Bengt Kleberg Kolbly Korb Shriram Krishnamurthi Lucier
- LocalWords: Schilling Sobel Mikael Staldal Tsyshevsky documentors Jaffer ans
- LocalWords: Sperber bignum fixnum ref init doc dict subform
diff --git a/scsh/lib/cset-obsolete.scm b/scsh/lib/cset-obsolete.scm
deleted file mode 100644
index cb041c4..0000000
--- a/scsh/lib/cset-obsolete.scm
+++ /dev/null
@@ -1,56 +0,0 @@
-;;; Support for obsolete, deprecated 0.5.2 char-set procedures.
-;;; Will go away in a future release.
-
-;;; Interface listing the deprecated char-set names that are still exported.
-;;; The comment after each export names the procedure or set that replaces it;
-;;; "(not exact)" marks replacements that are not drop-in equivalents.
-(define-interface obsolete-char-set-interface
- (export char-set-members ; char-set->list
- chars->char-set ; list->char-set
- ascii-range->char-set ; ucs-range->char-set (not exact)
- predicate->char-set ; char-set-filter (not exact)
- ;->char-set ; no longer handles a predicate
- char-set-every? ; char-set-every
- char-set-any? ; char-set-any
-
- char-set-invert ; char-set-complement
- char-set-invert! ; char-set-complement!
-
- char-set:alphabetic ; char-set:letter
- char-set:numeric ; char-set:digit
- char-set:alphanumeric ; char-set:letter+digit
- char-set:control)) ; char-set:iso-control
-
-
-;;; Implementations of the deprecated char-set names.
-;;;
-;;; Each procedure is built with DEPRECATED-PROC (from scsh-utilities), which
-;;; is handed the replacement procedure, the old name as a symbol, and a
-;;; migration hint. Presumably it returns a wrapper that warns when the old
-;;; name is used -- confirm against scsh-utilities.
-(define-structure obsolete-char-set-lib obsolete-char-set-interface
-  (open scsh-utilities char-set-lib scheme)
-  (begin
-
-    ;; Straight renames: the old name forwards to the new procedure.
-    (define char-set-members
-      (deprecated-proc char-set->list 'char-set-members
-                       "Use CHAR-SET->LIST instead."))
-    (define chars->char-set
-      (deprecated-proc list->char-set 'chars->char-set
-                       "Use LIST->CHAR-SET instead."))
-
-    ;; Not an exact equivalent: adapts the old two-argument call by passing
-    ;; #T for UCS-RANGE->CHAR-SET's optional boolean argument.
-    ;; NOTE(review): that argument is ERROR? in SRFI-14 -- confirm.
-    (define ascii-range->char-set
-      (deprecated-proc (lambda (lower upper) (ucs-range->char-set lower upper #t))
-                       'ascii-range->char-set
-                       "Use UCS-RANGE->CHAR-SET instead."))
-
-    ;; Not an exact equivalent: filters CHAR-SET:FULL with PRED.
-    (define predicate->char-set
-      (deprecated-proc (lambda (pred) (char-set-filter pred char-set:full))
-                       'predicate->char-set
-                       "Change code to use CHAR-SET-FILTER."))
-
-    (define char-set-every?
-      (deprecated-proc char-set-every 'char-set-every?
-                       "Use CHAR-SET-EVERY instead."))
-
-    ;; Wraps CHAR-SET-ANY, as the name and the deprecation message say.
-    ;; Wrapping CHAR-SET-EVERY here would silently give callers "for all"
-    ;; semantics instead of "there exists".
-    (define char-set-any?
-      (deprecated-proc char-set-any 'char-set-any?
-                       "Use CHAR-SET-ANY instead."))
-
-    (define char-set-invert
-      (deprecated-proc char-set-complement 'char-set-invert
-                       "Use CHAR-SET-COMPLEMENT instead."))
-    (define char-set-invert!
-      (deprecated-proc char-set-complement! 'char-set-invert!
-                       "Use CHAR-SET-COMPLEMENT! instead."))
-
-    ;; Old names for the standard sets are plain aliases of the new ones.
-    (define char-set:alphabetic char-set:letter)
-    (define char-set:numeric char-set:digit)
-    (define char-set:alphanumeric char-set:letter+digit)
-    (define char-set:control char-set:iso-control)))
diff --git a/scsh/lib/cset-package.scm b/scsh/lib/cset-package.scm
deleted file mode 100644
index 0b0dcd7..0000000
--- a/scsh/lib/cset-package.scm
+++ /dev/null
@@ -1,151 +0,0 @@
-;;; SRFI-14 interface for Scheme48 -*- Scheme -*-
-;;;
-;;; Complete interface spec for the SRFI-14 char-set-lib library in the
-;;; Scheme48 interface and module language. The interface is fully typed, in
-;;; the Scheme48 type notation. The structure definitions also provide a
-;;; formal description of the external dependencies of the source code.
-
-;;; Typed export list for the SRFI-14 char-set library.  Each entry pairs an
-;;; exported name (or a group of names sharing one type) with its type in
-;;; the Scheme48 type notation; the CHAR-SET:... constants at the end are
-;;; exported without a type annotation.
-(define-interface char-set-interface
- (export (char-set? (proc (:value) :boolean))
- ((char-set= char-set<=) (proc (&rest :value) :boolean))
-
- (char-set-hash (proc (:value &opt :exact-integer) :exact-integer))
-
- ;; Cursors are exact integers in the reference implementation.
- ;; These typings would be different with a different cursor
- ;; implementation.
- ;; Too bad Scheme doesn't have abstract data types.
- (char-set-cursor (proc (:value) :exact-integer))
- (char-set-ref (proc (:value :exact-integer) :char))
- (char-set-cursor-next (proc (:value :exact-integer) :exact-integer))
- (end-of-char-set? (proc (:value) :boolean))
-
- ;; Iteration and construction by folding/unfolding.
- (char-set-fold (proc ((proc (:char :value) :value) :value :value)
- :value))
- (char-set-unfold (proc ((proc (:value) :boolean)
- (proc (:value) :value)
- (proc (:value) :value)
- :value
- &opt :value)
- :value))
-
- (char-set-unfold! (proc ((proc (:value) :boolean)
- (proc (:value) :value)
- (proc (:value) :value)
- :value :value)
- :value))
-
- (char-set-for-each (proc ((proc (:char) :values) :value) :unspecific))
- (char-set-map (proc ((proc (:char) :char) :value) :value))
-
- (char-set-copy (proc (:value) :value))
-
- ;; Constructors.  The non-! variants take an optional trailing base set;
- ;; the ! variants require it.
- (char-set (proc (&rest :char) :value))
-
- (list->char-set (proc (:value &opt :value) :value))
- (list->char-set! (proc (:value :value) :value))
-
- (string->char-set (proc (:value &opt :value) :value))
- (string->char-set! (proc (:value :value) :value))
-
- (ucs-range->char-set (proc (:exact-integer :exact-integer &opt
- :boolean :value)
- :value))
- (ucs-range->char-set! (proc (:exact-integer :exact-integer
- :boolean :value)
- :value))
-
- (char-set-filter (proc ((proc (:char) :boolean) :value &opt :value) :value))
- (char-set-filter! (proc ((proc (:char) :boolean) :value :value) :value))
-
- (->char-set (proc (:value) :value))
-
- ;; Queries.
- (char-set-size (proc (:value) :exact-integer))
- (char-set-count (proc ((proc (:char) :boolean) :value) :exact-integer))
- (char-set-contains? (proc (:value :value) :boolean))
-
- (char-set-every (proc ((proc (:char) :boolean) :value) :boolean))
- (char-set-any (proc ((proc (:char) :boolean) :value) :value))
-
- ((char-set-adjoin char-set-delete
- char-set-adjoin! char-set-delete!)
- (proc (:value &rest :char) :value))
-
- (char-set->list (proc (:value) :value))
- (char-set->string (proc (:value) :string))
-
- ;; Set algebra.
- (char-set-complement (proc (:value) :value))
- ((char-set-union char-set-intersection char-set-xor)
- (proc (&rest :value) :value))
-
- ;; NOTE(review): SRFI-14 specifies CHAR-SET-DIFFERENCE and the linear-update
- ;; operations below as n-ary (cs1 cs2 ...); the &opt typings here admit at
- ;; most one extra argument -- confirm whether &rest was intended.
- (char-set-difference (proc (:value &opt :value) :value))
-
- (char-set-diff+intersection (proc (:value &rest :value)
- (some-values :value :value)))
-
- (char-set-complement! (proc (:value) :value))
-
- ((char-set-union! char-set-intersection!
- char-set-xor! char-set-difference!)
- (proc (:value &opt :value) :value))
-
- (char-set-diff+intersection! (proc (:value :value &rest :value)
- (some-values :value :value)))
-
- ;; Predefined character sets.
- char-set:lower-case
- char-set:upper-case
- char-set:letter
- char-set:digit
- char-set:letter+digit
- char-set:graphic
- char-set:printing
- char-set:whitespace
- char-set:blank
- char-set:iso-control
- char-set:punctuation
- char-set:symbol
- char-set:hex-digit
- char-set:ascii
- char-set:empty
- char-set:full
- ))
-
-; rdelim.scm gets into the innards of char-sets.
-; This interface exposes the single low-level name it needs, CHAR-SET:S,
-; typed as a procedure from any value to a string (presumably the accessor
-; for a char-set's underlying string representation -- confirm in cset-lib).
-(define-interface scsh-char-set-low-level-interface
- (export (char-set:s (proc (:value) :string))))
-
-;;; Builds two structures from the same body: CHAR-SET-LIB (the public
-;;; SRFI-14 interface) and SCSH-CHAR-SET-LOW-LEVEL-LIB (the low-level
-;;; interface).  The BEGIN form supplies the small support definitions;
-;;; the rest of the code is loaded from the file cset-lib.
-(define-structures ((char-set-lib char-set-interface)
- (scsh-char-set-low-level-lib scsh-char-set-low-level-interface))
- (open error-package ; ERROR procedure
- let-opt ; LET-OPTIONALS* and :OPTIONAL
- ascii ; CHAR->ASCII ASCII->CHAR
- bitwise ; BITWISE-AND
- jar-d-r-t-package ; DEFINE-RECORD-TYPE/JAR macro.
- scheme)
-
- ;; CHECK-ARG returns VAL once (PRED VAL) holds; otherwise it calls ERROR
- ;; and re-checks whatever value ERROR returns.
- (begin (define (check-arg pred val caller)
- (let lp ((val val))
- (if (pred val) val (lp (error "Bad argument" val pred caller)))))
-
- ;; Character <-> integer-code conversions are aliases of the ASCII
- ;; package's procedures.
- (define %latin1->char ascii->char) ; Works for S48
- (define %char->latin1 char->ascii) ; Works for S48
-
- ;; Here's a SRFI-9 d-r-t defined in terms of jar's almost-identical
- ;; d-r-t.  The record name is passed twice because the JAR macro takes
- ;; an extra leading name argument.
- (define-syntax define-record-type
- (syntax-rules ()
- ((define-record-type ?name ?stuff ...)
- (define-record-type/jar ?name ?name ?stuff ...)))))
-
- (files cset-lib)
- (optimize auto-integrate))
-
-;;; Import jar's DEFINE-RECORD-TYPE macro, and export it under the
-;;; name DEFINE-RECORD-TYPE/JAR.  The renaming macro forwards all of its
-;;; operands unchanged, so a client can define its own DEFINE-RECORD-TYPE
-;;; without clashing with the one from DEFINE-RECORD-TYPES.
-(define-structure jar-d-r-t-package (export (define-record-type/jar :syntax))
- (open define-record-types ; JAR's record macro
- scheme)
- (begin (define-syntax define-record-type/jar
- (syntax-rules ()
- ((define-record-type/jar ?stuff ...)
- (define-record-type ?stuff ...))))))
diff --git a/scsh/lib/cset-tests.scm b/scsh/lib/cset-tests.scm
deleted file mode 100644
index 0b96314..0000000
--- a/scsh/lib/cset-tests.scm
+++ /dev/null
@@ -1,200 +0,0 @@
-;;; This is a regression testing suite for the SRFI-14 char-set library.
-;;; Olin Shivers
-
-;;; The TEST macro takes any number of forms and expands into a COND that
-;;; signals (error "Test failed" '<form>) for the first form that evaluates
-;;; to false, and yields the symbol OK if every form is true.
-;;;
-;;; VOWEL? is the predicate used by the filter/count tests below; it must be
-;;; bound under that exact name, since the test forms call VOWEL?.
-(let-syntax ((test (syntax-rules ()
- ((test form ...)
- (cond ((not form) (error "Test failed" 'form)) ...
- (else 'OK))))))
- (let ((vowel? (lambda (c) (member c '(#\a #\e #\i #\o #\u)))))
-
-(test
- (not (char-set? 5))
-
- (char-set? (char-set #\a #\e #\i #\o #\u))
-
- (char-set=)
- (char-set= (char-set))
-
- (char-set= (char-set #\a #\e #\i #\o #\u)
- (string->char-set "ioeauaiii"))
-
- (not (char-set= (char-set #\e #\i #\o #\u)
- (string->char-set "ioeauaiii")))
-
- (char-set<=)
- (char-set<= (char-set))
-
- (char-set<= (char-set #\a #\e #\i #\o #\u)
- (string->char-set "ioeauaiii"))
-
- (char-set<= (char-set #\e #\i #\o #\u)
- (string->char-set "ioeauaiii"))
-
- (<= 0 (char-set-hash char-set:graphic 100) 99)
-
- (= 4 (char-set-fold (lambda (c i) (+ i 1)) 0
- (char-set #\e #\i #\o #\u #\e #\e)))
-
- (char-set= (string->char-set "eiaou2468013579999")
- (char-set-unfold null? car cdr '(#\a #\e #\i #\o #\u #\u #\u)
- char-set:digit))
-
- (char-set= (string->char-set "eiaou246801357999")
- (char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
- (string->char-set "0123456789")))
-
- (not (char-set= (string->char-set "eiaou246801357")
- (char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u)
- (string->char-set "0123456789"))))
-
- (let ((cs (string->char-set "0123456789")))
- (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
- (string->char-set "02468000"))
- (char-set= cs (string->char-set "97531")))
-
- (not (let ((cs (string->char-set "0123456789")))
- (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c)))
- (string->char-set "02468"))
- (char-set= cs (string->char-set "7531"))))
-
- (char-set= (char-set-map char-upcase (string->char-set "aeiou"))
- (string->char-set "IOUAEEEE"))
-
- (not (char-set= (char-set-map char-upcase (string->char-set "aeiou"))
- (string->char-set "OUAEEEE")))
-
- (char-set= (char-set-copy (string->char-set "aeiou"))
- (string->char-set "aeiou"))
-
- (char-set= (char-set #\x #\y) (string->char-set "xy"))
- (not (char-set= (char-set #\x #\y #\z) (string->char-set "xy")))
-
- (char-set= (string->char-set "xy") (list->char-set '(#\x #\y)))
- (not (char-set= (string->char-set "axy") (list->char-set '(#\x #\y))))
-
- (char-set= (string->char-set "xy12345")
- (list->char-set '(#\x #\y) (string->char-set "12345")))
- (not (char-set= (string->char-set "y12345")
- (list->char-set '(#\x #\y) (string->char-set "12345"))))
-
- (char-set= (string->char-set "xy12345")
- (list->char-set! '(#\x #\y) (string->char-set "12345")))
- (not (char-set= (string->char-set "y12345")
- (list->char-set! '(#\x #\y) (string->char-set "12345"))))
-
- (char-set= (string->char-set "aeiou12345")
- (char-set-filter vowel? char-set:ascii (string->char-set "12345")))
- (not (char-set= (string->char-set "aeou12345")
- (char-set-filter vowel? char-set:ascii (string->char-set "12345"))))
-
- (char-set= (string->char-set "aeiou12345")
- (char-set-filter! vowel? char-set:ascii (string->char-set "12345")))
- (not (char-set= (string->char-set "aeou12345")
- (char-set-filter! vowel? char-set:ascii (string->char-set "12345"))))
-
-
- (char-set= (string->char-set "abcdef12345")
- (ucs-range->char-set 97 103 #t (string->char-set "12345")))
- (not (char-set= (string->char-set "abcef12345")
- (ucs-range->char-set 97 103 #t (string->char-set "12345"))))
-
- (char-set= (string->char-set "abcdef12345")
- (ucs-range->char-set! 97 103 #t (string->char-set "12345")))
- (not (char-set= (string->char-set "abcef12345")
- (ucs-range->char-set! 97 103 #t (string->char-set "12345"))))
-
-
- (char-set= (->char-set #\x)
- (->char-set "x")
- (->char-set (char-set #\x)))
-
- (not (char-set= (->char-set #\x)
- (->char-set "y")
- (->char-set (char-set #\x))))
-
- (= 10 (char-set-size (char-set-intersection char-set:ascii char-set:digit)))
-
- (= 5 (char-set-count vowel? char-set:ascii))
-
- (equal? '(#\x) (char-set->list (char-set #\x)))
- (not (equal? '(#\X) (char-set->list (char-set #\x))))
-
- (equal? "x" (char-set->string (char-set #\x)))
- (not (equal? "X" (char-set->string (char-set #\x))))
-
- (char-set-contains? (->char-set "xyz") #\x)
- (not (char-set-contains? (->char-set "xyz") #\a))
-
- (char-set-every char-lower-case? (->char-set "abcd"))
- (not (char-set-every char-lower-case? (->char-set "abcD")))
- (char-set-any char-lower-case? (->char-set "abcd"))
- (not (char-set-any char-lower-case? (->char-set "ABCD")))
-
- (char-set= (->char-set "ABCD")
- (let ((cs (->char-set "abcd")))
- (let lp ((cur (char-set-cursor cs)) (ans '()))
- (if (end-of-char-set? cur) (list->char-set ans)
- (lp (char-set-cursor-next cs cur)
- (cons (char-upcase (char-set-ref cs cur)) ans))))))
-
-
- (char-set= (char-set-adjoin (->char-set "123") #\x #\a)
- (->char-set "123xa"))
- (not (char-set= (char-set-adjoin (->char-set "123") #\x #\a)
- (->char-set "123x")))
- (char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
- (->char-set "123xa"))
- (not (char-set= (char-set-adjoin! (->char-set "123") #\x #\a)
- (->char-set "123x")))
-
- (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
- (->char-set "13"))
- (not (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2)
- (->char-set "13a")))
- (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
- (->char-set "13"))
- (not (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2)
- (->char-set "13a")))
-
- (char-set= (char-set-intersection char-set:hex-digit (char-set-complement char-set:digit))
- (->char-set "abcdefABCDEF"))
- (char-set= (char-set-intersection! (char-set-complement! (->char-set "0123456789"))
- char-set:hex-digit)
- (->char-set "abcdefABCDEF"))
-
- (char-set= (char-set-union char-set:hex-digit
- (->char-set "abcdefghijkl"))
- (->char-set "abcdefABCDEFghijkl0123456789"))
- (char-set= (char-set-union! (->char-set "abcdefghijkl")
- char-set:hex-digit)
- (->char-set "abcdefABCDEFghijkl0123456789"))
-
- (char-set= (char-set-difference (->char-set "abcdefghijklmn")
- char-set:hex-digit)
- (->char-set "ghijklmn"))
- (char-set= (char-set-difference! (->char-set "abcdefghijklmn")
- char-set:hex-digit)
- (->char-set "ghijklmn"))
-
- (char-set= (char-set-xor (->char-set "0123456789")
- char-set:hex-digit)
- (->char-set "abcdefABCDEF"))
- (char-set= (char-set-xor! (->char-set "0123456789")
- char-set:hex-digit)
- (->char-set "abcdefABCDEF"))
-
- (call-with-values (lambda ()
- (char-set-diff+intersection char-set:hex-digit
- char-set:letter))
- (lambda (d i)
- (and (char-set= d (->char-set "0123456789"))
- (char-set= i (->char-set "abcdefABCDEF")))))
-
- (call-with-values (lambda ()
- (char-set-diff+intersection! (char-set-copy char-set:hex-digit)
- (char-set-copy char-set:letter)))
- (lambda (d i)
- (and (char-set= d (->char-set "0123456789"))
- (char-set= i (->char-set "abcdefABCDEF"))))))
-
-))
diff --git a/scsh/lib/list-lib.scm b/scsh/lib/list-lib.scm
deleted file mode 100644
index 7386882..0000000
--- a/scsh/lib/list-lib.scm
+++ /dev/null
@@ -1,1599 +0,0 @@
-;;; SRFI-1 list-processing library -*- Scheme -*-
-;;; Reference implementation
-;;;
-;;; Copyright (c) 1998, 1999 by Olin Shivers. You may do as you please with
-;;; this code as long as you do not remove this copyright notice or
-;;; hold me liable for its use. Please send bug reports to shivers@ai.mit.edu.
-;;; -Olin
-
-;;; This is a library of list- and pair-processing functions. I wrote it after
-;;; carefully considering the functions provided by the libraries found in
-;;; R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common
-;;; Lisp, Bigloo, guile, T, APL and the SML standard basis. It is a pretty
-;;; rich toolkit, providing a superset of the functionality found in any of
-;;; the various Schemes I considered.
-
-;;; This implementation is intended as a portable reference implementation
-;;; for SRFI-1. See the porting notes below for more information.
-
-;;; Revision history
-;;;;;;;;;;;;;;;;;;;;
-;;; This is version 1.1. 12/18/2000
-;;; Fixes a small bug in DELETE-DUPLICATES!.
-
-;;; Exported:
-;;; xcons tree-copy make-list list-tabulate cons* list-copy
-;;; proper-list? circular-list? dotted-list? not-pair? null-list? list=
-;;; circular-list length+
-;;; iota
-;;; first second third fourth fifth sixth seventh eighth ninth tenth
-;;; car+cdr
-;;; take drop
-;;; take-right drop-right
-;;; take! drop-right!
-;;; split-at split-at!
-;;; last last-pair
-;;; zip unzip1 unzip2 unzip3 unzip4 unzip5
-;;; count
-;;; append! append-reverse append-reverse! concatenate concatenate!
-;;; unfold fold pair-fold reduce
-;;; unfold-right fold-right pair-fold-right reduce-right
-;;; append-map append-map! map! pair-for-each filter-map map-in-order
-;;; filter partition remove
-;;; filter! partition! remove!
-;;; find find-tail any every list-index
-;;; take-while drop-while take-while!
-;;; span break span! break!
-;;; delete delete!
-;;; alist-cons alist-copy
-;;; delete-duplicates delete-duplicates!
-;;; alist-delete alist-delete!
-;;; reverse!
-;;; lset<= lset= lset-adjoin
-;;; lset-union lset-intersection lset-difference lset-xor lset-diff+intersection
-;;; lset-union! lset-intersection! lset-difference! lset-xor! lset-diff+intersection!
-;;;
-;;; In principle, the following R4RS list- and pair-processing procedures
-;;; are also part of this package's exports, although they are not defined
-;;; in this file:
-;;; Primitives: cons pair? null? car cdr set-car! set-cdr!
-;;; Non-primitives: list length append reverse cadr ... cddddr list-ref
-;;; memq memv assq assv
-;;; (The non-primitives are defined in this file, but commented out.)
-;;;
-;;; These R4RS procedures have extended definitions in SRFI-1 and are defined
-;;; in this file:
-;;; map for-each member assoc
-;;;
-;;; The remaining two R4RS list-processing procedures are not included:
-;;; list-tail (use drop)
-;;; list? (use proper-list?)
-
-
-;;; A note on recursion and iteration/reversal:
-;;; Many iterative list-processing algorithms naturally compute the elements
-;;; of the answer list in the wrong order (left-to-right or head-to-tail) from
-;;; the order needed to cons them into the proper answer (right-to-left, or
-;;; tail-then-head). One style or idiom of programming these algorithms, then,
-;;; loops, consing up the elements in reverse order, then destructively
-;;; reverses the list at the end of the loop. I do not do this. The natural
-;;; and efficient way to code these algorithms is recursively. This trades off
-;;; intermediate temporary list structure for intermediate temporary stack
-;;; structure. In a stack-based system, this improves cache locality and
-;;; lightens the load on the GC system. Don't stand on your head to iterate!
-;;; Recurse, where natural. Multiple-value returns make this even more
-;;; convenient, when the recursion/iteration has multiple state values.
-
-;;; Porting:
-;;; This is carefully tuned code; do not modify casually.
-;;; - It is careful to share storage when possible;
-;;; - Side-effecting code tries not to perform redundant writes.
-;;;
-;;; That said, a port of this library to a specific Scheme system might wish
-;;; to tune this code to exploit particulars of the implementation.
-;;; The single most important compiler-specific optimisation you could make
-;;; to this library would be to add rewrite rules or transforms to:
-;;; - transform applications of n-ary procedures (e.g. LIST=, CONS*, APPEND,
-;;; LSET-UNION) into multiple applications of a primitive two-argument
-;;; variant.
-;;; - transform applications of the mapping functions (MAP, FOR-EACH, FOLD,
-;;; ANY, EVERY) into open-coded loops. The killer here is that these
-;;; functions are n-ary. Handling the general case is quite inefficient,
-;;; requiring many intermediate data structures to be allocated and
-;;; discarded.
-;;; - transform applications of procedures that take optional arguments
-;;; into calls to variants that do not take optional arguments. This
-;;; eliminates unnecessary consing and parsing of the rest parameter.
-;;;
-;;; These transforms would provide BIG speedups. In particular, the n-ary
-;;; mapping functions are particularly slow and cons-intensive, and are good
-;;; candidates for tuning. I have coded fast paths for the single-list cases,
-;;; but what you really want to do is exploit the fact that the compiler
-;;; usually knows how many arguments are being passed to a particular
-;;; application of these functions -- they are usually explicitly called, not
-;;; passed around as higher-order values. If you can arrange to have your
-;;; compiler produce custom code or custom linkages based on the number of
-;;; arguments in the call, you can speed these functions up a *lot*. But this
-;;; kind of compiler technology no longer exists in the Scheme world as far as
-;;; I can see.
-;;;
-;;; Note that this code is, of course, dependent upon standard bindings for
-;;; the R5RS procedures -- i.e., it assumes that the variable CAR is bound
-;;; to the procedure that takes the car of a list. If your Scheme
-;;; implementation allows user code to alter the bindings of these procedures
-;;; in a manner that would be visible to these definitions, then there might
-;;; be trouble. You could consider horrible kludgery along the lines of
-;;; (define fact
-;;; (let ((= =) (- -) (* *))
-;;; (letrec ((real-fact (lambda (n)
-;;; (if (= n 0) 1 (* n (real-fact (- n 1)))))))
-;;; real-fact)))
-;;; Or you could consider shifting to a reasonable Scheme system that, say,
-;;; has a module system protecting code from this kind of lossage.
-;;;
-;;; This code does a fair amount of run-time argument checking. If your
-;;; Scheme system has a sophisticated compiler that can eliminate redundant
-;;; error checks, this is no problem. However, if not, these checks incur
-;;; some performance overhead -- and, in a safe Scheme implementation, they
-;;; are in some sense redundant: if we don't check to see that the PROC
-;;; parameter is a procedure, we'll find out anyway three lines later when
-;;; we try to call the value. It's pretty easy to rip all this argument
-;;; checking code out if it's inappropriate for your implementation -- just
-;;; nuke every call to CHECK-ARG.
-;;;
-;;; On the other hand, if you *do* have a sophisticated compiler that will
-;;; actually perform soft-typing and eliminate redundant checks (Rice's systems
-;;; being the only possible candidate of which I'm aware), leaving these checks
-;;; in can *help*, since their presence can be elided in redundant cases,
-;;; and in cases where they are needed, performing the checks early, at
-;;; procedure entry, can "lift" a check out of a loop.
-;;;
-;;; Finally, I have only checked the properties that can portably be checked
-;;; with R5RS Scheme -- and this is not complete. You may wish to alter
-;;; the CHECK-ARG parameter checks to perform extra, implementation-specific
-;;; checks, such as procedure arity for higher-order values.
-;;;
-;;; The code has only these non-R4RS dependencies:
-;;; A few calls to an ERROR procedure;
-;;; Uses of the R5RS multiple-value procedure VALUES and the m-v binding
-;;; RECEIVE macro (which isn't R5RS, but is a trivial macro).
-;;; Many calls to a parameter-checking procedure check-arg:
-;;; (define (check-arg pred val caller)
-;;; (let lp ((val val))
-;;; (if (pred val) val (lp (error "Bad argument" val pred caller)))))
-;;; A few uses of the LET-OPTIONAL and :OPTIONAL macros for parsing
-;;; optional arguments.
-;;;
-;;; Most of these procedures use the NULL-LIST? test to trigger the
-;;; base case in the inner loop or recursion. The NULL-LIST? function
-;;; is defined to be a careful one -- it raises an error if passed a
-;;; non-nil, non-pair value. The spec allows an implementation to use
-;;; a less-careful implementation that simply defines NULL-LIST? to
-;;; be NOT-PAIR?. This would speed up the inner loops of these procedures
-;;; at the expense of having them silently accept dotted lists.
-
-;;; A note on dotted lists:
-;;; I, personally, take the view that the only consistent view of lists
-;;; in Scheme is the view that *everything* is a list -- values such as
-;;; 3 or "foo" or 'bar are simply empty dotted lists. This is due to the
-;;; fact that Scheme actually has no true list type. It has a pair type,
-;;; and there is an *interpretation* of the trees built using this type
-;;; as lists.
-;;;
-;;; I lobbied to have these list-processing procedures hew to this
-;;; view, and accept any value as a list argument. I was overwhelmingly
-;;; overruled during the SRFI discussion phase. So I am inserting this
-;;; text in the reference lib and the SRFI spec as a sort of "minority
-;;; opinion" dissent.
-;;;
-;;; Many of the procedures in this library can be trivially redefined
-;;; to handle dotted lists, just by changing the NULL-LIST? base-case
-;;; check to NOT-PAIR?, meaning that any non-pair value is taken to be
-;;; an empty list. For most of these procedures, that's all that is
-;;; required.
-;;;
-;;; However, we have to do a little more work for some procedures that
-;;; *produce* lists from other lists. Were we to extend these procedures to
-;;; accept dotted lists, we would have to define how they terminate the lists
-;;; produced as results when passed a dotted list. I designed a coherent set
-;;; of termination rules for these cases; this was posted to the SRFI-1
-;;; discussion list. I additionally wrote an earlier version of this library
-;;; that implemented that spec. It has been discarded during later phases of
-;;; the definition and implementation of this library.
-;;;
-;;; The argument *against* defining these procedures to work on dotted
-;;; lists is that dotted lists are the rare, odd case, and that by
-;;; arranging for the procedures to handle them, we lose error checking
-;;; in the cases where a dotted list is passed by accident -- e.g., when
-;;; the programmer swaps two arguments to a list-processing function,
-;;; one being a scalar and one being a list. For example,
-;;; (member '(1 3 5 7 9) 7)
-;;; This would quietly return #f if we extended MEMBER to accept dotted
-;;; lists.
-;;;
-;;; The SRFI discussion record contains more discussion on this topic.
-
-
-;;; Constructors
-;;;;;;;;;;;;;;;;
-
-;;; XCONS is CONS with its parameters exchanged: (xcons d a) => (a . d).
-;;; Handy as the combining procedure handed to a fold or other
-;;; higher-order procedure.
-(define xcons
-  (lambda (d a)
-    (cons a d)))
-
-;;;; Recursively copy every cons.
-;(define (tree-copy x)
-; (let recur ((x x))
-; (if (not (pair? x)) x
-; (cons (recur (car x)) (recur (cdr x))))))
-
-;;; (make-list len [elt]) -- build a list holding LEN copies of ELT.
-;;; LEN must be a non-negative integer.  ELT defaults to #f; supplying more
-;;; than one optional argument signals an error.
-
-(define (make-list len . maybe-elt)
-  (check-arg (lambda (n) (and (integer? n) (>= n 0))) len make-list)
-  (let ((fill (if (pair? maybe-elt)
-                  (if (pair? (cdr maybe-elt))
-                      (error "Too many arguments to MAKE-LIST"
-                             (cons len maybe-elt))
-                      (car maybe-elt))
-                  #f)))                 ; Default value
-    ;; Count down from LEN, pushing one FILL per step.
-    (let loop ((remaining len) (acc '()))
-      (if (<= remaining 0)
-          acc
-          (loop (- remaining 1) (cons fill acc))))))
-
-
-;(define (list . ans) ans) ; R4RS
-
-
-;;; (list-tabulate len proc) -- list of length LEN whose element i is
-;;; (PROC i), for 0 <= i < LEN.  The list is built back to front, so PROC
-;;; is applied to LEN-1 first and to 0 last.
-
-(define (list-tabulate len proc)
-  (check-arg (lambda (n) (and (integer? n) (>= n 0))) len list-tabulate)
-  (check-arg procedure? proc list-tabulate)
-  (let build ((index (- len 1)) (acc '()))
-    (if (< index 0)
-        acc
-        (build (- index 1) (cons (proc index) acc)))))
-
-;;; (cons* a1 a2 ... an) = (cons a1 (cons a2 (cons ... an)))
-;;; With a single argument the argument itself is returned:
-;;; (cons* a1) = a1    (cons* a1 a2 ...) = (cons a1 (cons* a2 ...))
-;;;
-;;; Equivalent to (cons first (unfold not-pair? car cdr rest values)).
-
-(define (cons* first . rest)
-  (let build ((head first) (tail rest))
-    (if (not (pair? tail))
-        head                            ; last argument becomes the final cdr
-        (cons head (build (car tail) (cdr tail))))))
-
-;;; LIST-COPY allocates fresh pairs for the spine of LIS; the elements are
-;;; shared, and a non-pair tail is returned as is.
-;;; Equivalent to (unfold not-pair? car cdr lis values).
-
-(define (list-copy lis)
-  (let copy ((rest lis))
-    (cond ((pair? rest) (cons (car rest) (copy (cdr rest))))
-          (else rest))))
-
-;;; IOTA count [start step]  =>  (start start+step ... start+(count-1)*step)
-;;; START defaults to 0 and STEP to 1.  A negative COUNT signals an error.
-;;; The list is built from its last value backwards, subtracting STEP at
-;;; each iteration.
-
-(define (iota count . maybe-start+step)
-  (check-arg integer? count iota)
-  (if (< count 0) (error "Negative step count" iota count))
-  (let-optionals maybe-start+step ((start 0) (step 1))
-    (check-arg number? start iota)
-    (check-arg number? step iota)
-    (let loop ((left count)
-               (val (+ start (* (- count 1) step)))
-               (acc '()))
-      (if (<= left 0)
-          acc
-          (loop (- left 1) (- val step) (cons val acc))))))
-
-;;; I thought these were lovely, but the public at large did not share my
-;;; enthusiasm...
-;;; :IOTA to (0 ... to-1)
-;;; :IOTA from to (from ... to-1)
-;;; :IOTA from to step (from from+step ...)
-
-;;; IOTA: to (1 ... to)
-;;; IOTA: from to (from+1 ... to)
-;;; IOTA: from to step (from+step from+2step ...)
-
-;(define (%parse-iota-args arg1 rest-args proc)
-; (let ((check (lambda (n) (check-arg integer? n proc))))
-; (check arg1)
-; (if (pair? rest-args)
-; (let ((arg2 (check (car rest-args)))
-; (rest (cdr rest-args)))
-; (if (pair? rest)
-; (let ((arg3 (check (car rest)))
-; (rest (cdr rest)))
-; (if (pair? rest) (error "Too many parameters" proc arg1 rest-args)
-; (values arg1 arg2 arg3)))
-; (values arg1 arg2 1)))
-; (values 0 arg1 1))))
-;
-;(define (iota: arg1 . rest-args)
-; (receive (from to step) (%parse-iota-args arg1 rest-args iota:)
-; (let* ((numsteps (floor (/ (- to from) step)))
-; (last-val (+ from (* step numsteps))))
-; (if (< numsteps 0) (error "Negative step count" iota: from to step))
-; (do ((steps-left numsteps (- steps-left 1))
-; (val last-val (- val step))
-; (ans '() (cons val ans)))
-; ((<= steps-left 0) ans)))))
-;
-;
-;(define (:iota arg1 . rest-args)
-; (receive (from to step) (%parse-iota-args arg1 rest-args :iota)
-; (let* ((numsteps (ceiling (/ (- to from) step)))
-; (last-val (+ from (* step (- numsteps 1)))))
-; (if (< numsteps 0) (error "Negative step count" :iota from to step))
-; (do ((steps-left numsteps (- steps-left 1))
-; (val last-val (- val step))
-; (ans '() (cons val ans)))
-; ((<= steps-left 0) ans)))))
-
-
-
-;;; Build a circular list of the arguments: the arguments are consed into a
-;;; list whose last pair's cdr is then set to point back at the first pair.
-(define (circular-list val1 . vals)
-  (let* ((ring (cons val1 vals))
-         (tail (last-pair ring)))
-    (set-cdr! tail ring)
-    ring))
-
-;;;
-Olin Shivers
-
-
-This SRFI is currently in ``final'' status. To see an explanation of each status that a SRFI can hold, see here.
-You can access the discussion via the archive of the mailing list.
-
-
-R5RS Scheme has an impoverished set of list-processing utilities, which is a
-problem for authors of portable code. This SRFI proposes a coherent and
-comprehensive set of list-processing procedures; it is accompanied by a
-reference implementation of the spec. The reference implementation is
-
-The set of basic list and pair operations provided by R4RS/R5RS Scheme is far
-from satisfactory. Because this set is so small and basic, most
-implementations provide additional utilities, such as a list-filtering
-function, or a "left fold" operator, and so forth. But, of course, this
-introduces incompatibilities -- different Scheme implementations provide
-different sets of procedures.
-
-
-I have designed a full-featured library of procedures for list processing.
-While putting this library together, I checked as many Schemes as I could get
-my hands on. (I have a fair amount of experience with several of these
-already.) I missed Chez -- no on-line manual that I can find -- but I hit most
-of the other big, full-featured Schemes. The complete list of list-processing
-systems I checked is:
-
-As a result, the library I am proposing is fairly rich.
-
-Following this initial design phase, this library went through several
-months of discussion on the SRFI mailing lists, and was altered in light
-of the ideas and suggestions put forth during this discussion.
-
-In parallel with designing this API, I have also written a reference
-implementation. I have placed this source on the Net with an unencumbered,
-"open" copyright. A few notes about the reference implementation:
-
-
- This is not to say that the implementation can't be tuned up for
- a specific Scheme implementation. There are notes in comments addressing
- ways implementors can tune the reference implementation for performance.
-
-In short, I've written the reference implementation to make it as painless
-as possible for an implementor -- or a regular programmer -- to adopt this
-library and get good results with it.
-
-
-
-
-Here is a short list of the procedures provided by the list-lib package.
-R5RS procedures are shown in
-bold;
-extended R5RS
- procedures, in bold italic.
-
-Four R4RS/R5RS list-processing procedures are extended by this library in
-backwards-compatible ways:
-
-The following R4RS/R5RS list- and pair-processing procedures are also part of
-list-lib's exports, as defined by the R5RS:
-
-The remaining two R4RS/R5RS list-processing
-procedures are not part of
-this library:
-
-
-A set of general criteria guided the design of this library.
-
-
-
-I don't require "destructive" (what I call "linear update") procedures to
-alter and recycle cons cells from the argument lists. They are allowed to, but
-not required to. (And the reference implementations I have written do
-recycle the argument lists.)
-
-
-List-filtering procedures such as
-Contrariwise, although the reference implementations of the list-filtering
-procedures share longest common tails between argument and answer lists,
-it is not part of the spec.
-
-Because lists are an inherently sequential data structure (unlike, say,
-vectors), list-inspection functions such as
-However, constructor functions, such as
-Predicates return useful true values wherever possible. Thus
-Functionality is provided both in pure and linear-update (potentially
-destructive) forms wherever this makes sense.
-
-No special status accorded Scheme's built-in equality functions.
-Any functionality provided in terms of
-Proper design counts for more than backwards compatibility, but I have tried,
-ceteris paribus,
-to be as backwards-compatible as possible with existing
-list-processing libraries, in order to facilitate porting old code to run as a
-client of the procedures in this library. Name choices and semantics are, for
-the most part, in agreement with existing practice in many current Scheme
-systems. I have indicated some incompatibilities in the following text.
-
-These procedures are not "sequence generic" -- i.e., procedures that
-operate on either vectors or lists. They are list-specific. I prefer to
-keep the library simple and focussed.
-
-I have named these procedures without a qualifying initial "list-" lexeme,
-which is in keeping with the existing set of list-processing utilities in
-Scheme.
-I follow the general Scheme convention (vector-length, string-ref) of
-placing the type-name before the action when naming procedures -- so
-we have
-I have generally followed a regular and consistent naming scheme, composing
-procedure names from a set of basic lexemes.
-
-
-
-
-Many procedures in this library have "pure" and "linear update" variants. A
-"pure" procedure has no side-effects, and in particular does not alter its
-arguments in any way. A "linear update" procedure is allowed -- but not
-required -- to side-effect its arguments in order to construct its
-result. "Linear update" procedures are typically given names ending with an
-exclamation point. So, for example,
-This is why we do not call these procedures "destructive" -- because they
-aren't required to be destructive. They are potentially destructive.
-
-What this means is that you may only apply linear-update procedures to
-values that you know are "dead" -- values that will never be used again
-in your program. This must be so, since you can't rely on the value passed
-to a linear-update procedure after that procedure has been called. It
-might be unchanged; it might be altered.
-
-The "linear" in "linear update" doesn't mean "linear time" or "linear space"
-or any sort of multiple-of-n kind of meaning. It's a fancy term that
-type theorists and pure functional programmers use to describe
-systems where you are only allowed to have exactly one reference to each
-variable. This provides a guarantee that the value bound to a variable is
-bound to no other variable. So when you use a variable in a variable
-reference, you "use it up." Knowing that no one else has a pointer to that
-value means that a system primitive is free to side-effect its arguments to
-produce what is, observationally, a pure-functional result.
-
-In the context of this library, "linear update" means you, the programmer,
-know there are no other live references to the value passed to the
-procedure -- after passing the value to one of these procedures, the
-value of the old pointer is indeterminate. Basically, you are licensing
-the Scheme implementation to alter the data structure if it feels like
-it -- you have declared you don't care either way.
-
-You get no help from Scheme in checking that the values you claim are "linear"
-really are. So you better get it right. Or play it safe and use the non-!
-procedures -- it doesn't do any good to compute quickly if you get the wrong
-answer.
-
-Why go to all this trouble to define the notion of "linear update" and use it
-in a procedure spec, instead of the more common notion of a "destructive"
-operation? First, note that destructive list-processing procedures are almost
-always used in a linear-update fashion. This is in part required by the
-special case of operating upon the empty list, which can't be side-effected.
-This means that destructive operators are not pure side-effects -- they have
-to return a result. Second, note that code written using linear-update
-operators can be trivially ported to a pure, functional subset of Scheme by
-simply providing pure implementations of the linear-update operators. Finally,
-requiring destructive side-effects ruins opportunities to parallelise these
-operations -- and the places where one has taken the trouble to spell out
-destructive operations are usually exactly the code one would want a
-parallelising compiler to parallelise: the efficiency-critical kernels of the
-algorithm. Linear-update operations are easily parallelised. Going with a
-linear-update spec doesn't close off these valuable alternative implementation
-techniques. This list library is intended as a set of low-level, basic
-operators, so we don't want to exclude these possible implementations.
-
-The linear-update procedures in this library are
-
-
-Scheme does not properly have a list type, just as C does not have a string
-type. Rather, Scheme has a binary-tuple type, from which one can build binary
-trees. There is an interpretation of Scheme values that allows one to
-treat these trees as lists. Further complications ensue from the fact that
-Scheme allows side-effects to these tuples, raising the possibility of lists
-of unbounded length, and trees of unbounded depth (that is, circular data
-structures).
-
-
-However, there is a simple view of the world of Scheme values that considers
-every value to be a list of some sort. That is, every value is either
-
-Note that the zero-length dotted lists are simply all the non-null, non-pair
-values.
-
-
-This view is captured by the predicates
-Dotted lists are not fully supported by list-lib. Most procedures are
-defined only on proper lists -- that is, finite, nil-terminated lists. The
-procedures that will also handle circular or dotted lists are specifically
-marked. While this design decision restricts the domain of possible arguments
-one can pass to these procedures, it has the benefit of allowing the
-procedures to catch the error cases where programmers inadvertently pass
-scalar values to a list procedure by accident,
-e.g., by switching the arguments to a procedure call.
-
-
-
-
-Note that statements of the form "it is an error" merely mean "don't
-do that." They are not a guarantee that a conforming implementation will
-"catch" such improper use by, for example, raising some kind of exception.
-Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators such
-as
- When speaking of an error situation, this report uses the phrase "an
- error is signalled" to indicate that implementations must detect and
- report the error. If such wording does not appear in the discussion
- of an error, then implementations are not required to detect or
- report the error, though they are encouraged to do so. An error
- situation that implementations are not required to detect is usually
- referred to simply as "an error."
-
- For example, it is an error for a procedure to be passed an argument
- that the procedure is not explicitly specified to handle, even though
- such domain errors are seldom mentioned in this report.
- Implementations may extend a procedure's domain of definition to
- include such arguments.
-
-The following items are not in this library:
-
-They should have their own SRFI specs.
-
-
-
-
-
-
-In a Scheme system that has a module or package system, these procedures
-should be contained in a module named "list-lib".
-
-The templates given below obey the following conventions for procedure formals:
-
-It is an error to pass a circular or dotted list to a procedure not
-defined to accept such an argument.
-
-
-
-
-
-Note: the predicates
- More carefully: The empty list is a proper list. A pair whose cdr is a
- proper list is also a proper list:
-
- Nil-terminated lists are called "proper" lists by R5RS and Common Lisp.
- The opposite of proper is improper.
-
- R5RS binds this function to the variable
-
- Terminology: The opposite of circular is finite.
-
- In the n-ary case,
- every listi is compared to
- listi+1
- (as opposed, for example, to comparing
- list1 to every listi,
- for i>1).
- If there are no list arguments at all,
-
- It is an error to apply
- Note that the dynamic order in which the elt= procedure is
- applied to pairs of elements is not specified.
- For example, if
- The equality procedure must be consistent with
- flist may be any finite list, either proper or dotted:
-
- If x is circular,
-
- The length of a proper list is a non-negative integer n such that
- Note that some Scheme implementations do not support passing more than a
- certain number (e.g., 64) of arguments to an n-ary procedure.
- In these implementations, the
- As with
-
- First, consider the single list-parameter case. If clist1 = (e1 e2 ... en),
- then this procedure returns
-
- First, consider the single list-parameter case. If clist1 =
- Example:
-
- ridentity should be a "right identity" of the procedure f -- that is,
- for any value x acceptable to f,
-
- Note that ridentity is used only in the empty-list case.
- You typically use
- Note: MIT Scheme and Haskell flip F's arg order for their
- In other words, we use g to generate a sequence of seed values
-
-
- In other words, we use g to generate a sequence of seed values
-
-
- At least one of the argument lists must be finite:
-
- At least one of the argument lists must be finite.
-
-
-
- The dynamic order in which the various applications of f are made is
- not specified.
-
- Example:
-
- The dynamic order in which the various applications of f are made is
- not specified.
-
- In the n-ary case, clist2, clist3, ... must have at least as many
- elements as list1.
-
-
-
- At least one of the list arguments must be finite.
-
-
-
- The f procedure may reliably apply
- At least one of the list arguments must be finite.
-
-
-The following procedures all search lists for a leftmost element satisfying
-some criteria. This means they do not always examine the entire list; thus,
-there is no efficient way for them to reliably detect and signal an error when
-passed a dotted or circular list. Here are the general rules describing how
-these procedures work when applied to different kinds of lists:
-
-
- In brief, SRFI-1 compliant code may not pass a dotted
- list argument to these procedures.
-
-
-Here are some examples, using the
-
- Examples:
-
-
-
-In other words:
-
-
-
- If there are n list arguments clist1 ... clistn, then pred must be a
- procedure taking n arguments and returning a boolean result.
-
-
- Note the difference between
- Like
- If there are n list arguments clist1 ... clistn, then pred must be a
- procedure taking n arguments and returning a boolean result.
-
-
- If one of the clisti has no elements,
- Like
- If there are n list arguments clist1 ... clistn, then pred must be a
- function taking n arguments and returning a boolean result.
-
-
- The iteration stops when one of the lists runs out of values; in this
- case,
- The comparison procedure is used to compare the elements ei of list
- to the key x in this way:
-
- Note that fully general list searching may be performed with
- the
-
-
- The list is not disordered -- elements that appear in the result list
- occur in the same order as they occur in the argument list.
- The result may share a common tail with the argument list.
-
-
- Note that fully general element deletion can be performed with the
-
- The = parameter is used to compare the elements of the list; it defaults
- to
- Implementations of
- Be aware that, in general,
-
-An "association list" (or "alist") is a list of pairs. The car of each pair
-contains a key value, and the cdr contains the associated data value. They can
-be used to construct simple look-up tables in Scheme. Note that association
-lists are probably inappropriate for performance-critical use on large data;
-in these cases, hash tables or some other alternative should be employed.
-
-
- The comparison procedure is used to compare the elements ei of list
- to the key parameter in this way:
-
- Note that fully general alist searching may be performed with
- the
- Return values may share common tails with the alist argument.
- The alist is not disordered -- elements that appear in the result alist
- occur in the same order as they occur in the argument alist.
-
- The comparison procedure is used to compare the element keys ki of alist's
- entries to the key parameter in this way:
-
-
-These procedures implement operations on sets represented as lists of elements.
-They all take an = argument used to compare elements of lists.
-This equality procedure is required to be consistent with
-Note that this implies, in turn, that two lists that are
-Be aware that these procedures typically run in time
-O(n * m)
-for n- and m-element list arguments.
-Performance-critical applications
-operating upon large sets will probably wish to use other data
-structures and algorithms.
-
-
- The list parameter is always a suffix of the result -- even if the list
- parameter contains repeated elements, these are not reduced.
-
- The union of lists A and B is constructed as follows:
-
- In the n-ary case, the two-argument list-union operation is simply
- folded across the argument lists.
-
-
- The intersection of lists A and B
- is comprised of every element of A that is =
- to some element of B:
-
- The order in which elements appear in the result is the same as
- they appear in list1 --
- that is,
- In the n-ary case, the two-argument list-intersection operation is simply
- folded across the argument lists. However, the dynamic order in which the
- applications of = are made is not specified.
- The procedure may check an
- element of list1 for membership
- in every other list before proceeding to
- consider the next element of list1,
- or it may completely intersect list1
- and list2
- before proceeding to list3,
- or it may go about its work in some third order.
-
-
- The = procedure's first argument is
- always an element of list1;
- its second is an element of one of the other listi.
- Elements that are repeated multiple times in the
- list1 parameter
- will occur multiple times in the result.
-
- The order in which elements appear in the result is the same as
- they appear in list1 --
- that is,
- More precisely, for two lists A and B,
- A xor B is a list of
-
- In the n-ary case, the binary-xor operation is simply folded across
- the lists.
-
-
- The = procedure's first argument is an element of list1; its second
- is an element of one of the other listi.
-
- Either of the answer lists may share a
- common tail with list1.
- This operation essentially partitions list1.
-
-
-
-These two procedures are the primitive,
-R5RS
-side-effect operations on pairs.
-
-
-The design of this library benefited greatly from the feedback provided during
-the SRFI discussion phase. Among those contributing thoughtful commentary and
-suggestions, both on the mailing list and by private discussion, were Mike
-Ashley, Darius Bacon, Alan Bawden, Phil Bewig, Jim Blandy, Dan Bornstein, Per
-Bothner, Anthony Carrico, Doug Currie, Kent Dybvig, Sergei Egorov, Doug Evans,
-Marc Feeley, Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Dan Friedman,
-Lars Thomas Hansen, Brian Harvey, Erik Hilsdale, Wolfgang Hukriede, Richard
-Kelsey, Donovan Kolbly, Shriram Krishnamurthi, Dave Mason, Jussi Piitulainen,
-David Pokorny, Duncan Smith, Mike Sperber, Maciej Stachowiak, Harvey J. Stein,
-John David Stone, and Joerg F. Wittenberger. I am grateful to them for their
-assistance.
-
-I am also grateful to the authors, implementors and documentors of all the systems
-mentioned in the rationale. Aubrey Jaffer and Kent Pitman should be noted
-for their work in producing Web-accessible versions of the R5RS and
-Common Lisp specs, which were a tremendous aid.
-
-This is not to imply that these individuals necessarily endorse the final
-results, of course.
-
-
-
-
-
-
-
-
-
-The Common Lisp "HyperSpec," produced by Kent Pitman, is essentially
-the ANSI spec for Common Lisp:
-
-http://www.harlequin.com/education/books/HyperSpec/.
-
-
-
-Certain portions of this document -- the specific, marked segments of text
-describing the R5RS procedures -- were adapted with permission from the R5RS
-report.
-
-
-All other text is copyright (C) Olin Shivers (1998, 1999).
-All Rights Reserved.
-
-This document and translations of it may be copied and furnished to
-others, and derivative works that comment on or otherwise explain it
-or assist in its implementation may be prepared, copied, published and
-distributed, in whole or in part, without restriction of any kind,
-provided that the above copyright notice and this paragraph are
-included on all such copies and derivative works. However, this
-document itself may not be modified in any way, such as by removing
-the copyright notice or references to the Scheme Request For
-Implementation process or editors, except as needed for the purpose of
-developing SRFIs in which case the procedures for copyrights defined
-in the SRFI process must be followed, or as required to translate it
-into languages other than English.
-
-The limited permissions granted above are perpetual and will not be
-revoked by the authors or their successors or assigns.
-
-This document and the information contained herein is provided on an
-"AS IS" basis and THE AUTHOR AND THE SRFI EDITORS DISCLAIM ALL
-WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY
-WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY
-RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
-PARTICULAR PURPOSE.
-
-
-
diff --git a/scsh/lib/srfi-1.txt b/scsh/lib/srfi-1.txt
deleted file mode 100644
index bdff052..0000000
--- a/scsh/lib/srfi-1.txt
+++ /dev/null
@@ -1,2015 +0,0 @@
-The SRFI-1 list library -*- outline -*-
-Olin Shivers
-98/10/16
-Last Update: 99/10/3
-
-Emacs should display this document in outline mode. Say c-h m for
-instructions on how to move through it by sections (e.g., c-c c-n, c-c c-p).
-During the SRFI discussion period, the current draft may be found at
- ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt
-
-
-* Table of contents
--------------------
-
-Abstract
-Introduction
-Procedure index
-General discussion
- "Linear update" procedures
- Improper lists
- Errors
- Not included in this library
-The procedures
- Constructors
- Predicates
- Selectors
- Miscellaneous: length, append, reverse, zip & count
- Fold, unfold & map
- Filtering & partitioning
- Searching
- Deletion
- Association lists
- Set operations on lists
- Primitive side-effects
-Acknowledgements
-References & links
-Copyright
-
-
-* Abstract
-----------
-
-R5RS Scheme has an impoverished set of list-processing utilities, which is a
-problem for authors of portable code. This SRFI proposes a coherent and
-comprehensive set of list-processing procedures; it is accompanied by a
-reference implementation of the spec. The reference implementation is
- - portable
- - efficient
- - completely open, public-domain source
-
-
-* Introduction
---------------
-
-The set of basic list and pair operations provided by R4RS/R5RS Scheme is far
-from satisfactory. Because this set is so small and basic, most
-implementations provide additional utilities, such as a list-filtering
-function, or a "left fold" operator, and so forth. But, of course, this
-introduces incompatibilities -- different Scheme implementations provide
-different sets of procedures.
-
-I have designed a full-featured library of procedures for list processing.
-While putting this library together, I checked as many Schemes as I could get
-my hands on. (I have a fair amount of experience with several of these
-already.) I missed Chez -- no on-line manual that I can find -- but I hit most
-of the other big, full-featured Schemes. The complete list of list-processing
-systems I checked is:
- R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common
- Lisp, Bigloo, guile, T, APL and the SML standard basis
-As a result, the library I am proposing is fairly rich.
-
-Following this initial design phase, this library went through several
-months of discussion on the SRFI mailing lists, and was altered in light
-of the ideas and suggestions put forth during this discussion.
-
-In parallel with designing this API, I have also written a reference
-implementation. I have placed this source on the Net with an unencumbered,
-"open" copyright. A few notes about the reference implementation:
-
- - Although I got procedure names and specs from many Schemes, I wrote this
- code myself. Thus, there are *no* entanglements. Any Scheme implementor
- can pick this library up with no worries about copyright problems -- both
- commercial and non-commercial systems.
-
- - The code is written for portability and should be trivial to port to
- any Scheme. It has only four deviations from R4RS, clearly discussed
- in the comments:
- - Use of an ERROR procedure;
- - Use of the R5RS VALUES and a simple RECEIVE macro for producing
- and consuming multiple return values;
- - Use of simple :OPTIONAL and LET-OPTIONALS macros for optional
- argument parsing and defaulting;
- - Use of a simple CHECK-ARG procedure for argument checking.
-
- - It is written for clarity and well-commented. The current source is
- 768 lines of source code and 826 lines of comments and white space.
-
- - It is written for efficiency. Fast paths are provided for common
- cases. Side-effecting procedures such as FILTER! avoid unnecessary,
- redundant SET-CDR!s which would thrash a generational GC's write barrier
- and the store buffers of fast processors. Functions reuse longest common
- tails from input parameters to construct their results where
- possible. Constant-space iterations are used in preference to recursions;
- local recursions are used in preference to consing temporary intermediate
- data structures.
-
- This is not to say that the implementation can't be tuned up for
- a specific Scheme implementation. There are notes in comments addressing
- ways implementors can tune the reference implementation for performance.
-
-In short, I've written the reference implementation to make it as painless
-as possible for an implementor -- or a regular programmer -- to adopt this
-library and get good results with it.
-
-
-
-* Procedure index
------------------
-Here is a short list of the procedures provided by the list-lib package.
-"#" marks R5RS procedures; "+" marks extended R5RS procedures
-
-Constructors
-# cons list
- xcons cons* make-list list-tabulate
- list-copy circular-list iota
-
-Predicates
-# pair? null?
- proper-list? circular-list? dotted-list?
- not-pair? null-list?
- list=
-
-Selectors
-# car cdr ... cdddar cddddr list-ref
- first second third fourth fifth sixth seventh eighth ninth tenth
- car+cdr
- take drop
- take-right drop-right
- take! drop-right!
- split-at split-at!
- last last-pair
-
-Miscellaneous: length, append, concatenate, reverse, zip & count
-# length
- length+
-# append reverse
- append! reverse!
- concatenate concatenate!
- append-reverse append-reverse!
- zip unzip1 unzip2 unzip3 unzip4 unzip5
- count
-
-Fold, unfold & map
-+ map for-each
- fold unfold pair-fold reduce
- fold-right unfold-right pair-fold-right reduce-right
- append-map append-map!
- map! pair-for-each filter-map map-in-order
-
-Filtering & partitioning
- filter partition remove
- filter! partition! remove!
-
-Searching
-+ member
-# memq memv
- find
- any every
- list-index
- take-while drop-while take-while!
- span break span! break!
-
-Deleting
- delete delete-duplicates
- delete! delete-duplicates!
-
-Association lists
-+ assoc
-# assq assv
- alist-cons alist-copy
- alist-delete alist-delete!
-
-Set operations on lists
- lset<= lset= lset-adjoin
- lset-union lset-union!
- lset-intersection lset-intersection!
- lset-difference lset-difference!
- lset-xor lset-xor!
- lset-diff+intersection lset-diff+intersection!
-
-Primitive side effects
-# set-car! set-cdr!
-
-------
-Four R4RS/R5RS list-processing procedures are extended by this library in
-backwards-compatible ways:
- map for-each (Extended to take lists of unequal length)
- member assoc (Extended to take an optional comparison procedure)
-
-The following R4RS/R5RS list- and pair-processing procedures are also part of
-list-lib's exports, as defined by the R5RS report:
- cons pair? null? list length append reverse
- car cdr ... cdddar cddddr set-car! set-cdr! list-ref
- memq memv assq assv
-
-The remaining two R4RS/R5RS list-processing procedures are *not* part of
-this library:
- list-tail (renamed DROP)
- list? (see PROPER-LIST?, CIRCULAR-LIST? and DOTTED-LIST?)
-
-
-
-* General discussion
---------------------
-
-A set of general criteria guided the design of this library.
-
-I don't require "destructive" (what I call "linear update") procedures to
-alter and recycle cons cells from the argument lists. They are allowed to, but
-not required to. (The reference implementations I have written *do* recycle
-the argument lists.) See below for further discussion.
-
-List-filtering procedures such as FILTER or DELETE do not disorder
-lists. Elements appear in the answer list in the same order as they appear in
-the argument list. This constrains implementation, but seems like a desirable
-feature, since in many uses of lists, order matters. (In particular,
-disordering an alist is definitely a bad idea.)
-
-Contrariwise, although the reference implementations of the list-filtering
-procedures share longest common tails between argument and answer lists,
-it is not part of the spec.
-
-Because lists are an inherently sequential data structure (unlike, say,
-vectors), list-inspection functions such as FIND, FIND-TAIL, FOR-EACH, ANY
-and EVERY commit to a left-to-right traversal order of their argument list.
-
-However, constructor functions, such as LIST-TABULATE and the mapping
-procedures (APPEND-MAP, APPEND-MAP!, MAP!, PAIR-FOR-EACH, FILTER-MAP,
-MAP-IN-ORDER) do *not* specify the dynamic order in which their
-procedural argument is applied to its various values.
-
-Predicates return useful true values wherever possible. Thus ANY must return
-the true value produced by its predicate, and EVERY returns the final true
-value produced by applying its predicate argument to the last element of its
-argument list.
-
-Functionality is provided both in pure and linear-update (potentially
-destructive) forms wherever this makes sense.
-
-No special status is accorded to Scheme's built-in equality functions.
-Any functionality provided in terms of EQ?, EQV?, EQUAL? is also
-available using a client-provided equality function.
-
-Proper design counts for more than backwards compatibility, but I have tried,
-ceteris paribus, to be as backwards-compatible as possible with existing
-list-processing libraries, in order to facilitate porting old code to run as a
-client of the procedures in this library. Name choices and semantics are, for
-the most part, in agreement with existing practice in many current Scheme
-systems. I have indicated some incompatibilities in the following text.
-
-These procedures are *not* "sequence generic" -- i.e., procedures that
-operate on either vectors or lists. They are list-specific. I prefer to
-keep the library simple and focussed.
-
-I have named these procedures without a qualifying initial "list-"
-lexeme, which is in keeping with the existing set of list-processing
-utilities in Scheme. I follow the general Scheme convention
-(VECTOR-LENGTH, STRING-REF) of placing the type-name before the action
-when naming procedures -- so we have LIST-COPY and PAIR-FOR-EACH rather
-than the perhaps more fluid, but less consistent, COPY-LIST, or
-FOR-EACH-PAIR.
-
-I have generally followed a regular and consistent naming scheme, composing
-procedure names from a set of basic lexemes.
-
-
-** "Linear update" procedures
-=============================
-
-Many procedures in this library have "pure" and "linear update" variants. A
-"pure" procedure has no side-effects, and in particular does not alter its
-arguments in any way. A "linear update" procedure is allowed -- but *not*
-required -- to side-effect its arguments in order to construct its
-result. "Linear update" procedures are typically given names ending with an
-exclamation point. So, for example, (APPEND! list1 list2) is allowed to
-construct its result by simply using SET-CDR! to set the cdr of the last pair
-of list1 to point to list2, and then returning list1 (unless list1 is the
-empty list, in which case it would simply return list2). However, APPEND! may
-also elect to perform a pure append operation -- this is a legal definition
-of APPEND!:
- (define append! append)
-This is why we do not call these procedures "destructive" -- because they
-aren't *required* to be destructive. They are *potentially* destructive.
-
-What this means is that you may only apply linear-update procedures to
-values that you know are "dead" -- values that will never be used again
-in your program. This must be so, since you can't rely on the value passed
-to a linear-update procedure after that procedure has been called. It
-might be unchanged; it might be altered.
-
-The "linear" in "linear update" doesn't mean "linear time" or "linear space"
-or any sort of multiple-of-n kind of meaning. It's a fancy term that type
-theorists and pure functional programmers use to describe systems where you
-are only allowed to have exactly one reference to each variable. This provides
-a guarantee that the value bound to a variable is bound to no other
-variable. So when you *use* a variable in a variable reference, you "use it
-up." Knowing that no one else has a pointer to that value means the system
-primitive is free to side-effect its arguments to produce what is,
-observationally, a pure-functional result.
-
-In the context of this library, "linear update" means you, the programmer,
-know there are *no other* live references to the value passed to the
-procedure -- after passing the value to one of these procedures, the
-value of the old pointer is indeterminate. Basically, you are licensing
-the Scheme implementation to alter the data structure if it feels like
-it -- you have declared you don't care either way.
-
-You get no help from Scheme in checking that the values you claim are "linear"
-really are. So you better get it right. Or play it safe and use the non-!
-procedures -- it doesn't do any good to compute quickly if you get the wrong
-answer.
-
-Why go to all this trouble to define the notion of "linear update" and use it
-in a procedure spec, instead of the more common notion of a "destructive"
-operation? First, note that destructive list-processing procedures are almost
-always used in a linear-update fashion. This is in part required by the
-special case of operating upon the empty list, which can't be side-effected.
-This means that destructive operators are not pure side-effects -- they have
-to return a result. Second, note that code written using linear-update
-operators can be trivially ported to a pure, functional subset of Scheme by
-simply providing pure implementations of the linear-update operators. Finally,
-requiring destructive side-effects ruins opportunities to parallelise these
-operations -- and the places where one has taken the trouble to spell out
-destructive operations are usually exactly the code one would want a
-parallelising compiler to parallelise: the efficiency-critical kernels of the
-algorithm. Linear-update operations are easily parallelised. Going with a
-linear-update spec doesn't close off these valuable alternative implementation
-techniques. This list library is intended as a set of low-level, basic
-operators, so we don't want to exclude these possible implementations.
-
-The linear-update procedures in this library are
- take! drop-right!
- append! reverse! append-reverse!
- append-map! map!
- filter! partition! remove!
- delete! alist-delete! delete-duplicates!
- lset-adjoin! lset-union! lset-intersection! lset-difference! lset-xor!
- lset-diff+intersection!
-
-
-** Improper lists
-=================
-
-Scheme does not properly have a list type, just as C does not have a string
-type. Rather, Scheme has a binary-tuple type, from which one can build binary
-trees. There is an *interpretation* of Scheme values that allows one to treat
-these trees as lists. Further complications ensue from the fact that Scheme
-allows side-effects to these tuples, raising the possibility of lists of
-unbounded length, and trees of unbounded depth (that is, circular data
-structures).
-
-However, there is a simple view of the world of Scheme values that considers
-every value to be a list of some sort. That is, every value is either
- - a "proper list" -- a finite, nil-terminated list, such as:
- (a b c)
- ()
- (32)
- - a "dotted list" -- a finite, non-nil terminated list, such as
- (a b c . d)
- (x . y)
- 42
- george
- - or a "circular list" -- an infinite, unterminated list.
-Note that the zero-length dotted lists are simply all the non-null, non-pair
-values.
-
-This view is captured by the predicates PROPER-LIST?, DOTTED-LIST?, and
-CIRCULAR-LIST?. List-lib users should note that dotted lists are not commonly
-used, and are considered by many Scheme programmers to be an ugly artifact of
-Scheme's lack of a true list type. However, dotted lists do play a noticeable
-role in the *syntax* of Scheme, in the "rest" parameters used by n-ary
-lambdas: (lambda (x y . rest) ...).
-
-Dotted lists are *not* fully supported by list-lib. Most procedures are
-defined only on proper lists -- that is, finite, nil-terminated lists. The
-procedures that will also handle circular or dotted lists are specifically
-marked. While this design decision restricts the domain of possible arguments
-one can pass to these procedures, it has the benefit of allowing the
-procedures to catch the error cases where programmers inadvertently pass
-scalar values to a list procedure, e.g. by switching the arguments
-to a procedure call.
-
-
-** Errors
-=========
-
-Note that statements of the form "it is an error" merely mean "don't
-do that." They are not a guarantee that a conforming implementation will
-"catch" such improper use by, for example, raising some kind of exception.
-Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators
-such as CAR and CDR, so there's little point in requiring these procedures to
-do more. Here is the relevant section of the R5RS report:
-
- When speaking of an error situation, this report uses the phrase "an
- error is signalled" to indicate that implementations must detect and
- report the error. If such wording does not appear in the discussion
- of an error, then implementations are not required to detect or
- report the error, though they are encouraged to do so. An error
- situation that implementations are not required to detect is usually
- referred to simply as "an error."
-
- For example, it is an error for a procedure to be passed an argument
- that the procedure is not explicitly specified to handle, even though
- such domain errors are seldom mentioned in this report.
- Implementations may extend a procedure's domain of definition to
- include such arguments.
-
-
-** Not included in this library
-===============================
-
-The following items are not in this library:
-- Sort routines
-- Destructuring/pattern-matching macro
-- Tree-processing routines
-They should have their own SRFI specs.
-
-
-
-* The procedures
-----------------
-In a Scheme system that has a module or package system, these procedures
-should be contained in a module named "list-lib".
-
-The templates given below obey the following conventions for procedure formals:
- list A proper (finite, nil-terminated) list
- clist A proper or circular list
- flist A finite (proper or dotted) list
- pair A pair
- x, y, d, a Any value
- object, value Any value
- n, i A natural number (an integer >= 0)
- proc A procedure
- pred A procedure whose return value is treated as a boolean
- = A boolean procedure taking two arguments
-
-It is an error to pass a circular or dotted list to a procedure not
-defined to accept such an argument.
-
-** Constructors
-===============
-
-cons a d -> pair R5RS
- The primitive constructor. Returns a newly allocated pair whose car is A
- and whose cdr is D. The pair is guaranteed to be different (in the sense
- of EQV?) from every existing object.
-
- (cons 'a '()) ==> (a)
- (cons '(a) '(b c d)) ==> ((a) b c d)
- (cons "a" '(b c)) ==> ("a" b c)
- (cons 'a 3) ==> (a . 3)
- (cons '(a b) 'c) ==> ((a b) . c)
-
-list object ... -> list R5RS
- Returns a newly allocated list of its arguments.
-
- (list 'a (+ 3 4) 'c) ==> (a 7 c)
- (list) ==> ()
-
-xcons d a -> pair
- (lambda (d a) (cons a d))
- Of utility only as a value to be conveniently passed to higher-order
- procedures.
-
- (xcons '(b c) 'a) => (a b c)
-
- The name stands for "eXchanged CONS."
-
-cons* elt1 elt2 ... -> object
- Like LIST, but the last argument provides the tail of the constructed
- list, returning (cons elt1 (cons elt2 (cons ... eltn))).
- This function is called LIST* in Common Lisp and about half of the
- Schemes that provide it; and CONS* in the other half.
-
- (cons* 1 2 3 4) => (1 2 3 . 4)
- (cons* 1) => 1
-
-make-list n [fill] -> list
- Returns an N-element list, whose elements are all the value FILL.
- If the FILL argument is not given, the elements of the list may
- be arbitrary values.
-
- (make-list 4 'c) => (c c c c)
- (make-list 10) => (2 3 5 7 11 13 17 19 23 29)
-
-list-tabulate n init-proc -> list
- Returns an N-element list. Element i of the list, where 0 <= i < N,
- is produced by (INIT-PROC i). No guarantee is made about the dynamic
- order in which INIT-PROC is applied to these indices.
-
- (list-tabulate 4 values) => (0 1 2 3)
-
-list-copy flist -> flist
- Copies the "spine" of the argument.
-
-circular-list elt1 elt2 ... -> clist
- Constructs a circular list of the elements.
- (circular-list 'z 'q) => (z q z q z q ...)
-
-iota count [start step] -> list
- Returns a list containing the elements
- (start start+step ... start+(count-1)*step)
- The START and STEP parameters default to 0 and 1, respectively.
- This procedure takes its name from the APL primitive.
-
- (iota 5) => (0 1 2 3 4)
- (iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4)
-
-
-** Predicates
-=============
-
-Note: the predicates PROPER-LIST?, CIRCULAR-LIST?, and DOTTED-LIST?
-partition the entire universe of Scheme values.
-
-proper-list? x -> boolean
- Returns true iff X is a proper list -- a finite, nil-terminated list.
-
- More carefully: The empty list is a proper list. A pair whose cdr is a
- proper list is also a proper list:
- Title
-Author
-Status
-
-
-
-
-Table of contents
-
-
-
-
-
-
-
-Abstract
-
-
-
-
-Rationale
-
-
-
-
-
-error
procedure;
- values
and a simple receive
macro for producing
- and consuming multiple return values;
- :optional
and let-optionals
macros for optional
- argument parsing and defaulting;
- check-arg
procedure for argument checking.
- filter!
avoid unnecessary,
- redundant set-cdr!
s which would thrash a generational GC's write barrier
- and the store buffers of fast processors. Functions reuse longest common
- tails from input parameters to construct their results where
- possible. Constant-space iterations are used in preference to recursions;
- local recursions are used in preference to consing temporary intermediate
- data structures.
-Procedure Index
-
-
-
-cons list
-xcons cons* make-list list-tabulate
-list-copy circular-list iota
-
-
-
-pair? null?
-proper-list? circular-list? dotted-list?
-not-pair? null-list?
-list=
-
-
-
-car cdr ... cddadr cddddr list-ref
-first second third fourth fifth sixth seventh eighth ninth tenth
-car+cdr
-take drop
-take-right drop-right
-take! drop-right!
-split-at split-at!
-last last-pair
-
-
-
-length length+
-append concatenate reverse
-append! concatenate! reverse!
-append-reverse append-reverse!
-zip unzip1 unzip2 unzip3 unzip4 unzip5
-count
-
-
-
-map for-each
-fold unfold pair-fold reduce
-fold-right unfold-right pair-fold-right reduce-right
-append-map append-map!
-map! pair-for-each filter-map map-in-order
-
-
-
-filter partition remove
-filter! partition! remove!
-
-
-
-member memq memv
-find find-tail
-any every
-list-index
-take-while drop-while take-while!
-span break span! break!
-
-
-
-delete delete-duplicates
-delete! delete-duplicates!
-
-
-
-assoc assq assv
-alist-cons alist-copy
-alist-delete alist-delete!
-
-
-
-lset<= lset= lset-adjoin
-lset-union lset-union!
-lset-intersection lset-intersection!
-lset-difference lset-difference!
-lset-xor lset-xor!
-lset-diff+intersection lset-diff+intersection!
-
-
-
-set-car! set-cdr!
-
-
-
-map for-each
- (Extended to take lists of unequal length)
- member assoc
- (Extended to take an optional comparison procedure.)
-
-cons pair? null?
-car cdr ... cdddar cddddr
-set-car! set-cdr!
-list append reverse
-length list-ref
-memq memv assq assv
-
-
-
-list-tail
- (renamed drop
)
-list?
- (see proper-list?
,
- circular-list?
and
- dotted-list?
)
-General discussion
-filter
or delete
do not disorder
-lists. Elements appear in the answer list in the same order as they appear in
-the argument list. This constrains implementation, but seems like a desirable
-feature, since in many uses of lists, order matters. (In particular,
-disordering an alist is definitely a bad idea.)
-find
, find-tail
, for-each
, any
-and every
commit to a left-to-right traversal order of their argument list.
-
and the mapping
-procedures (list-tabulate
append-map
, append-map!
, map!
, pair-for-each
, filter-map
,
-map-in-order
), do not specify the dynamic order in which their procedural
-argument is applied to its various values.
-any
must return
-the true value produced by its predicate, and every
returns the final true
-value produced by applying its predicate argument to the last element of its
-argument list.
-eq?
, eqv?
, equal?
is also
-available using a client-provided equality function.
-list-copy
and pair-for-each
rather than the perhaps
-more fluid, but less consistent, copy-list
or for-each-pair
.
-"Linear update" procedures
-(append! list1 list2)
is allowed to
-construct its result by simply using set-cdr!
to set the cdr of the last pair
-of list1 to point to list2, and then returning list1 (unless list1 is the
-empty list, in which case it would simply return list2). However, append!
may
-also elect to perform a pure append operation -- this is a legal definition
-of append!
:
-
-(define append! append)
-
-
-take! drop-right! split-at!
-append! concatenate! reverse! append-reverse!
-append-map! map!
-filter! partition! remove!
-take-while! span! break!
-delete! alist-delete! delete-duplicates!
-lset-adjoin! lset-union! lset-intersection!
-lset-difference! lset-xor! lset-diff+intersection!
-
Improper Lists
-
-
-
- (a b c)
- ()
- (32)
-
- (a b c . d)
- (x . y)
- 42
- george
- proper-list?
, dotted-list?
, and
-circular-list?
. List-lib users should note that dotted lists are not commonly
-used, and are considered by many Scheme programmers to be an ugly artifact of
-Scheme's lack of a true list type. However, dotted lists do play a noticeable
-role in the syntax of Scheme, in the "rest" parameters used by n-ary
-lambdas: (lambda (x y . rest) ...)
.
-
-Errors
-car
and cdr
, so there's little point in requiring these procedures to do
-more. Here is the relevant section of the R5RS:
-
-
-
-
-
-Not included in this library
-
-
-The procedures
-
-
-
- list
- A proper (finite, nil-terminated) list
- clist
- A proper or circular list
- flist
- A finite (proper or dotted) list
- pair
- A pair
-
- x, y, d, a
- Any value
- object, value
- Any value
- n, i
- A natural number (an integer >= 0)
- proc
- A procedure
- pred
- A procedure whose return value is treated as a boolean
- =
- A boolean procedure taking two arguments
- Constructors
-
-
-
-
-
-
-cons
a d -> pair
-eqv?
)
- from every existing object.
-
-(cons 'a '()) => (a)
-(cons '(a) '(b c d)) => ((a) b c d)
-(cons "a" '(b c)) => ("a" b c)
-(cons 'a 3) => (a . 3)
-(cons '(a b) 'c) => ((a b) . c)
-
-
-
-list
object ... -> list
-
-(list 'a (+ 3 4) 'c) => (a 7 c)
-(list) => ()
-
-
-
-xcons
d a -> pair
-
-(lambda (d a) (cons a d))
-
- Of utility only as a value to be conveniently passed to higher-order
- procedures.
-
-
-(xcons '(b c) 'a) => (a b c)
-
-
- The name stands for "eXchanged CONS."
-
-
-
-cons*
elt1 elt2 ... -> object
-list
,
- but the last argument provides the tail of the constructed list,
- returning
-
-(cons elt1 (cons elt2 (cons ... eltn)))
-
list*
in Common Lisp and about
- half of the Schemes that provide it,
- and cons*
in the other half.
-
-(cons* 1 2 3 4) => (1 2 3 . 4)
-(cons* 1) => 1
-
-
-
-
-make-list
n [fill] -> list
-
-(make-list 4 'c) => (c c c c)
-
-
-
-
-list-tabulate
n init-proc -> list
-(init-proc i)
. No guarantee is made about the dynamic
- order in which init-proc is applied to these indices.
-
-
-(list-tabulate 4 values) => (0 1 2 3)
-
-
-
-
-list-copy
flist -> flist
-circular-list
elt1 elt2 ... -> clist
-
-(circular-list 'z 'q) => (z q z q z q ...)
-
-
-
-
-iota
count [start step] -> list
-
-(start start+step ... start+(count-1)*step)
-
- The start and step parameters default to 0 and 1, respectively.
- This procedure takes its name from the APL primitive.
-
-
-(iota 5) => (0 1 2 3 4)
-(iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4)
-
-Predicates
-proper-list?
, circular-list?
, and dotted-list?
-partition the entire universe of Scheme values.
-
-
-
-
-
-
-
-proper-list?
x -> boolean
-
-
-<proper-list> ::= () (Empty proper list)
- | (cons <x> <proper-list>) (Proper-list pair)
-
- Note that this definition rules out circular lists. This
- function is required to detect this case and return false.
-list?
.
-
-(not (proper-list? x)) = (or (dotted-list? x) (circular-list? x))
-
-
-
-
-circular-list?
x -> boolean
-
-(not (circular-list? x)) = (or (proper-list? x) (dotted-list? x))
-
-
-
-
-dotted-list?
x -> boolean
-
-(not (dotted-list? x)) = (or (proper-list? x) (circular-list? x))
-
-
-
-
-pair?
object -> boolean
-
-(pair? '(a . b)) => #t
-(pair? '(a b c)) => #t
-(pair? '()) => #f
-(pair? '#(a b)) => #f
-(pair? 7) => #f
-(pair? 'a) => #f
-
-
-
-
-null?
object -> boolean
-null-list?
list -> boolean
-not-pair?
x -> boolean
-(lambda (x) (not (pair? x)))
- Provided as a procedure as it can be useful as the termination condition
- for list-processing procedures that wish to handle all finite lists,
- both proper and dotted.
-
-
-list=
elt= list1 ... -> boolean
-(elt= a b)
- for a an element of list A,
- and b an element of list B.
-list=
simply returns true.
-list=
to anything except proper lists.
- While
- implementations may choose to extend it to circular lists, note that it
- cannot reasonably be extended to dotted lists, as it provides no way to
- specify an equality procedure for comparing the list terminators.
-list=
is applied
- to three lists, A, B, and C,
- it may first completely compare A to B,
- then compare B to C,
- or it may compare the first elements of A and B,
- then the first elements of B and C,
- then the second elements of A and B, and so forth.
-eq?
.
- That is, it must be the case that
-(eq? x y)
=> (elt= x y)
.
-eq?
- are always list=, as well; implementations may exploit this
- fact to "short-cut" the element-by-element comparisons.
-
-(list= eq?) => #t ; Trivial cases
-(list= eq? '(a)) => #t
-
-
-Selectors
-
-
-
-
-
-
-car
pair -> value
-cdr
pair -> value
-
-(car '(a b c)) => a (cdr '(a b c)) => (b c)
-(car '((a) b c d)) => (a) (cdr '((a) b c d)) => (b c d)
-(car '(1 . 2)) => 1 (cdr '(1 . 2)) => 2
-(car '()) => *error* (cdr '()) => *error*
-
-
-
-
-
-
-
-
-
-caar
pair -> value
-cadr
pair -> value
-:
-cdddar
pair -> value
-cddddr
pair -> value
-car
and cdr
,
- where for example caddr
could be defined by
-
-(define caddr (lambda (x) (car (cdr (cdr x))))).
-
- Arbitrary compositions, up to four deep, are provided. There are
- twenty-eight of these procedures in all.
-
-
-
-list-ref
clist i -> value
-(drop clist i)
.)
- It is an error if i >= n,
- where n is the length of clist.
-
-(list-ref '(a b c d) 2) => c
-
-
-
-first
pair -> object
-second
pair -> object
-third
pair -> object
-fourth
pair -> object
-fifth
pair -> object
-sixth
pair -> object
-seventh
pair -> object
-eighth
pair -> object
-ninth
pair -> object
-tenth
pair -> object
-car
, cadr
, caddr
, ...
-
-
-(third '(a b c d e)) => c
-
-
-
-car+cdr
pair -> [x y]
-
-(lambda (p) (values (car p) (cdr p)))
-
- This can, of course, be implemented more efficiently by a compiler.
-
-
-take
x i -> list
-drop
x i -> object
-take
returns the first i elements of list x.
- drop
returns all but the first i elements of list x.
-
-(take '(a b c d e) 2) => (a b)
-(drop '(a b c d e) 2) => (c d e)
-
- x may be any value -- a proper, circular, or dotted list:
-
-(take '(1 2 3 . d) 2) => (1 2)
-(drop '(1 2 3 . d) 2) => (3 . d)
-(take '(1 2 3 . d) 3) => (1 2 3)
-(drop '(1 2 3 . d) 3) => d
-
- For a legal i, take
and drop
partition the list in a manner which
- can be inverted with append
:
-
-(append (take x i) (drop x i)) = x
-
- drop
is exactly equivalent to performing i cdr operations on x;
- the returned value shares a common tail with x.
-
- If the argument is a list of non-zero length, take
is guaranteed to
- return a freshly-allocated list, even in the case where the entire
- list is taken, e.g. (take lis (length lis))
.
-
-
-take-right
flist i -> object
-drop-right
flist i -> list
-take-right
returns the last i elements of flist.
- drop-right
returns all but the last i elements of flist.
-
-(take-right '(a b c d e) 2) => (d e)
-(drop-right '(a b c d e) 2) => (a b c)
-
- The returned list may share a common tail with the argument list.
-
-(take-right '(1 2 3 . d) 2) => (2 3 . d)
-(drop-right '(1 2 3 . d) 2) => (1)
-(take-right '(1 2 3 . d) 0) => d
-(drop-right '(1 2 3 . d) 0) => (1 2 3)
-
- For a legal i, take-right
and drop-right
partition the list in a manner
- which can be inverted with append
:
-
-(append (drop-right flist i) (take-right flist i)) = flist
-
- take-right
's return value is guaranteed to share a common tail with flist.
-
- If the argument is a list of non-zero length, drop-right
is guaranteed to
- return a freshly-allocated list, even in the case where nothing is
- dropped, e.g. (drop-right lis 0)
.
-
-
-take!
x i -> list
-drop-right!
flist i -> list
-take!
and drop-right!
are "linear-update" variants of take
and
- drop-right
: the procedure is allowed, but not required, to alter the
- argument list to produce the result.
-take!
may return a shorter-than-expected list:
-
-(take! (circular-list 1 3 5) 8) => (1 3)
-(take! (circular-list 1 3 5) 8) => (1 3 5 1 3 5 1 3)
-
-
-
-
-split-at
x i -> [list object]
-split-at!
x i -> [list object]
-split-at
splits the list x
- at index i, returning a list of the
- first i elements, and the remaining tail. It is equivalent
- to
-
-(values (take x i) (drop x i))
-
- split-at!
is the linear-update variant. It is allowed, but not
- required, to alter the argument list to produce the result.
-
-(split-at '(a b c d e f g h) 3) =>
- (a b c)
- (d e f g h)
-
-
-
-
-last
pair -> object
-last-pair
pair -> pair
-last
returns the last element of the non-empty,
- finite list pair.
- last-pair
returns the last pair in the non-empty,
- finite list pair.
-
-
-(last '(a b c)) => c
-(last-pair '(a b c)) => (c)
-
-
-Miscellaneous: length, append, concatenate, reverse, zip & count
-
-
-
-
-
-
-length
list -> integer
-length+
clist -> integer or #f
-length
and length+
return the length of the argument.
- It is an error to pass a value to length
which is not a proper
- list (finite and nil-terminated). In particular, this means an
- implementation may diverge or signal an error when length
is
- applied to a circular list.
-length+
, on the other hand, returns #F
when applied to a circular
- list.
-cdr
- applied n times to the list produces the empty list.
-
-
-
-append
list1 ... -> list
-append!
list1 ... -> list
-append
returns a list consisting of the elements
- of list1
- followed by the elements of the other list parameters.
-
-(append '(x) '(y)) => (x y)
-(append '(a) '(b c d)) => (a b c d)
-(append '(a (b)) '((c))) => (a (b) (c))
-
- The resulting list is always newly allocated, except that it
- shares structure with the final list_i argument.
- This last argument may be any value at all;
- an improper list results if it is not
- a proper list. All other arguments must be proper lists.
-
-(append '(a b) '(c . d)) => (a b c . d)
-(append '() 'a) => a
-(append '(x y)) => (x y)
-(append) => ()
-
-
- append!
is the "linear-update" variant of append
- -- it is allowed, but not required, to alter cons cells in the argument
- lists to construct the result list.
- The last argument is never altered; the result
- list shares structure with this parameter.
-
-
-concatenate
list-of-lists -> value
-concatenate!
list-of-lists -> value
-concatenate
returns
-
-(apply append list-of-lists)
-
- or, equivalently,
-
-(reduce-right append '() list-of-lists)
-
-
- concatenate!
is the linear-update variant, defined in
- terms of append!
instead of append
.
-
-(apply append ...)
idiom
- would fail when applied to long lists,
- but concatenate
would continue to function properly.
-
-append
and append!
,
- the last element of the input list may be any value at all.
-
-
-reverse
list -> list
-reverse!
list -> list
-reverse
returns a newly allocated list consisting of
- the elements of list in reverse order.
-
-(reverse '(a b c)) => (c b a)
-(reverse '(a (b c) d (e (f))))
- => ((e (f)) d (b c) a)
-
- reverse!
is the linear-update variant of reverse
.
- It is permitted, but not required, to alter the argument's cons cells
- to produce the reversed list.
-
-
-
-append-reverse
rev-head tail -> list
-append-reverse!
rev-head tail -> list
-append-reverse
returns
- (append (reverse rev-head) tail)
.
- It is provided because it is a common operation -- a common
- list-processing style calls for this exact operation to transfer values
- accumulated in reverse order onto the front of another list, and because
- the implementation is significantly more efficient than the simple
- composition it replaces. (But note that this pattern of iterative
- computation followed by a reverse can frequently be rewritten as a
- recursion, dispensing with the reverse
and append-reverse
steps, and
- shifting temporary, intermediate storage from the heap to the stack,
- which is typically a win for reasons of cache locality and eager storage
- reclamation.)
-append-reverse!
is just the linear-update variant -- it is allowed, but
- not required, to alter rev-head's cons cells to construct the result.
-
-
-
-zip
clist1 clist2 ... -> list
-(lambda lists (apply map list lists))
-
- If zip
is passed n lists, it returns a list as long as the shortest
- of these lists, each element of which is an n-element list comprised
- of the corresponding elements from the parameter lists.
-
-
-(zip '(one two three)
- '(1 2 3)
- '(odd even odd even odd even odd even))
- => ((one 1 odd) (two 2 even) (three 3 odd))
-
-(zip '(1 2 3)) => ((1) (2) (3))
-
- At least one of the argument lists must be finite:
-
-(zip '(3 1 4 1) (circular-list #f #t))
- => ((3 #f) (1 #t) (4 #f) (1 #t))
-
-
-
-
-unzip1
list -> list
-
-unzip2
list -> [list list]
-
-unzip3
list -> [list list list]
-
-unzip4
list -> [list list list list]
-
-unzip5
list -> [list list list list list]
-unzip1
takes a list of lists,
- where every list must contain at least one element,
- and returns a list containing the initial element of each such list.
- That is, it returns (map car lists)
.
- unzip2
takes a list of lists, where every list must contain at least
- two elements, and returns two values: a list of the first elements,
- and a list of the second elements. unzip3
does the same for the first
- three elements of the lists, and so forth.
-
-
-(unzip2 '((1 one) (2 two) (3 three))) =>
- (1 2 3)
- (one two three)
-
-
-
-count
pred clist1 clist2 -> integer
-count
is "iterative" in that it is guaranteed
- to apply pred to the list elements in a
- left-to-right order.
- The counting stops when the shortest list expires.
-
-(count even? '(3 1 4 1 5 9 2 5 6)) => 3
-(count < '(1 2 4 8) '(2 4 6 8 10 12 14 16)) => 3
-
- At least one of the argument lists must be finite:
-
-(count < '(3 1 4 1) (circular-list 1 10)) => 2
-
-
-Fold, unfold & map
-
-
-
-
-
-fold
kons knil clist1 clist2 ... -> value
-(kons en ... (kons e2 (kons e1 knil)) ... )
-
-(fold kons knil lis) = (fold kons (kons (car lis) knil) (cdr lis))
-(fold kons knil '()) = knil
-
-
- Examples:
-
-(fold + 0 lis) ; Add up the elements of LIS.
-
-(fold cons '() lis) ; Reverse LIS.
-
-(fold cons tail rev-head) ; See APPEND-REVERSE.
-
-;; How many symbols in LIS?
-(fold (lambda (x count) (if (symbol? x) (+ count 1) count))
- 0
- lis)
-
-;; Length of the longest string in LIS:
-(fold (lambda (s max-len) (max max-len (string-length s)))
- 0
- lis)
-
-
- If n list arguments are provided, then the kons function must take
- n+1 parameters: one element from each list, and the "seed" or fold
- state, which is initially knil. The fold operation terminates when
- the shortest list runs out of values:
-
-(fold cons* '() '(a b c) '(1 2 3 4 5)) => (c 3 b 2 a 1)
-
- At least one of the list arguments must be finite.
-
-
-fold-right
kons knil clist1 clist2 ... -> value
-(e1 e2 ... en)
,
- then this procedure returns
-
-(kons e1 (kons e2 ... (kons en knil)))
-
-(fold-right kons knil lis) = (kons (car lis) (fold-right kons knil (cdr lis)))
-(fold-right kons knil '()) = knil
-
-
- Examples:
-
-(fold-right cons '() lis) ; Copy LIS.
-
-;; Filter the even numbers out of LIS.
-(fold-right (lambda (x l) (if (even? x) (cons x l) l)) '() lis)
-
-
- If n list arguments are provided, then the kons function must take
- n+1 parameters: one element from each list, and the "seed" or fold
- state, which is initially knil. The fold operation terminates when
- the shortest list runs out of values:
-
-(fold-right cons* '() '(a b c) '(1 2 3 4 5)) => (a 1 b 2 c 3)
-
- At least one of the list arguments must be finite.
-
-
-pair-fold
kons knil clist1 clist2 ... -> value
-fold
, but kons is applied to successive sublists of the
- lists, rather than successive elements -- that is, kons is applied to the
- pairs making up the lists, giving this (tail) recursion:
-
-(pair-fold kons knil lis) = (let ((tail (cdr lis)))
- (pair-fold kons (kons lis knil) tail))
-(pair-fold kons knil '()) = knil
-
- For finite lists, the kons function may reliably apply set-cdr! to the pairs
- it is given without altering the sequence of execution.
-
-;;; Destructively reverse a list.
-(pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)
-
-
- At least one of the list arguments must be finite.
-
-
-
-pair-fold-right
kons knil clist1 clist2 ... -> value
-fold-right
that pair-fold
holds with fold
.
- Obeys the recursion
-
-(pair-fold-right kons knil lis) =
- (kons lis (pair-fold-right kons knil (cdr lis)))
-(pair-fold-right kons knil '()) = knil
-
- Example:
-
-(pair-fold-right cons '() '(a b c)) => ((a b c) (b c) (c))
-
-
- At least one of the list arguments must be finite.
-
-
-reduce
f ridentity list -> value
-reduce
is a variant of fold
.
-
-(f x ridentity) = x
-
-
- reduce
has the following definition:
-
-Otherwise, return (fold f (car list) (cdr list))
.
-(fold f ridentity list)
.
-reduce
when applying f is expensive and you'd
- like to avoid the extra application incurred when fold
applies
- f to the head of list and the identity value,
- redundantly producing the same value passed in to f.
- For example, if f involves searching a file directory or
- performing a database query, this can be significant.
- In general, however, fold
is useful in many contexts where reduce
is not
- (consider the examples given in the fold
definition -- only one of the
- five folds uses a function with a right identity.
- The other four may not be performed with reduce
).
-
-reduce
and
- fold
functions.
-
-
-;; Take the max of a list of non-negative integers.
-(reduce max 0 nums) ; i.e., (apply max 0 nums)
-
-
-
-reduce-right
f ridentity list -> value
-reduce-right
is the fold-right variant of reduce
.
- It obeys the following definition:
-
-(reduce-right f ridentity '()) = ridentity
-(reduce-right f ridentity '(e1)) = (f e1 ridentity) = e1
-(reduce-right f ridentity '(e1 e2 ...)) =
- (f e1 (reduce-right f ridentity '(e2 ...)))
-
- ...in other words, we compute
- (fold-right f ridentity list)
.
-
-
-;; Append a bunch of lists together.
-;; I.e., (apply append list-of-lists)
-(reduce-right append '() list-of-lists)
-
-
-
-unfold
p f g seed [tail-gen] -> list
-unfold
is best described by its basic recursion:
-
-(unfold p f g seed) =
- (if (p seed) (tail-gen seed)
- (cons (f seed)
- (unfold p f g (g seed))))
-
-
-
-(lambda (x) '())
-unfold
is the fundamental recursive list constructor,
- just as fold-right
is
- the fundamental recursive list consumer.
- While unfold
may seem a bit abstract
- to novice functional programmers, it can be used in a number of ways:
-
-
-;; List of squares: 1^2 ... 10^2
-(unfold (lambda (x) (> x 10))
- (lambda (x) (* x x))
- (lambda (x) (+ x 1))
- 1)
-
-(unfold null-list? car cdr lis) ; Copy a proper list.
-
-;; Read current input port into a list of values.
-(unfold eof-object? values (lambda (x) (read)) (read))
-
-;; Copy a possibly non-proper list:
-(unfold not-pair? car cdr lis
- values)
-
-;; Append HEAD onto TAIL:
-(unfold null-list? car cdr head
- (lambda (x) tail))
-
-
- Interested functional programmers may enjoy noting that
- fold-right
and unfold
- are in some sense inverses.
- That is, given operations knull?, kar,
- kdr, kons, and knil satisfying
-(kons (kar x) (kdr x))
= x
- and
-(knull? knil)
= #t
-(fold-right kons knil (unfold knull? kar kdr x))
= x
-(unfold knull? kar kdr (fold-right kons knil x))
= x.
-unfold-right
p f g seed [tail] -> list
-unfold-right
constructs a list with the following loop:
-
-(let lp ((seed seed) (lis tail))
- (if (p seed) lis
- (lp (g seed)
- (cons (f seed) lis))))
-
-
-
-'()
.
-unfold-right
is the fundamental iterative list constructor,
- just as fold
is the
- fundamental iterative list consumer.
- While unfold-right
may seem a bit abstract
- to novice functional programmers, it can be used in a number of ways:
-
-;; List of squares: 1^2 ... 10^2
-(unfold-right zero?
- (lambda (x) (* x x))
- (lambda (x) (- x 1))
- 10)
-
-;; Reverse a proper list.
-(unfold-right null-list? car cdr lis)
-
-;; Read current input port into a list of values.
-(unfold-right eof-object? values (lambda (x) (read)) (read))
-
-;; (append-reverse rev-head tail)
-(unfold-right null-list? car cdr rev-head tail)
-
-
- Interested functional programmers may enjoy noting that
- fold
and unfold-right
- are in some sense inverses.
- That is, given operations knull?, kar,
- kdr, kons, and knil satisfying
-(kons (kar x) (kdr x))
= x
- and
-(knull? knil)
= #t
-(fold kons knil (unfold-right knull? kar kdr x))
= x
-(unfold-right knull? kar kdr (fold kons knil x))
= x.
-map
proc clist1 clist2 ... -> list
-map
applies proc element-wise to the elements
- of the lists and returns a list of the results,
- in order.
- The dynamic order in which proc
- is applied to the elements of the lists is unspecified.
-
-
-(map cadr '((a b) (d e) (g h))) => (b e h)
-
-(map (lambda (n) (expt n n))
- '(1 2 3 4 5))
- => (1 4 27 256 3125)
-
-(map + '(1 2 3) '(4 5 6)) => (5 7 9)
-
-(let ((count 0))
- (map (lambda (ignored)
- (set! count (+ count 1))
- count)
- '(a b))) => (1 2) or (2 1)
-
-
- This procedure is extended from its
- R5RS
- specification to allow the arguments to be of unequal length;
- it terminates when the shortest list runs out.
-
-(map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1)
-
-
-
-for-each
proc clist1 clist2 ... -> unspecified
-for-each
are like the arguments to
- map
, but
- for-each
calls proc for its side effects rather
- than for its values.
- Unlike map
, for-each
is guaranteed to call
- proc on the elements of the lists in order from the first
- element(s) to the last,
- and the value returned by for-each
is unspecified.
-
-(let ((v (make-vector 5)))
- (for-each (lambda (i)
- (vector-set! v i (* i i)))
- '(0 1 2 3 4))
- v) => #(0 1 4 9 16)
-
-
- This procedure is extended from its
- R5RS
- specification to allow the arguments to be of unequal length;
- it terminates when the shortest list runs out.
-append-map
f clist1 clist2 ... -> value
-append-map!
f clist1 clist2 ... -> value
-
-(apply append (map f clist1 clist2 ...))
-
-(apply append! (map f clist1 clist2 ...))
-
map
function.
- However, the results of the applications are appended together to
- make the final result. append-map
uses append
to append the results
- together; append-map!
uses append!
.
-
-(append-map! (lambda (x) (list x (- x))) '(1 3 8))
- => (1 -1 3 -3 8 -8)
-
-
- At least one of the list arguments must be finite.
-
-
-map!
f list1 clist2 ... -> list
-map
-- map!
is allowed, but not required, to
- alter the cons cells of list1 to construct the result list.
-map-in-order
f clist1 clist2 ... -> list
-map
procedure that guarantees to apply f across
- the elements of the list_i arguments in a left-to-right order. This
- is useful for mapping procedures that both have side effects and
- return useful values.
-pair-for-each
f clist1 clist2 ... -> unspecific
-for-each
, but f is applied to successive sublists of the argument
- lists. That is, f is applied to the cons cells of the lists, rather
- than the lists' elements. These applications occur in left-to-right
- order.
-set-cdr!
to the pairs it is given
- without altering the sequence of execution.
-
-
-(pair-for-each (lambda (pair) (display pair) (newline)) '(a b c)) ==>
- (a b c)
- (b c)
- (c)
-
-
- At least one of the list arguments must be finite.
-
-
-filter-map
f clist1 clist2 ... -> list
-map
, but only true values are saved.
-
-(filter-map (lambda (x) (and (number? x) (* x x))) '(a 1 b 3 c 7))
- => (1 9 49)
-
- The dynamic order in which the various applications of f are made is
- not specified.
-Filtering & partitioning
-
-
-
-
-
-
-filter
pred list -> list
-
-(filter even? '(0 7 8 8 43 -4)) => (0 8 8 -4)
-
-
-
-partition
pred list -> [list list]
-
-(partition symbol? '(one 2 3 four five 6)) =>
- (one four five)
- (2 3 6)
-
-
-
-remove
pred list -> list
-
-(lambda (pred list) (filter (lambda (x) (not (pred x))) list))
-
- The list is not disordered -- elements that appear in the result list
- occur in the same order as they occur in the argument list.
- The returned list may share a common tail with the argument list.
- The dynamic order in which the various applications of pred are made is
- not specified.
-
-
-(remove even? '(0 7 8 8 43 -4)) => (7 43)
-
-
-
-filter!
pred list -> list
-partition!
pred list -> [list list]
-remove!
pred list -> list
-filter
, partition
and remove
.
- These procedures are allowed, but not required, to alter the cons cells
- in the argument list to construct the result lists.
-
-Searching
-
-
-find
and any
procedures as canonical
-representatives:
-
-;; Proper list -- success
-(find even? '(1 2 3)) => 2
-(any even? '(1 2 3)) => #t
-
-;; proper list -- failure
-(find even? '(1 7 3)) => #f
-(any even? '(1 7 3)) => #f
-
-;; Failure is error on a dotted list.
-(find even? '(1 3 . x)) => error
-(any even? '(1 3 . x)) => error
-
-;; The dotted list contains an element satisfying the search.
-;; This case is not specified -- it could be success, an error,
-;; or some third possibility.
-(find even? '(1 2 . x)) => error/undefined
-(any even? '(1 2 . x)) => error/undefined ; success, error or other.
-
-;; circular list -- success
-(find even? (circular-list 1 6 3)) => 6
-(any even? (circular-list 1 6 3)) => #t
-
-;; circular list -- failure is error. Procedure may diverge.
-(find even? (circular-list 1 3)) => error
-(any even? (circular-list 1 3)) => error
-
-
-
-
-
-
-
-find
pred clist -> value
-
-(find even? '(3 1 4 1 5 9)) => 4
-
-
- Note that find
has an ambiguity in its lookup semantics -- if find
- returns #f
, you cannot tell (in general) if it found a #f
element
- that satisfied pred, or if it did not find any element at all. In
- many situations, this ambiguity cannot arise -- either the list being
- searched is known not to contain any #f
elements, or the list is
- guaranteed to have an element satisfying pred. However, in cases
- where this ambiguity can arise, you should use find-tail
instead of
- find
-- find-tail
has no such ambiguity:
-
-(cond ((find-tail pred lis) => (lambda (pair) ...)) ; Handle (CAR PAIR)
- (else ...)) ; Search failed.
-
-
-
-find-tail
pred clist -> pair or false
-find-tail
can be viewed as a general-predicate variant of the member
- function.
-
-(find-tail even? '(3 1 37 -8 -5 0 0)) => (-8 -5 0 0)
-(find-tail even? '(3 1 37 -5)) => #f
-
-;; MEMBER X LIS:
-(find-tail (lambda (elt) (equal? x elt)) lis)
-
-
- In the circular-list case, this procedure "rotates" the list.
-
-Find-tail
is essentially drop-while
,
- where the sense of the predicate is inverted:
- Find-tail
searches until it finds an element satisfying
- the predicate; drop-while
searches until it finds an
- element that doesn't satisfy the predicate.
-
-
-take-while
pred clist -> list
-take-while!
pred clist -> list
-Take-while!
is the linear-update variant. It is allowed, but not
-required, to alter the argument list to produce the result.
-
-
-(take-while even? '(2 18 3 10 22 9)) => (2 18)
-
-
-
-drop-while
pred clist -> list
-
-(drop-while even? '(2 18 3 10 22 9)) => (3 10 22 9)
-
-The circular-list case may be viewed as "rotating" the list.
-
-
-
-span
pred clist -> [list clist]
-span!
pred list -> [list list]
-break
pred clist -> [list clist]
-break!
pred list -> [list list]
-Span
splits the list into the longest initial prefix whose
-elements all satisfy pred, and the remaining tail.
-Break
inverts the sense of the predicate:
-the tail commences with the first element of the input list
-that satisfies the predicate.
-
-span
finds the initial span of elements
-satisfying pred,
-and break
breaks the list at the first element satisfying
-pred.
-
-Span
is equivalent to
-
-(values (take-while pred clist)
- (drop-while pred clist))
-
-
-Span!
and break!
are the linear-update variants.
-They are allowed, but not required,
-to alter the argument list to produce the result.
-
-
-(span even? '(2 18 3 10 22 9)) =>
- (2 18)
- (3 10 22 9)
-
-(break even? '(3 1 4 1 5 9)) =>
- (3 1)
- (4 1 5 9)
-
-
-
-
-any
pred clist1 clist2 ... -> value
-any
applies pred to the first elements of the clisti parameters.
- If this application returns a true value, any
immediately returns
- that value. Otherwise, it iterates, applying pred to the second
- elements of the clisti parameters, then the third, and so forth.
- The iteration stops when a true value is produced or one of the lists runs
- out of values; in
- the latter case, any
returns #f
.
- The application of pred to the last element of the
- lists is a tail call.
-find
and any
-- find
returns the element
- that satisfied the predicate; any
returns the true value that the
- predicate produced.
-every
, any
's name does not end with a question mark -- this is to
- indicate that it does not return a simple boolean (#t
or #f
), but a
- general value.
-
-
-(any integer? '(a 3 b 2.7)) => #t
-(any integer? '(a 3.1 b 2.7)) => #f
-(any < '(3 1 4 1 5)
- '(2 7 1 8 2)) => #t
-
-
-
-every
pred clist1 clist2 ... -> value
-every
applies pred to the first elements of the clisti parameters.
- If this application returns false, every
immediately returns false.
- Otherwise, it iterates, applying pred to the second elements of the
- clisti parameters, then the third, and so forth. The iteration stops
- when a false value is produced or one of the lists runs out of values.
- In the latter case, every
returns
- the true value produced by its final application of pred.
- The application of pred to the last element of the lists
- is a tail call.
-every
simply returns #t
.
-any
, every
's name does not end with a question mark -- this is to
- indicate that it does not return a simple boolean (#t
or #f
), but a
- general value.
-
-
-list-index
pred clist1 clist2 ... -> integer or false
-list-index
applies pred to the first elements of the clisti parameters.
- If this application returns true, list-index
immediately returns zero.
- Otherwise, it iterates, applying pred to the second elements of the
- clisti parameters, then the third, and so forth. When it finds a tuple of
- list elements that cause pred to return true, it stops and returns the
- zero-based index of that position in the lists.
-list-index
returns #f
.
-
-
-(list-index even? '(3 1 4 1 5 9)) => 2
-(list-index < '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => 1
-(list-index = '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => #f
-
-
-
-member
x list [=] -> list
-memq
x list -> list
-memv
x list -> list
-(drop list i)
- for i less than the length of list.
- If x does
- not occur in list, then #f
is returned.
- memq
uses eq?
to compare x
- with the elements of list,
- while memv
uses eqv?
, and
- member
uses equal?
.
-
-
- (memq 'a '(a b c)) => (a b c)
- (memq 'b '(a b c)) => (b c)
- (memq 'a '(b c d)) => #f
- (memq (list 'a) '(b (a) c)) => #f
- (member (list 'a)
- '(b (a) c)) => ((a) c)
- (memq 101 '(100 101 102)) => *unspecified*
- (memv 101 '(100 101 102)) => (101 102)
-
-
- member
is extended from its
- R5RS
- definition to allow the client to pass in
- an optional equality procedure = used to compare keys.
-
-
-(= x ei) ; list is (E1 ... En)
-
(member 5 list <)
-
-find-tail
and find
procedures, e.g.
-
-(find-tail even? list) ; Find the first elt with an even key.
-
-
-Deletion
-
-
-
-
-
-delete
x list [=] -> list
-delete!
x list [=] -> list
-delete
uses the comparison procedure =, which defaults to equal?
, to find
- all elements of list that are equal to x, and deletes them from list. The
- dynamic order in which the various applications of = are made is not
- specified.
-
-remove
- and remove!
procedures, e.g.:
-
-;; Delete all the even elements from LIS:
-(remove even? lis)
-
-
- The comparison procedure is used in this way:
- (= x ei)
.
- That is, x is always the first argument,
- and a list element is always the
- second argument. The comparison procedure will be used to compare each
- element of list exactly once; the order in which it is applied to the
- various ei is not specified. Thus, one can reliably remove all the
- numbers greater than five from a list with
- (delete 5 list <)
-
-delete!
is the linear-update variant of delete
.
- It is allowed, but not required, to alter the cons cells in
- its argument list to construct the result.
-
-
-delete-duplicates
list [=] -> list
-delete-duplicates!
list [=] -> list
-delete-duplicates
removes duplicate elements from the
- list argument.
- If there are multiple equal elements in the argument list, the result list
- contains only the first (leftmost) of these elements.
- The order of these surviving elements is the same as in the original
- list -- delete-duplicates
does not disorder the list (hence it is useful
- for "cleaning up" association lists).
-equal?
. If x comes before y in list, then the comparison is performed
- (= x y)
.
- The comparison procedure will be used to compare each pair of elements in
- list no more than once;
- the order in which it is applied to the various pairs is not specified.
-delete-duplicates
- are allowed to share common tails
- between argument and result lists -- for example, if the list argument
- contains only unique elements, it may simply return exactly
- this list.
-delete-duplicates
- runs in time O(n^2) for n-element lists.
- Uniquifying long lists can be accomplished in O(n lg n) time by sorting
- the list to bring equal elements together, then using a linear-time
- algorithm to remove equal elements. Alternatively, one can use algorithms
- based on element-marking, with linear-time results.
-
-delete-duplicates!
is the linear-update variant of delete-duplicates
; it
- is allowed, but not required, to alter the cons cells in its argument
- list to construct the result.
-
-(delete-duplicates '(a b a c a b c z)) => (a b c z)
-
-;; Clean up an alist:
-(delete-duplicates '((a . 3) (b . 7) (a . 9) (c . 1))
- (lambda (x y) (eq? (car x) (car y))))
- => ((a . 3) (b . 7) (c . 1))
-
-Association lists
-
-
-
-
-
-
-assoc
key alist [=] -> pair or #f
-assq
key alist -> pair or #f
-assv
key alist -> pair or #f
-#f
is returned.
- assq
uses eq?
to compare key
- with the car fields of the pairs in alist,
- while assv
uses eqv?
- and assoc
uses equal?
.
-
-(define e '((a 1) (b 2) (c 3)))
-(assq 'a e) => (a 1)
-(assq 'b e) => (b 2)
-(assq 'd e) => #f
-(assq (list 'a) '(((a)) ((b)) ((c)))) => #f
-(assoc (list 'a) '(((a)) ((b)) ((c)))) => ((a))
-(assq 5 '((2 3) (5 7) (11 13))) => *unspecified*
-(assv 5 '((2 3) (5 7) (11 13))) => (5 7)
-
-
- assoc
is extended from its
- R5RS
- definition to allow the client to pass in
- an optional equality procedure = used to compare keys.
-
-
-(= key (car ei)) ; list is (E1 ... En)
-
(assoc 5 alist <)
-
-find-tail
and find
procedures, e.g.
-
-;; Look up the first association in alist with an even key:
-(find (lambda (a) (even? (car a))) alist)
-
-
-
-
-alist-cons
key datum alist -> alist
-
-(lambda (key datum alist) (cons (cons key datum) alist))
-
- Cons a new alist entry mapping key -> datum onto alist.
-
-
-alist-copy
alist -> alist
-
-(lambda (a) (map (lambda (elt) (cons (car elt) (cdr elt))) a))
-
-
-
-alist-delete
key alist [=] -> alist
-alist-delete!
key alist [=] -> alist
-alist-delete
deletes all associations from alist with the given key,
- using key-comparison procedure =, which defaults to equal?
.
- The dynamic order in which the various applications of = are made is not
- specified.
-(= key ki)
.
- Thus, one can reliably remove all entries of alist whose key is greater
- than five with
- (alist-delete 5 alist <)
-alist-delete!
is the linear-update variant of alist-delete
.
- It is allowed, but not required,
- to alter cons cells from the alist parameter to construct the result.
-
-Set operations on lists
-eq?
.
-That is, it must be the case that
-(eq? x y)
=> (= x y)
.
-eq?
are
-also set-equal by any legal comparison procedure. This allows for
-constant-time determination of set operations on eq?
lists.
-
-
-
-
-
-
-lset<=
= list1 ... -> boolean
-
-(lset<= eq? '(a) '(a b a) '(a b c c)) => #t
-
-(lset<= eq?) => #t ; Trivial cases
-(lset<= eq? '(a)) => #t
-
-
-
-lset=
= list1 list2 ... -> boolean
-
-(lset= eq? '(b e a) '(a e b) '(e e b a)) => #t
-
-(lset= eq?) => #t ; Trivial cases
-(lset= eq? '(a)) => #t
-
-
-
-lset-adjoin
= list elt1 ... -> list
-
-(lset-adjoin eq? '(a b c d c e) 'a 'e 'i 'o 'u) => (u o i a b c d c e)
-
-
-
-lset-union
= list1 ... -> list
-
-
- However, there is no guarantee that = will be applied to every pair
- of arguments from A and B.
- In particular, if A is (= r b)
.
- If all comparisons fail,
- b is consed onto the front of the result.
- eq?
to B,
- the operation may immediately terminate.
-
-
-(lset-union eq? '(a b c d e) '(a e i o u)) =>
- (u o i a b c d e)
-
-;; Repeated elements in LIST1 are preserved.
-(lset-union eq? '(a a c) '(x a x)) => (x a a c)
-
-;; Trivial cases
-(lset-union eq?) => ()
-(lset-union eq? '(a b c)) => (a b c)
-
-
-
-lset-intersection
= list1 list2 ... -> list
-(= a b)
,
- for a in A, and b in B.
- Note this implies that an element which appears in B
- and multiple times in list A
- will also appear multiple times in the result.
-lset-intersection
essentially filters
- list1,
- without disarranging element order.
- The result may
- share a common tail with list1.
-
-(lset-intersection eq? '(a b c d e) '(a e i o u)) => (a e)
-
-;; Repeated elements in LIST1 are preserved.
-(lset-intersection eq? '(a x y a) '(x a x z)) => (a x a)
-
-(lset-intersection eq? '(a b c)) => (a b c) ; Trivial case
-
-
-
-lset-difference
= list1 list2 ... -> list
-lset-difference
essentially
- filters list1, without disarranging element order.
- The result may share a common tail with list1.
-
- The dynamic order in which the applications of = are made is not
- specified.
- The procedure may check an element of list1
- for membership in every other list before proceeding to consider the
- next element of list1,
- or it may completely compute the difference of
- list1 and list2 before
- proceeding to list3,
- or it may go about its work in some third order.
-
-
-(lset-difference eq? '(a b c d e) '(a e i o u)) => (b c d)
-
-(lset-difference eq? '(a b c)) => (a b c) ; Trivial case
-
-
-
-lset-xor
= list1 ... -> list
-
-
- However, an implementation is allowed to assume that = is
- symmetric -- that is, that
-(= a b)
, and
- (= b a)
.
- (= a b)
=>
- (= b a)
.
-(= a b)
produces
- true for some a in A
- and b in B,
- both a and b may be removed from
- inclusion in the result.
-
-(lset-xor eq? '(a b c d e) '(a e i o u)) => (d c b i o u)
-
-;; Trivial cases.
-(lset-xor eq?) => ()
-(lset-xor eq? '(a b c d e)) => (a b c d e)
-
-
-
-
-lset-diff+intersection
= list1 list2 ... -> [list list]
-
-(values (lset-difference = list1 list2 ...)
- (lset-intersection = list1
- (lset-union = list2 ...)))
-
- but can be implemented more efficiently.
-lset-union!
= list1 ... -> list
-lset-intersection!
= list1 list2 ... -> list
-lset-difference!
= list1 list2 ... -> list
-lset-xor!
= list1 ... -> list
-lset-diff+intersection!
= list1 list2 ... -> [list list]
-lset-union!
is permitted to recycle cons cells from any
- of its list arguments.
-Primitive side-effects
-
-
-
-
-
-set-car!
pair object -> unspecified
-set-cdr!
pair object -> unspecified
-
-(define (f) (list 'not-a-constant-list))
-(define (g) '(constant-list))
-(set-car! (f) 3) => *unspecified*
-(set-car! (g) 3) => *error*
-
-Acknowledgements
-References & links
-
-
-
-
-
-
-
-
-
-
-Guy L. Steele Jr. (editor).
-Digital Press, Maynard, Mass., second edition 1990.
-Available at
-http://www.elwood.com/alu/table/references.htm#cltl2.
-
- R. Kelsey, W. Clinger, J. Rees (editors).
- Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998.
- and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998.
- Available at
- http://www.schemers.org/Documents/Standards/.
-
-Copyright
-