Checking in a slew of systemic changes:
- Switching scsh over to the SRE regexp system. - Upgraded string & character handling - Switching code over to list-lib SRFI names. Some switchover to probably new string-lib SRFI names, though that SRFI hasn't happened, so it's probably a waste of time at the moment to stress out about it. - General switching from "reduce-" lexeme to "fold-" lexeme. -Olin A per-file breakdown of changes: ccp.scm New char-set.scm New features stringlib.scm New stringpack.scm New awk.scm SRE filemtch.scm SRE string-lib fname.scm string-lib fr.scm SRE string-lib glob.scm SRE procobj.scm list-lib rdelim.scm SRE re.c SRE killed re.scm SRE killed re1.c SRE moved to rx/ dir re1.h SRE moved to rx/ dir rx/... SRE General fitting-it-in to the upgraded scsh. scsh-interfaces.scm scsh-package.scm scsh.scm type-check loophole added; string-lib; list-lib reduce-port -> foldl-port syscalls.scm string-lib; diked out unused MATCH-FILES code syscalls.c utilities.scm string-lib; {any?,every?} -> {any,every} procobj list-lib
This commit is contained in:
parent
1bb7a2e494
commit
96d9b0e068
221
scsh/awk.scm
221
scsh/awk.scm
|
@ -1,10 +1,27 @@
|
|||
;;; An awk loop, after the design of David Albertz and Olin Shivers.
|
||||
;;; Copyright (c) 1994 by Olin Shivers.
|
||||
|
||||
;;; This uses the new RX SRE syntax. Defines a Clinger-Rees expander for
|
||||
;;; the old, pre-SRE syntax AWK, and one for the new SRE-syntax AWK.
|
||||
|
||||
;;; Imports:
|
||||
;;; - Requires RECEIVE from RECEIVING package.
|
||||
;;; - Would require DESTRUCTURE from DESTRUCTURING package, but it appears
|
||||
;;; to be broken, so we hack it w/cars and cdrs.
|
||||
;;; - Requires STRING-MATCH from SCSH package.
|
||||
;;; - Requires STRING-MATCH and STRING-MATCH? from RE-EXPORTS package.
|
||||
;;; - Requires regexp manipulation stuff from SRE-SYNTAX-TOOLS
|
||||
;;; - Requires ERROR from ERROR-PACKAGE.
|
||||
;;; - Requires ANY and FILTER frm SCSH-UTILITIES.
|
||||
;;;
|
||||
;;; Needs error-package receiving sre-syntax-tools scsh-utilities
|
||||
;;;
|
||||
;;; Exports:
|
||||
;;; (expand-awk exp r c) Clinger-Rees macro expander, new syntax
|
||||
;;; (expand-awk/obsolete exp r c) Clinger-Rees macro expander, old syntax
|
||||
;;;
|
||||
;;; next-range next-:range These four functions are used in the
|
||||
;;; next-range: next-:range: code output by the expander.
|
||||
|
||||
|
||||
;;; Examples:
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
@ -15,13 +32,14 @@
|
|||
;;;
|
||||
;;; ;;; Count the number of non-comment lines of code in my Scheme source.
|
||||
;;; (awk (read-line) (line) ((nlines 0))
|
||||
;;; ("^[ \t]*;" nlines) ; A comment line.
|
||||
;;; ((: bos (* white) ";") nlines) ; A comment line.
|
||||
;;; (else (+ nlines 1))) ; Not a comment line.
|
||||
;;;
|
||||
;;; ;;; Read numbers, counting the evens and odds.
|
||||
;;; ;;; Read numbers, counting the evens and odds,
|
||||
;;; ;;; and printing out sign information.
|
||||
;;; (awk (read) (val) ((evens 0) (odds 0))
|
||||
;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about
|
||||
;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too.
|
||||
;;; ((zero? val) (display "zero ") (values evens odds))
|
||||
;;; ((> val 0) (display "pos ") (values evens odds))
|
||||
;;; (else (display "neg ") (values evens odds))
|
||||
;;;
|
||||
;;; ((even? val) (values (+ evens 1) odds))
|
||||
|
@ -33,6 +51,17 @@
|
|||
;;; .
|
||||
;;; .
|
||||
;;; <clausen>)
|
||||
;;;
|
||||
;;; <clause> ::= (ELSE body ...)
|
||||
;;; | (:RANGE test1 test2 body ...) ; RANGE :RANGE RANGE: :RANGE:
|
||||
;;; | (AFTER body ...)
|
||||
;;; | (test => proc)
|
||||
;;; | (test ==> vars body ...)
|
||||
;;; | (test body ...)
|
||||
;;;
|
||||
;;; test ::= integer | sre | (WHEN exp) | exp
|
||||
;;; (sre/exp ambiguities resolved in favor of SRE)
|
||||
|
||||
|
||||
;;; This macro is written using Clinger/Rees explicit-renaming low-level
|
||||
;;; macros. So it is pretty ugly. It takes a little care to generate
|
||||
|
@ -141,17 +170,23 @@
|
|||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (expand-awk exp r c)
|
||||
;;; If STRING-REGEXPS? is true, we use the old, obsolete syntax, where
|
||||
;;; a test form that is a string, such as "shivers|bdc", is treated as
|
||||
;;; a regular expression in the Posix string syntax. Otherwise, we use the
|
||||
;;; new SRE syntax, where strings are treated as SRE constants.
|
||||
|
||||
(define (expand-awk exp r c) (really-expand-awk exp r c #f))
|
||||
(define (expand-awk/obsolete exp r c) (really-expand-awk exp r c #t))
|
||||
|
||||
(define (really-expand-awk exp r c string-regexps?)
|
||||
(let* ((%lambda (r 'lambda)) ; Bind a mess of keywords.
|
||||
(%let (r 'let))
|
||||
(%receive (r 'receive))
|
||||
(%values (r 'values))
|
||||
(%if (r 'if))
|
||||
(%eof-object? (r 'eof-object?))
|
||||
(%after (r 'after))
|
||||
(%else (r 'else))
|
||||
(%+ (r '+))
|
||||
(%make-regexp (r 'make-regexp))
|
||||
(%rx (r 'rx))
|
||||
|
||||
(gensym (let ((i 0))
|
||||
(lambda (s)
|
||||
|
@ -169,7 +204,9 @@
|
|||
|
||||
;; Rip the form apart.
|
||||
(reader-exp (cadr exp))
|
||||
(rec/field-vars (caddr exp))
|
||||
;; Replace #F's with gensym'd variables in the record/field vars.
|
||||
(rec/field-vars (map (lambda (v) (or v (r (gensym "anon-rfval"))))
|
||||
(caddr exp)))
|
||||
(rec-var (car rec/field-vars)) ; The var bound to the record.
|
||||
(rest (cdddr exp))) ; Stuff after the rec&field-vars.
|
||||
|
||||
|
@ -178,10 +215,18 @@
|
|||
(values #f (car rest) (cdr rest)) ; form.
|
||||
(values (car rest) (cadr rest) (cddr rest)))
|
||||
|
||||
;; If we are doing the old, obsolete Posix-string syntax, map
|
||||
;; the clause tests over to the new syntax.
|
||||
(let* ((clauses (if string-regexps?
|
||||
(map (lambda (clause)
|
||||
(hack-clause-for-posix-string-syntax clause r c))
|
||||
clauses)
|
||||
clauses))
|
||||
|
||||
;; Some analysis: what have we got?
|
||||
;; Range clauses, else clauses, line num tests,...
|
||||
(let* ((recnum-tests? ; Do any of the clauses test the record
|
||||
(any? (lambda (clause) ; count? (I.e., any integer tests?)
|
||||
(recnum-tests? ; Do any of the clauses test the record
|
||||
(any (lambda (clause) ; count? (I.e., any integer tests?)
|
||||
(let ((test (car clause)))
|
||||
(or (integer? test)
|
||||
(and (range? clause)
|
||||
|
@ -191,25 +236,25 @@
|
|||
|
||||
;; If any ELSE clauses, bind this to the var in which we
|
||||
;; will keep the else state, otherwise #f.
|
||||
(else-var (and (any? (lambda (clause)
|
||||
(else-var (and (any (lambda (clause)
|
||||
(c (car clause) %else))
|
||||
clauses)
|
||||
(r 'else)))
|
||||
(r 'else-state)))
|
||||
|
||||
;; We compile all of the regexp patterns into regexp
|
||||
;; We compile all of the *static* regexp patterns into regexp
|
||||
;; data structures outside the AWK loop. So we need to
|
||||
;; make a list of all the regexps that are used as tests.
|
||||
(patterns (apply append
|
||||
(map (lambda (clause)
|
||||
(let ((test (car clause)))
|
||||
(cond ((string? test) (list test))
|
||||
(cond ((sre-form? test r c) (list test))
|
||||
((range? clause)
|
||||
(let ((t1 (cadr clause))
|
||||
(t2 (caddr clause)))
|
||||
(append (if (string? t1)
|
||||
(append (if (sre-form? t1 r c)
|
||||
(list t1)
|
||||
'())
|
||||
(if (string? t2)
|
||||
(if (sre-form? t2 r c)
|
||||
(list t2)
|
||||
'()))))
|
||||
(else '()))))
|
||||
|
@ -223,16 +268,28 @@
|
|||
(if (member pat ans) ans (cons pat ans)))
|
||||
'())))
|
||||
|
||||
;; An alist matching regexp patterns with the vars to which
|
||||
;; we will bind their compiled regexp data structure.
|
||||
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
|
||||
(pats-static? (map (lambda (sre)
|
||||
(static-regexp? (parse-sre sre r c)))
|
||||
patterns))
|
||||
|
||||
;; A LET-list binding the regexp vars to their compiled regexps.
|
||||
(regexp-inits (map (lambda (p/v)
|
||||
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
|
||||
pats/vars))
|
||||
;; An alist matching each pattern with the exp that refers
|
||||
;; to it -- a var if it's static, a Scheme (RX ...) exp otw.
|
||||
(pats/refs (map (lambda (pat static?)
|
||||
(cons pat
|
||||
(if static?
|
||||
(r (gensym "re."))
|
||||
`(,%rx ,pat))))
|
||||
patterns pats-static?))
|
||||
|
||||
;; A LET-list binding the regexp vars to their
|
||||
;; compiled static regexps.
|
||||
(regexp-inits (apply append
|
||||
(map (lambda (p/r static?)
|
||||
(if static?
|
||||
`((,(cdr p/r) (,%rx ,(car p/r))))
|
||||
'()))
|
||||
pats/refs
|
||||
pats-static?)))
|
||||
|
||||
;; Make a list of state vars for the range clauses.
|
||||
;; For each range clause, we need a boolean var to track
|
||||
|
@ -279,7 +336,7 @@
|
|||
|
||||
(loop-body (awk-loop-body lp-var rec-var else-var
|
||||
rec-counter range-vars svars
|
||||
clauses pats/vars r c))
|
||||
clauses pats/refs r c))
|
||||
|
||||
;; Variables that have to be updated per-iteration, as a LET list.
|
||||
;; Note that we are careful not to increment the record counter
|
||||
|
@ -302,12 +359,32 @@
|
|||
`(,%if (,%eof-object? ,rec-var) ,after-exp
|
||||
,loop-body))))))))
|
||||
|
||||
;;; This maps a clause in the old, obsolete syntax over to a clause
|
||||
;;; in the new, SRE syntax.
|
||||
(define (hack-clause-for-posix-string-syntax clause r c)
|
||||
(let ((hack-simple-test (lambda (test)
|
||||
(cond ((string? test)
|
||||
`(,(r 'posix-string) ,test))
|
||||
((integer? test) test)
|
||||
(else `(,(r 'when) ,test)))))
|
||||
(test (car clause)))
|
||||
(cond ((range-keyword? test r c)
|
||||
`(,test ,(hack-simple-test (cadr clause))
|
||||
,(hack-simple-test (caddr clause))
|
||||
. ,(cdddr clause)))
|
||||
|
||||
((or (c test (r 'else))
|
||||
(c test (r 'after)))
|
||||
clause)
|
||||
|
||||
(else `(,(hack-simple-test test) . ,(cdr clause))))))
|
||||
|
||||
|
||||
;;; Expand into the body of the awk loop -- the code that tests & executes
|
||||
;;; each clause, and then jumps to the top of the loop.
|
||||
|
||||
(define (awk-loop-body lp-var rec-var else-var rec-counter
|
||||
range-vars svars clauses pats/vars r c)
|
||||
range-vars svars clauses pats/refs r c)
|
||||
(let ((clause-vars (if else-var (cons else-var svars) svars))
|
||||
(loop-vars (append (if rec-counter (list rec-counter) '())
|
||||
range-vars
|
||||
|
@ -325,7 +402,7 @@
|
|||
(let ((tail (expand (cdr clauses) (cdr range-vars))))
|
||||
(expand-range-clause clause tail (car range-vars)
|
||||
rec-var else-var rec-counter svars
|
||||
pats/vars
|
||||
pats/refs
|
||||
r c)))
|
||||
|
||||
((c test %after) ; An AFTER clause. Skip it.
|
||||
|
@ -339,7 +416,7 @@
|
|||
(let ((tail (expand (cdr clauses) range-vars)))
|
||||
(expand-simple-clause clause tail
|
||||
rec-var else-var rec-counter svars
|
||||
pats/vars r c)))))
|
||||
pats/refs r c)))))
|
||||
|
||||
;; No clauses -- just jump to top of loop.
|
||||
`(,lp-var . ,loop-vars)))))
|
||||
|
@ -347,49 +424,92 @@
|
|||
|
||||
;;; Make a Scheme expression out of a test form.
|
||||
;;; Integer i => (= i <record-counter>)
|
||||
;;; String s => (regexp-exec s <record>)
|
||||
;;; SRE s => (regexp-search <re> <record>)
|
||||
;;; (when e) => e
|
||||
;;; Expression e => e
|
||||
;;;
|
||||
;;; If FOR-VALUE? is true, then we do regexp searches with REGEXP-SEARCH,
|
||||
;;; otherwise, we use the cheaper REGEXP-SEARCH?.
|
||||
|
||||
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
|
||||
(define (->simple-clause-test test-form for-value? rec-var rec-counter pats/refs r c)
|
||||
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
|
||||
((string? test-form)
|
||||
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
|
||||
(else (error "Impossible AWK error -- unknown regexp"
|
||||
test-form pats/vars)))))
|
||||
`(,(r 'regexp-exec) ,re-var ,rec-var)))
|
||||
|
||||
((sre-form? test-form r c)
|
||||
`(,(r (if for-value? 'regexp-search 'regexp-search?))
|
||||
,(cdr (assoc test-form pats/refs))
|
||||
,rec-var))
|
||||
|
||||
((and (pair? test-form)
|
||||
(c (r 'when) (car test-form)))
|
||||
(if (= 2 (length test-form)) (cadr test-form)
|
||||
(error "Illegal WHEN test in AWK" test-form)))
|
||||
|
||||
(else test-form)))
|
||||
|
||||
|
||||
(define (expand-simple-clause clause tail
|
||||
rec-var else-var rec-counter svars
|
||||
pats/vars r c)
|
||||
pats/refs r c)
|
||||
(let* ((%let (r 'let))
|
||||
(%= (r '=))
|
||||
(%string-match (r 'string-match))
|
||||
(%arrow (r '=>))
|
||||
(%long-arrow (r '==>))
|
||||
(%if (r 'if))
|
||||
(%mss (r 'match:substring))
|
||||
|
||||
(test (car clause))
|
||||
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
|
||||
(mktest (lambda (for-value?)
|
||||
(->simple-clause-test test for-value? rec-var
|
||||
rec-counter pats/refs r c)))
|
||||
|
||||
;; Is clause of the form (test => proc)
|
||||
(arrow? (and (= 3 (length clause))
|
||||
(c (cadr clause) %arrow)))
|
||||
|
||||
;; How about (test ==> (var ...) body ...)?
|
||||
(long-arrow? (and (< 3 (length clause))
|
||||
(c (cadr clause) %long-arrow)))
|
||||
|
||||
(null-clause-list (null-clause-action else-var svars r))
|
||||
|
||||
;; The core form conditionally executes the body.
|
||||
;; It returns the new else var and the new state vars, if any.
|
||||
(core (if arrow?
|
||||
(core (cond (arrow?
|
||||
(let* ((tv (r 'tval)) ; APP is the actual
|
||||
(app `(,(caddr clause) ,tv))) ; body: (proc tv).
|
||||
(app `(,(caddr clause) ,tv)) ; body: (proc tv).
|
||||
(test (mktest #t)))
|
||||
`(,%let ((,tv ,test))
|
||||
(,%if ,tv
|
||||
,(clause-action (list app) else-var svars r c)
|
||||
. ,null-clause-list)))
|
||||
. ,null-clause-list))))
|
||||
|
||||
`(,%if ,test ,(clause-action (cdr clause) else-var svars r c)
|
||||
. ,null-clause-list)))
|
||||
(long-arrow?
|
||||
(let* ((tv (r 'tval))
|
||||
(test (mktest #t))
|
||||
(bindings ; List of LET bindings for submatches.
|
||||
(let lp ((i 0)
|
||||
(vars (caddr clause))
|
||||
(bindings '()))
|
||||
(if (pair? vars)
|
||||
(let ((var (car vars)))
|
||||
(lp (+ i 1) (cdr vars)
|
||||
(if var
|
||||
`((,var (,%mss ,tv ,i)) . ,bindings)
|
||||
bindings))) ; #F = "don't-care"
|
||||
bindings))))
|
||||
|
||||
`(,%let ((,tv ,test))
|
||||
(,%if ,tv
|
||||
(,%let ,bindings ; Bind submatches.
|
||||
. ,(deblock (clause-action (cdddr clause)
|
||||
else-var svars
|
||||
r c)
|
||||
r c))
|
||||
. ,null-clause-list))))
|
||||
|
||||
(else
|
||||
`(,%if ,(mktest #f) ,(clause-action (cdr clause)
|
||||
else-var svars r c)
|
||||
. ,null-clause-list))))
|
||||
|
||||
(loop-vars (if else-var (cons else-var svars) svars)))
|
||||
|
||||
|
@ -402,7 +522,7 @@
|
|||
|
||||
(define (expand-range-clause clause tail range-var
|
||||
rec-var else-var rec-counter svars
|
||||
pats/vars r c)
|
||||
pats/refs r c)
|
||||
(let* ((start-test (cadr clause))
|
||||
(stop-test (caddr clause))
|
||||
(body (cdddr clause))
|
||||
|
@ -419,10 +539,10 @@
|
|||
(else (error "Unrecognised range keyword!" clause)))))
|
||||
|
||||
;; Convert the start and stop test forms to code.
|
||||
(start-test (->simple-clause-test start-test rec-var
|
||||
rec-counter pats/vars r))
|
||||
(stop-test (->simple-clause-test stop-test rec-var
|
||||
rec-counter pats/vars r))
|
||||
(start-test (->simple-clause-test start-test #f rec-var
|
||||
rec-counter pats/refs r c))
|
||||
(stop-test (->simple-clause-test stop-test #f rec-var
|
||||
rec-counter pats/refs r c))
|
||||
|
||||
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
|
||||
(stop-thunk `(,%lambda () ,stop-test))
|
||||
|
@ -444,7 +564,6 @@
|
|||
(tail-exps (deblock tail r c))
|
||||
|
||||
(%if (r 'if))
|
||||
(%receive (r 'receive))
|
||||
(%let (r 'let))
|
||||
|
||||
;; We are hard-wiring the else var to #t after this, so the core
|
||||
|
|
|
@ -0,0 +1,576 @@
|
|||
;;; Char->char partial maps -*- Scheme -*-
|
||||
;;; Copyright (C) 1998 by Olin Shivers.
|
||||
|
||||
;;; CCPs are an efficient data structure for doing simple string transforms,
|
||||
;;; similar to the kinds of things you would do with the tr(1) program.
|
||||
;;;
|
||||
;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit
|
||||
;;; character types would need a more sophisticated data structure, tuned
|
||||
;;; for sparseness. I would suggest something like this:
|
||||
;;; (define-record ccp
|
||||
;;; domain ; The domain char-set
|
||||
;;; map ; Sorted vector of (char . string) pairs
|
||||
;;; ; specifying the map.
|
||||
;;; id?) ; If true, mappings not specified by MAP are
|
||||
;;; ; identity mapping. If false, MAP must
|
||||
;;; ; specify a mapping for every char in DOMAIN.
|
||||
;;;
|
||||
;;; A (char . string) elements in MAP specifies a mapping for the contiguous
|
||||
;;; sequence of L chars beginning with CHAR (in the sequence of the underlying
|
||||
;;; char type representation), where L is the length of STRING. These MAP elements
|
||||
;;; are sorted by CHAR, so that binary search can be used to get from an input
|
||||
;;; character C to the right MAP element quickly.
|
||||
;;;
|
||||
;;; This representation should be reasonably compact for standard mappings on,
|
||||
;;; say, a Unicode CCP. An implementation might wish to have a cache field
|
||||
;;; in the record for storing the full 8kb bitset when performing ccp-map
|
||||
;;; operations. Or, an implementation might want to store the Latin-1 subset
|
||||
;;; of the map in a dense format, and keep the remainder in a sparse format.
|
||||
|
||||
(define num-chars (char-set-size char-set:full)) ; AKA 256.
|
||||
|
||||
(define-record ccp
|
||||
domain ; The domain char-set
|
||||
dshared? ; Is the domain value shared or linear?
|
||||
map ; 256-elt string
|
||||
mshared?) ; Is the map string shared or linear?
|
||||
|
||||
|
||||
;;; Accessors and setters that manage the linear bookkeeping
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (ccp-domain ccp)
|
||||
(set-ccp:dshared? ccp #t)
|
||||
(ccp:domain ccp))
|
||||
|
||||
;;; CCP is a linear ccp. PROC is a domain->domain function; it must be
|
||||
;;; linear in its parameter and result.
|
||||
;;;
|
||||
;;; Updates the domain of the CCP with PROC, returns the resulting
|
||||
;;; CCP; reuses the old one to construct the new one.
|
||||
|
||||
(define (restrict-linear-ccp-domain ccp proc)
|
||||
(let ((new-d (proc (if (ccp:dshared? ccp)
|
||||
(begin (set-ccp:dshared? ccp #f)
|
||||
(char-set-copy (ccp:domain ccp)))
|
||||
(ccp:domain ccp)))))
|
||||
(set-ccp:domain ccp new-d)
|
||||
ccp))
|
||||
|
||||
;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function.
|
||||
;;; It is passed a linear domain and cmap string. It may side-effect
|
||||
;;; the cmap string, and returns the resulting updated domain.
|
||||
;;; We return the resulting CCP, reusing the parameter to construct it.
|
||||
|
||||
(define (linear-update-ccp ccp proc)
|
||||
(let* ((cmap (if (ccp:mshared? ccp)
|
||||
(begin (set-ccp:mshared? ccp #f)
|
||||
(string-copy (ccp:map ccp)))
|
||||
(ccp:map ccp)))
|
||||
|
||||
(new-d (proc (if (ccp:dshared? ccp)
|
||||
(begin (set-ccp:dshared? ccp #f)
|
||||
(char-set-copy (ccp:domain ccp)))
|
||||
(ccp:domain ccp))
|
||||
cmap)))
|
||||
(set-ccp:domain ccp new-d)
|
||||
ccp))
|
||||
|
||||
|
||||
|
||||
;;; Return CCP's map field, and mark it as shared. CCP functions that
|
||||
;;; restrict a ccp's domain share map strings, so they use this guy.
|
||||
(define (ccp:map/shared ccp)
|
||||
(set-ccp:mshared? ccp #t)
|
||||
(ccp:map ccp))
|
||||
|
||||
(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f
|
||||
(string-copy (ccp:map ccp)) #f))
|
||||
|
||||
;;; N-ary equality relation for partial maps
|
||||
|
||||
(define (ccp= ccp1 . rest)
|
||||
(let ((domain (ccp:domain ccp1))
|
||||
(cmap (ccp:map ccp1)))
|
||||
(every (lambda (ccp2)
|
||||
(and (char-set= domain (ccp:domain ccp2))
|
||||
(let ((cmap2 (ccp:map ccp2)))
|
||||
(char-set-every? (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap i)
|
||||
(string-ref cmap2 i))))
|
||||
domain))))
|
||||
rest)))
|
||||
|
||||
|
||||
;;; N-ary subset relation for partial maps
|
||||
|
||||
(define (ccp<= ccp1 . rest)
|
||||
(let lp ((domain1 (ccp:domain ccp1))
|
||||
(cmap1 (ccp:map ccp1))
|
||||
(rest rest))
|
||||
(or (not (pair? rest))
|
||||
(let* ((ccp2 (car rest))
|
||||
(domain2 (ccp:domain ccp2))
|
||||
(cmap2 (ccp:map ccp2))
|
||||
(rest (cdr rest)))
|
||||
(and (char-set<= domain1 domain2)
|
||||
(let ((cmap2 (ccp:map ccp2)))
|
||||
(char-set-every? (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(char=? (string-ref cmap1 i)
|
||||
(string-ref cmap2 i))))
|
||||
domain1))
|
||||
(lp domain2 cmap2 rest))))))
|
||||
|
||||
|
||||
;;; CCP iterators
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (ccp-fold kons knil ccp)
|
||||
(let ((cmap (ccp:map ccp)))
|
||||
(char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v))
|
||||
knil
|
||||
(ccp:domain ccp))))
|
||||
|
||||
(define (ccp-for-each proc ccp)
|
||||
(let ((cmap (ccp:map ccp)))
|
||||
(char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c))))
|
||||
(ccp:domain ccp))))
|
||||
|
||||
(define (ccp->alist ccp)
|
||||
(ccp-fold (lambda (from to alist) (cons (cons from to) alist))
|
||||
'()
|
||||
ccp))
|
||||
|
||||
|
||||
;;; CCP-RESTRICT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Restrict a ccp's domain.
|
||||
|
||||
(define (ccp-restrict ccp cset)
|
||||
(make-ccp (char-set-intersection cset (ccp:domain ccp))
|
||||
#f
|
||||
(ccp:map/shared ccp)
|
||||
#t))
|
||||
|
||||
(define (ccp-restrict! ccp cset)
|
||||
(restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset))))
|
||||
|
||||
|
||||
;;; CCP-ADJOIN ccp from-char1 to-char1 ...
|
||||
;;; CCP-DELETE ccp char1 ...
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Add & delete mappings to/from a ccp.
|
||||
|
||||
(define (ccp-delete ccp . chars)
|
||||
(make-ccp (apply char-set-delete (ccp:domain ccp) chars)
|
||||
#f
|
||||
(ccp:map/shared ccp)
|
||||
#t))
|
||||
|
||||
(define (ccp-delete! ccp . chars)
|
||||
(restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars))))
|
||||
|
||||
|
||||
(define (ccp-adjoin ccp . chars)
|
||||
(let ((cmap (string-copy (ccp:map ccp))))
|
||||
(make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars)
|
||||
#f
|
||||
cmap
|
||||
#f)))
|
||||
|
||||
(define (ccp-adjoin! ccp . chars)
|
||||
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars))))
|
||||
|
||||
(define (install-ccp-adjoin! cmap domain chars)
|
||||
(let lp ((chars chars) (d domain))
|
||||
(if (pair? chars)
|
||||
(let ((from (car chars))
|
||||
(to (cadr chars))
|
||||
(chars (cddr chars)))
|
||||
(string-set! cmap (char->ascii from) to)
|
||||
(lp chars (char-set-adjoin! d from)))
|
||||
d)))
|
||||
|
||||
|
||||
;;; CCP-EXTEND ccp1 ...
|
||||
;;; CCP-EXTEND! ccp1 ccp2 ...
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Extend ccp1 with ccp2, etc.
|
||||
|
||||
(define (ccp-extend . ccps)
|
||||
(if (pair? ccps)
|
||||
(let ((ccp0 (car ccps))
|
||||
(ccps (cdr ccps)))
|
||||
(if (pair? ccps)
|
||||
(let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap.
|
||||
;; The FOLD installs each ccp in CCPS into CMAP and produces
|
||||
;; the new domain.
|
||||
(make-ccp (fold (lambda (ccp d)
|
||||
(install-ccp-extension! cmap d ccp))
|
||||
(char-set-copy (ccp:domain ccp0))
|
||||
ccps)
|
||||
#f cmap #f))
|
||||
|
||||
ccp0)) ; Only 1 parameter
|
||||
|
||||
ccp:0)) ; 0 parameters
|
||||
|
||||
(define (ccp-extend! ccp0 . ccps)
|
||||
(linear-update-ccp ccp0
|
||||
(lambda (domain cmap)
|
||||
(fold (lambda (ccp d) (install-ccp-extension! cmap d ccp))
|
||||
domain
|
||||
ccps))))
|
||||
|
||||
|
||||
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
||||
(define (install-ccp-extension! cmap domain ccp)
|
||||
(let ((cmap1 (ccp:map ccp))
|
||||
(domain1 (ccp:domain ccp)))
|
||||
(char-set-for-each (lambda (c)
|
||||
(let ((i (char->ascii c)))
|
||||
(string-set! cmap i (string-ref cmap1 i))))
|
||||
domain1)
|
||||
(char-set-union! domain domain1)))
|
||||
|
||||
|
||||
;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; For each character C-IN in the original domain, we push it
|
||||
;;; through the pipeline of CCPs. If we ever land outside the
|
||||
;;; domain of a ccp, we punt C-IN. If we push it all the way
|
||||
;;; through, we add C-IN to our result domain, and add the mapping
|
||||
;;; into the cmap we are assembling.
|
||||
;;;
|
||||
;;; Looping this way avoids building up intermediate temporary
|
||||
;;; CCPs. If CCP's were small bitsets, we might be better off
|
||||
;;; slicing the double-nested loops the other way around.
|
||||
|
||||
(define (ccp-compose . ccps)
|
||||
(cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1
|
||||
((not (pair? (cdr ccps))) (car ccps)) ; 1 arg
|
||||
(else
|
||||
(let* ((v (list->vector ccps))
|
||||
(vlen-2 (- (vector-length v) 2))
|
||||
(cmap (make-string num-chars))
|
||||
(d1 (ccp:domain (vector-ref v (+ vlen-2 1))))
|
||||
(d (char-set-fold (lambda (c-in d)
|
||||
(let lp ((c c-in) (i vlen-2))
|
||||
(if (>= i 0)
|
||||
(let ((ccp (vector-ref v i)))
|
||||
(if (char-set-contains? (ccp:domain ccp) c)
|
||||
(lp (string-ref (ccp:map ccp)
|
||||
(char->ascii c))
|
||||
(- i 1))
|
||||
|
||||
;; Lose: remove c-in from d.
|
||||
(char-set-delete! d c-in)))
|
||||
|
||||
;; Win: C-IN -> C
|
||||
(begin (string-set! cmap
|
||||
(char->ascii c-in)
|
||||
c)
|
||||
d))))
|
||||
(char-set-copy d1)
|
||||
d1)))
|
||||
(make-ccp d #f cmap #f)))))
|
||||
|
||||
|
||||
|
||||
;;; ALIST->CPP
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (alist->ccp cc-alist . maybe-base-ccp)
|
||||
(let ((base (:optional maybe-base-ccp ccp:0)))
|
||||
(if (pair? cc-alist)
|
||||
(let ((cmap (string-copy (ccp:map base))))
|
||||
(make-ccp (install-ccp-alist! cmap
|
||||
(char-set-copy (ccp:domain base))
|
||||
cc-alist)
|
||||
#f cmap #f))
|
||||
base)))
|
||||
|
||||
(define (alist->ccp! alist base)
|
||||
(linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist))))
|
||||
|
||||
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
||||
(define (install-ccp-alist! cmap domain alist)
|
||||
(fold (lambda (from/to d) (let ((from (car from/to))
|
||||
(to (cdr from/to)))
|
||||
(string-set! cmap (char->ascii from) to)
|
||||
(char-set-adjoin! domain from)))
|
||||
domain
|
||||
alist))
|
||||
|
||||
|
||||
;;; PROC->CCP
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; (proc->ccp proc [domain base-ccp])
|
||||
|
||||
(define (proc->ccp proc . args)
|
||||
(let-optionals args ((proc-domain char-set:full)
|
||||
(base ccp:0))
|
||||
(let ((cmap (string-copy (ccp:map base))))
|
||||
(make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base))
|
||||
proc proc-domain)
|
||||
#f cmap #f))))
|
||||
|
||||
(define (proc->ccp! proc proc-domain base)
|
||||
(linear-update-ccp base
|
||||
(lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain))))
|
||||
|
||||
(define (install-ccp-proc! cmap domain proc proc-domain)
|
||||
(char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c)))
|
||||
proc-domain)
|
||||
(char-set-union! domain proc-domain))
|
||||
|
||||
|
||||
;;; CONSTANT-CCP
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; (constant-ccp char [domain base-ccp])
|
||||
;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR.
|
||||
;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0.
|
||||
|
||||
(define (constant-ccp char . args)
|
||||
(let-optionals args ((char-domain char-set:full) (base ccp:0))
|
||||
(let ((cmap (string-copy (ccp:map base))))
|
||||
(make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base))
|
||||
char char-domain)
|
||||
#f cmap #f))))
|
||||
|
||||
(define (constant-ccp! char char-domain base)
|
||||
(linear-update-ccp base
|
||||
(lambda (d cmap) (install-constant-ccp! cmap d char char-domain))))
|
||||
|
||||
;;; Install the constant mapping into CMAP0 by side-effect,
|
||||
;;; linear-update & return DOMAIN0 with the constant-mapping's domain.
|
||||
(define (install-constant-ccp! cmap0 domain0 char char-domain)
|
||||
(char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char))
|
||||
char-domain)
|
||||
(char-set-union! domain0 char-domain))
|
||||
|
||||
|
||||
;;; CCP/MAPPINGS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp
|
||||
;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp
|
||||
;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp
|
||||
;;; Each FROM element is either a string or a (lo-char . hi-char) range.
|
||||
;;; Each TO element is either a string or a lo-char. Strings are replicated
|
||||
;;; to match the length of the corresponding FROM element.
|
||||
;;; CCP/MAPPINGS's base CCP is CCP:0
|
||||
;;;
|
||||
;;; Tedious code.
|
||||
|
||||
;;; Internal utility.
|
||||
;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect.
|
||||
;;; Return the new domain.
|
||||
|
||||
(define (install-ccp-mapping-pair! cmap domain from to)
|
||||
;; Tedium -- four possibilities here:
|
||||
;; str->str, str->lo-char,
|
||||
;; range->str, range->lo-char.
|
||||
(if (string? from)
|
||||
(if (string? to)
|
||||
;; "abc" -> "ABC"
|
||||
(let ((len1 (string-length from))
|
||||
(len2 (string-length to)))
|
||||
(let lp2 ((i (- len1 1))
|
||||
(j (modulo (- len2 1) len1))
|
||||
(d domain))
|
||||
(if (>= i 0)
|
||||
(let ((c (string-ref from i)))
|
||||
(string-set! cmap
|
||||
(char->ascii c)
|
||||
(string-ref to i))
|
||||
(lp2 (- i 1)
|
||||
(- (if (> j 0) j len2) 1)
|
||||
(char-set-adjoin! d c)))
|
||||
d)))
|
||||
|
||||
;; "abc" -> #\A
|
||||
(let lp2 ((i (- (string-length from) 1))
|
||||
(j (char->ascii to))
|
||||
(d domain))
|
||||
(if (>= i 0)
|
||||
(let ((c (string-ref from i)))
|
||||
(string-set! cmap
|
||||
(char->ascii c)
|
||||
(ascii->char j))
|
||||
(lp2 (- i 1)
|
||||
(- j 1)
|
||||
(char-set-adjoin! d c)))
|
||||
d)))
|
||||
|
||||
(let ((from-start (char->ascii (car from)))
|
||||
(from-end (char->ascii (cdr from))))
|
||||
(if (string? to)
|
||||
(let ((len2-1 (- (string-length to) 1)))
|
||||
;; (#\a . #\c) -> "ABC"
|
||||
(let lp2 ((i from-start) (j 0) (d domain))
|
||||
(if (<= i from-end)
|
||||
(let ((c (string-ref to j)))
|
||||
(string-set! cmap i c)
|
||||
(lp2 (+ i 1)
|
||||
(if (= j len2-1) 0 (+ j 1))
|
||||
(char-set-adjoin! d c)))
|
||||
d)))
|
||||
|
||||
;; (#\a . #\c) -> #\A
|
||||
(do ((i from-start (+ i 1))
|
||||
(j (char->ascii to) (+ j 1))
|
||||
(d domain (begin (string-set! cmap i (ascii->char j))
|
||||
(char-set-adjoin d (ascii->char i)))))
|
||||
((> i from-end) d))))))
|
||||
|
||||
;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN.
|
||||
(define (install-mapping-pairs cmap domain args)
|
||||
(let lp ((domain domain) (args args))
|
||||
(if (pair? args)
|
||||
(lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args))
|
||||
(cddr args))
|
||||
domain)))
|
||||
|
||||
(define (ccp/mappings . args)
|
||||
(let ((cmap (make-string num-chars)))
|
||||
(make-ccp (install-mapping-pairs (make-string num-chars)
|
||||
(char-set-copy char-set:empty)
|
||||
args)
|
||||
#f cmap #f)))
|
||||
|
||||
(define (extend-ccp/mappings base . args)
|
||||
(let ((cmap (string-copy (ccp:map base))))
|
||||
(make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args)
|
||||
#f cmap #f)))
|
||||
|
||||
(define (extend-ccp/mappings! base . args)
|
||||
(linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args))))
|
||||
|
||||
|
||||
;;; CONSTRUCT-CCP! ccp elt ...
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; The kitchen-sink constructor; static typing be damned.
|
||||
;;; ELTS are interpreted as follows:
|
||||
;;; (lo-char . hi-char) to-string|lo-char ; ccp/range
|
||||
;;; from-string to-string|lo-char ; ccp/range
|
||||
;;; ccp ; ccp-extend
|
||||
;;; alist ; alist->ccp
|
||||
;;; domain char ; ccp-constant
|
||||
;;; domain proc ; proc->ccp
|
||||
|
||||
(define (construct-ccp! ccp . elts)
|
||||
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts))))
|
||||
|
||||
(define (construct-ccp base . elts)
|
||||
(let ((cmap (string-copy (ccp:map base))))
|
||||
(make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts)
|
||||
#f cmap #f)))
|
||||
|
||||
;;; Install the mappings into CMAP by side-effect,
|
||||
;;; linear-update & return DOMAIN with the final domain.
|
||||
|
||||
(define (install-ccp-construct! cmap domain elts)
|
||||
(let lp ((d domain) (elts elts))
|
||||
;(format #t "d=~s elts=~s\n" d elts)
|
||||
(if (not (pair? elts)) d
|
||||
(let ((elt (car elts))
|
||||
(elts (cdr elts)))
|
||||
(cond ((pair? elt)
|
||||
(cond ((pair? (car elt)) ; ELT is an alist.
|
||||
(lp (install-ccp-alist! cmap d elt) elts))
|
||||
((char? (car elt)) ; ELT is (lo-char . hi-char) range.
|
||||
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
||||
(cdr elts)))
|
||||
(else (error "Illegal elt to construct-ccp" elt))))
|
||||
|
||||
((string? elt)
|
||||
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
||||
(cdr elts)))
|
||||
|
||||
((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts))
|
||||
|
||||
((char-set? elt)
|
||||
(let ((elt2 (car elts))
|
||||
(elts (cdr elts)))
|
||||
(lp (cond ((char? elt2)
|
||||
(install-constant-ccp! cmap d elt2 elt))
|
||||
((procedure? elt2)
|
||||
(install-ccp-proc! cmap d elt2 elt))
|
||||
(else (error "Illegal elt-pair to construct-ccp"
|
||||
elt elt2)))
|
||||
elts)))
|
||||
|
||||
(else (error "Illegal elt to construct-ccp" elt)))))))
|
||||
|
||||
|
||||
;;; CCP unfold
|
||||
|
||||
(define (ccp-unfold p f g seed)
|
||||
(let lp ((seed seed) (ccp (ccp-copy ccp:0)))
|
||||
(if (p seed) ccp
|
||||
(lp (g seed)
|
||||
(receive (from to) (f seed)
|
||||
(lp (g seed) (ccp-adjoin! ccp from to)))))))
|
||||
|
||||
|
||||
|
||||
;;; Using CCPs
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; TR ccp string [start end] -> string
|
||||
;;; CCP-MAP ccp string [start end] -> string
|
||||
;;; CCP-MAP! ccp string [start end] -> undefined
|
||||
;;; CCP-APP ccp char -> char or false
|
||||
|
||||
;;; If a char in S is not in CCP's domain, it is dropped from the result.
|
||||
;;; You can use this to map and delete chars from a string.
|
||||
|
||||
(define (tr ccp s . maybe-start+end)
|
||||
(let-optionals maybe-start+end ((start 0) (end (string-length s)))
|
||||
;; Count up the chars in S that are in the domain,
|
||||
;; and allocate the answer string ANS:
|
||||
(let* ((len (- end start))
|
||||
(domain (ccp:domain ccp))
|
||||
(ans-len (string-fold (lambda (c numchars)
|
||||
(if (char-set-contains? domain c)
|
||||
(+ numchars 1)
|
||||
numchars))
|
||||
0 s start end))
|
||||
(ans (make-string ans-len)))
|
||||
|
||||
;; Apply the map, installing the resulting chars into ANS:
|
||||
(string-fold (lambda (c i) (cond ((ccp-app ccp c) =>
|
||||
(lambda (c)
|
||||
(string-set! ans i c)
|
||||
(+ i 1)))
|
||||
(else i))) ; Not in domain -- drop it.
|
||||
0 s start end)
|
||||
ans)))
|
||||
|
||||
(define (ccp-map ccp s . maybe-start+end)
|
||||
(apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
||||
|
||||
(define (ccp-map! ccp s . maybe-start+end)
|
||||
(apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
||||
|
||||
(define (ccp-app ccp char)
|
||||
(and (char-set-contains? (ccp:domain ccp) char)
|
||||
(string-ref (ccp:map ccp) (char->ascii char))))
|
||||
|
||||
|
||||
;;; Primitive CCPs
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define id-cmap
|
||||
(let ((m (make-string num-chars)))
|
||||
(do ((i (- num-chars 1) (- i 1)))
|
||||
((< i 0))
|
||||
(string-set! m i (ascii->char i)))
|
||||
m))
|
||||
|
||||
(define ccp:0 (make-ccp char-set:empty #t id-cmap #t))
|
||||
(define ccp:1 (make-ccp char-set:full #t id-cmap #t))
|
||||
|
||||
(define ccp:upcase (proc->ccp char-upcase char-set:full))
|
||||
(define ccp:downcase (proc->ccp char-downcase char-set:full))
|
|
@ -19,12 +19,11 @@
|
|||
;;; copy of the record fields.
|
||||
|
||||
;;; New dfns:
|
||||
;;; (char-set= cs1 cs2)
|
||||
;;; (char-set<= cs1 cs2)
|
||||
;;; (reduce-char-set kons knil cs)
|
||||
;;; (set-char-set! cs char in?)
|
||||
;;; (char-set= cs1 cs2 ...)
|
||||
;;; (char-set<= cs1 cs2 ...)
|
||||
;;; (char-set-fold kons knil cs)
|
||||
;;; (char-set-for-each f cs)
|
||||
;;; (copy-char-set cs)
|
||||
;;; (char-set-copy cs)
|
||||
;;; (char-set-size cs)
|
||||
;;; char-set:printing (char-printing? c)
|
||||
;;; char-set:blank (char-blank? c)
|
||||
|
@ -33,6 +32,12 @@
|
|||
;;; char-set:ascii (char-ascii? c)
|
||||
;;; char-set:empty
|
||||
;;; char-set:full
|
||||
;;; char-set-every? pred cs
|
||||
;;; char-set-any pred cs
|
||||
;;; char-set-adjoin cset char -> cset
|
||||
;;; char-set-adjoin! cset char -> cset
|
||||
;;; char-set-delete cset char -> cset
|
||||
;;; char-set-delete! cset char -> cset
|
||||
|
||||
(define char:newline (ascii->char 13))
|
||||
(define char:tab (ascii->char 9))
|
||||
|
@ -41,6 +46,8 @@
|
|||
(define char:return (ascii->char 10))
|
||||
(define char:space (ascii->char 32))
|
||||
|
||||
(define (string-copy s) (substring s 0 (string-length s)))
|
||||
|
||||
(define (string-fill-range! str lower upper ch)
|
||||
(do ((index lower (+ index 1)))
|
||||
((>= index upper) str)
|
||||
|
@ -61,24 +68,26 @@
|
|||
char-set?
|
||||
(s char-set:s))
|
||||
|
||||
(define (copy-char-set cs) (make-char-set (string-copy (char-set:s cs))))
|
||||
(define (char-set-copy cs) (make-char-set (string-copy (char-set:s cs))))
|
||||
|
||||
(define (char-set= cs1 cs2)
|
||||
(let ((s1 (char-set:s cs1))
|
||||
(s2 (char-set:s cs2)))
|
||||
(let lp ((i 255))
|
||||
(or (< i 0)
|
||||
(and (char=? (string-ref s1 i) (string-ref s2 i))
|
||||
(lp (- i 1)))))))
|
||||
;;; The = and <= code is ugly because it's n-ary.
|
||||
|
||||
(define (char-set<= cs1 cs2)
|
||||
(let ((s1 (char-set:s cs1))
|
||||
(s2 (char-set:s cs2)))
|
||||
(let lp ((i 255))
|
||||
(or (< i 0)
|
||||
(define (char-set= cs1 . rest)
|
||||
(let ((s1 (char-set:s cs1)))
|
||||
(every (lambda (cs) (string=? s1 (char-set:s cs)))
|
||||
rest)))
|
||||
|
||||
(define (char-set<= cs1 . rest)
|
||||
(let lp ((s1 (char-set:s cs1))
|
||||
(rest rest))
|
||||
(or (not (pair? rest))
|
||||
(let ((s2 (char-set:s (car rest)))
|
||||
(rest (cdr rest)))
|
||||
(let lp2 ((i 255))
|
||||
(if (< i 0) (lp s2 rest)
|
||||
(and (<= (char->ascii (string-ref s1 i))
|
||||
(char->ascii (string-ref s2 i)))
|
||||
(lp (- i 1)))))))
|
||||
(lp2 (- i 1)))))))))
|
||||
|
||||
|
||||
(define (char-set-size cs)
|
||||
|
@ -88,20 +97,34 @@
|
|||
(lp (- i 1)
|
||||
(if (= 0 (char->ascii (string-ref s i))) size (+ size 1)))))))
|
||||
|
||||
(define (set-char-set! cs char in?)
|
||||
(string-set! (char-set:s cs)
|
||||
(char->ascii char)
|
||||
(ascii->char (if in? 1 0))))
|
||||
(define (set-char-set cs in? . chars)
|
||||
(let ((s (string-copy (char-set:s cs)))
|
||||
(val (if in? (ascii->char 1) (ascii->char 0))))
|
||||
(for-each (lambda (c) (string-set! s (char->ascii c) val))
|
||||
chars)
|
||||
(make-char-set s)))
|
||||
|
||||
(define (char-set-for-each f cs)
|
||||
(define (set-char-set! cs in? . chars)
|
||||
(let ((s (char-set:s cs))
|
||||
(val (if in? (ascii->char 1) (ascii->char 0))))
|
||||
(for-each (lambda (c) (string-set! s (char->ascii c) val))
|
||||
chars))
|
||||
cs)
|
||||
|
||||
(define (char-set-adjoin cs . chars) (apply set-char-set cs #t chars))
|
||||
(define (char-set-adjoin! cs . chars) (apply set-char-set! cs #t chars))
|
||||
(define (char-set-delete cs . chars) (apply set-char-set cs #f chars))
|
||||
(define (char-set-delete! cs . chars) (apply set-char-set! cs #f chars))
|
||||
|
||||
(define (char-set-for-each proc cs)
|
||||
(let ((s (char-set:s cs)))
|
||||
(let lp ((i 255))
|
||||
(cond ((>= i 0)
|
||||
(if (not (= 0 (char->ascii (string-ref s i))))
|
||||
(f (ascii->char i)))
|
||||
(proc (ascii->char i)))
|
||||
(lp (- i 1)))))))
|
||||
|
||||
(define (reduce-char-set kons knil cs)
|
||||
(define (char-set-fold kons knil cs)
|
||||
(let ((s (char-set:s cs)))
|
||||
(let lp ((i 255) (ans knil))
|
||||
(if (< i 0) ans
|
||||
|
@ -110,6 +133,28 @@
|
|||
ans
|
||||
(kons (ascii->char i) ans)))))))
|
||||
|
||||
(define reduce-char-set (deprecated-proc char-set-fold 'char-set-fold
|
||||
"Use char-set-fold instead."))
|
||||
|
||||
(define (char-set-every? pred cs)
|
||||
(let ((s (char-set:s cs)))
|
||||
(let lp ((i 255))
|
||||
(or (< i 0)
|
||||
(if (= 0 (char->ascii (string-ref s i)))
|
||||
(lp (- i 1))
|
||||
(and (pred (ascii->char i))
|
||||
(lp (- i 1))))))))
|
||||
|
||||
(define (char-set-any pred cs)
|
||||
(let ((s (char-set:s cs)))
|
||||
(let lp ((i 255))
|
||||
(and (>= i 0)
|
||||
(if (= 0 (char->ascii (string-ref s i)))
|
||||
(lp (- i 1))
|
||||
(or (pred (ascii->char i))
|
||||
(lp (- i 1))))))))
|
||||
|
||||
|
||||
(define (char-set . chars)
|
||||
(chars->char-set chars))
|
||||
|
||||
|
@ -181,58 +226,88 @@
|
|||
(define (char-set-member? . args)
|
||||
(error "CHAR-SET-MEMBER? is no longer provided. Use CHAR-SET-CONTAINS? instead."))
|
||||
|
||||
|
||||
;;; Set algebra
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (char-set-invert cs)
|
||||
(predicate->char-set (lambda (char)
|
||||
(not (char-set-contains? cs char)))))
|
||||
|
||||
;;; The union, intersection, and difference code is ugly,
|
||||
;;; because the ops are n-ary.
|
||||
(define (char-set-union . csets)
|
||||
(if (pair? csets)
|
||||
(apply char-set-union! (char-set-copy (car csets)) (cdr csets))
|
||||
char-set:empty))
|
||||
|
||||
(define (char-set-intersection . csets)
|
||||
(if (pair? csets)
|
||||
(apply char-set-intersection! (char-set-copy (car csets)) (cdr csets))
|
||||
char-set:full))
|
||||
|
||||
(define (char-set-difference cs1 . csets)
|
||||
(if (pair? csets)
|
||||
(apply char-set-difference! (char-set-copy cs1) csets)
|
||||
cs1))
|
||||
|
||||
|
||||
;;; Linear set-algebraic ops
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; These guys are allowed, but not required, to side-effect their first
|
||||
;;; argument when computing their result. In other words, you must use them
|
||||
;;; as if they were completely functional, just like their non-! counterparts,
|
||||
;;; and you must additionally ensure that their first arguments are "dead"
|
||||
;;; at the point of call. In return, we promise a more efficient result, plus
|
||||
;;; allowing you to always assume char-sets are unchangeable values.
|
||||
|
||||
;;; Apply P to each index and it's char in S: (P I C).
|
||||
;;; Used by the intersection & difference.
|
||||
(define (string-iter s p)
|
||||
(define (string-iter p s)
|
||||
(let lp ((i (- (string-length s) 1)))
|
||||
(cond ((>= i 0)
|
||||
(p i (string-ref s i))
|
||||
(lp (- i 1))))))
|
||||
|
||||
(define (char-set-union . csets)
|
||||
(if (pair? csets)
|
||||
(let ((cset (copy-char-set (car csets))))
|
||||
(for-each (lambda (cs)
|
||||
(char-set-for-each (lambda (c) (set-char-set! cset c #t))
|
||||
cs))
|
||||
(cdr csets))
|
||||
(define (char-set-invert! cset)
|
||||
(let ((s (char-set:s cset)))
|
||||
(string-iter (lambda (i c)
|
||||
(string-set! s i (ascii->char (- 1 (char->ascii c)))))
|
||||
s))
|
||||
cset)
|
||||
char-set:empty))
|
||||
|
||||
(define (char-set-intersection . csets)
|
||||
(if (pair? csets)
|
||||
(let* ((cset (copy-char-set (car csets)))
|
||||
(s (char-set:s cset)))
|
||||
(for-each (lambda (cs)
|
||||
(string-iter (char-set:s cs)
|
||||
(lambda (i c)
|
||||
(if (= 0 (char->ascii c))
|
||||
(string-set! s i (ascii->char 0))))))
|
||||
(cdr csets))
|
||||
cset)
|
||||
char-set:full))
|
||||
(define (char-set-union! cset1 . csets)
|
||||
(let ((s (char-set:s cset1)))
|
||||
(for-each (lambda (cset)
|
||||
(char-set-for-each (lambda (c)
|
||||
(string-set! s (char->ascii c)
|
||||
(ascii->char 1)))
|
||||
cset))
|
||||
csets))
|
||||
cset1)
|
||||
|
||||
(define (char-set-intersection! cset1 . csets)
|
||||
(let ((s (char-set:s cset1)))
|
||||
(for-each (lambda (cset)
|
||||
(string-iter (lambda (i c)
|
||||
(if (zero? (char->ascii c))
|
||||
(string-set! s i (ascii->char 0))))
|
||||
(char-set:s cset)))
|
||||
csets))
|
||||
cset1)
|
||||
|
||||
(define (char-set-difference! cset1 . csets)
|
||||
(let ((s (char-set:s cset1)))
|
||||
(for-each (lambda (cset)
|
||||
(char-set-for-each (lambda (c)
|
||||
(string-set! s (char->ascii c)
|
||||
(ascii->char 0)))
|
||||
cset))
|
||||
csets))
|
||||
cset1)
|
||||
|
||||
|
||||
(define (char-set-difference cs1 . csets)
|
||||
(if (pair? csets)
|
||||
(let* ((cset (copy-char-set cs1))
|
||||
(s (char-set:s cset)))
|
||||
(for-each (lambda (cs)
|
||||
(string-iter (char-set:s cs)
|
||||
(lambda (i c)
|
||||
(if (= 1 (char->ascii c))
|
||||
(string-set! s i (ascii->char 0))))))
|
||||
csets)
|
||||
cset)
|
||||
cs1))
|
||||
|
||||
;;;; System Character Sets
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define char-set:lower-case (ascii-range->char-set #x61 #x7B))
|
||||
(define char-set:upper-case (ascii-range->char-set #x41 #x5B))
|
||||
|
|
|
@ -11,17 +11,28 @@
|
|||
;;; root Search starts from here. Usefully "." (cwd)
|
||||
;;; dots? => if true, dot files will be matched.
|
||||
;;; if false, dot files will not be matched.
|
||||
;;; pattern-list := a list of regular expressions or predicates
|
||||
;;; Each member of the list corresponds
|
||||
;;; to one or more levels in a directory.
|
||||
;;; (A member with embedded "/" characters
|
||||
;;; corresponds to multiple levels.)
|
||||
;;; Example: ("foo" "bar" "\\.c$")
|
||||
;;; pattern-list := a list of
|
||||
;;; - strings
|
||||
;;; These are split at /'s and then
|
||||
;;; treated as Posix regexp strings.
|
||||
;;; - regexps (typically made with RX macro)
|
||||
;;; - predicates
|
||||
;;; Each member of the list corresponds to one
|
||||
;;; or more levels in a directory. (A string
|
||||
;;; with embedded "/" characters corresponds
|
||||
;;; to multiple levels.)
|
||||
;;; Example:
|
||||
;;; (file-match "." #f "foo" "bar" "\\.c$")
|
||||
;;; means match files that end in ".c"
|
||||
;;; if they reside in a directory with
|
||||
;;; a name that contains "bar", which
|
||||
;;; itself must reside in a directory
|
||||
;;; with a name that contains "foo".
|
||||
;;; Here are two more equivalent specs
|
||||
;;; for the example above:
|
||||
;;; (file-match "." #f "foo/bar/\\.c$")
|
||||
;;; (file-match "." #f (rx "foo") (rx "bar")
|
||||
;;; (rx ".c" eos))
|
||||
;;; If a member in the list is a predicate,
|
||||
;;; the predicate must be a procedure of
|
||||
;;; one argument. This procedure is applied
|
||||
|
@ -40,16 +51,19 @@
|
|||
;;; when FILE-DIRECTORY? is applied to the bogus symlink.
|
||||
|
||||
(define (file-match root dot-files? . patterns)
|
||||
(let ((patterns (apply append (map split-pat patterns))))
|
||||
(let ((patterns (apply append
|
||||
(map (lambda (p) (if (string? p)
|
||||
(map posix-string->regexp (split-pat p))
|
||||
p))
|
||||
patterns))))
|
||||
(let recur ((root root)
|
||||
(patterns patterns))
|
||||
(if (pair? patterns)
|
||||
(let* ((pattern (car patterns))
|
||||
(patterns (cdr patterns))
|
||||
(dir (file-name-as-directory root))
|
||||
(matcher (cond ((string? pattern)
|
||||
(let ((re (make-regexp pattern)))
|
||||
(lambda (f) (regexp-exec re f))))
|
||||
(matcher (cond ((regexp? pattern)
|
||||
(lambda (f) (regexp-search? re f)))
|
||||
|
||||
;; This arm makes a file-matcher using
|
||||
;; predicate PATTERN. If PATTERN signals
|
||||
|
@ -87,7 +101,8 @@
|
|||
(if (procedure? pat) (list pat)
|
||||
(let lp ((i (string-length pat))
|
||||
(ans '()))
|
||||
(cond ((rindex pat #\/ i) =>
|
||||
(lambda (j) (lp (cons (substring pat (+ j 1) i) ans) j)))
|
||||
(cond ((string-index-right pat #\/ i) =>
|
||||
(lambda (j) (lp j (cons (substring pat (+ j 1) i) ans))))
|
||||
(else
|
||||
(cons (substring pat 0 i) ans))))))
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
|
||||
;;; Returns FNAME's directory component in *directory form.*
|
||||
(define (file-name-directory fname)
|
||||
(cond ((rindex fname #\/) =>
|
||||
(cond ((string-index-right fname #\/) =>
|
||||
(lambda (rslash)
|
||||
(if (last-non-slash fname)
|
||||
(substring fname 0 (+ 1 rslash))
|
||||
|
@ -77,7 +77,7 @@
|
|||
|
||||
|
||||
(define (file-name-nondirectory fname)
|
||||
(cond ((rindex fname #\/) =>
|
||||
(cond ((string-index-right fname #\/) =>
|
||||
(lambda (rslash)
|
||||
(if (last-non-slash fname)
|
||||
(substring fname (+ 1 rslash) (string-length fname))
|
||||
|
@ -90,7 +90,7 @@
|
|||
(len (string-length fname)))
|
||||
(let split ((start 0))
|
||||
(cond ((>= start len) '())
|
||||
((index fname #\/ start) =>
|
||||
((string-index fname #\/ start) =>
|
||||
(lambda (slash)
|
||||
(cons (substring fname start slash)
|
||||
(split (+ slash 1)))))
|
||||
|
@ -128,7 +128,7 @@
|
|||
;;; /usr/shivers/.login are not considered extensions.
|
||||
|
||||
(define (file-name-extension-index fname)
|
||||
(let ((dot (rindex fname #\.)))
|
||||
(let ((dot (string-index-right fname #\.)))
|
||||
(if (and dot
|
||||
(> dot 0)
|
||||
(not (char=? #\/ (string-ref fname (- dot 1)))))
|
||||
|
@ -154,7 +154,7 @@
|
|||
(let* ((user (substring fname 1 end))
|
||||
(ui (name->user-info user)))
|
||||
(user-info:home-dir ui))))))
|
||||
(cond ((index fname #\/ 1) =>
|
||||
(cond ((string-index fname #\/ 1) =>
|
||||
(lambda (slash)
|
||||
(string-append (tilde->homedir slash) "/"
|
||||
(substring fname (+ slash 1) len))))
|
||||
|
@ -256,7 +256,7 @@
|
|||
(let ((len (string-length s)))
|
||||
(cond
|
||||
((zero? len) (apply string-append (reverse! ans)))
|
||||
((index s #\$) =>
|
||||
((string-index s #\$) =>
|
||||
(lambda (i)
|
||||
(let ((ans (cons (substring s 0 i) ans))
|
||||
(s (substring s (+ i 1) len))
|
||||
|
@ -264,13 +264,13 @@
|
|||
(if (zero? len) (lp ans "")
|
||||
(let ((next-char (string-ref s 0)))
|
||||
(cond ((char=? #\{ next-char)
|
||||
(cond ((index s #\}) =>
|
||||
(cond ((string-index s #\}) =>
|
||||
(lambda (i)
|
||||
(lp (cons (getenv (substring s 1 i)) ans)
|
||||
(substring s (+ i 1) len))))
|
||||
(else (error "Unbalanced ${ delimiter in string" s))))
|
||||
(else
|
||||
(let ((i (or (index s #\/) len)))
|
||||
(let ((i (or (string-index s #\/) len)))
|
||||
(lp (cons (getenv (substring s 0 i)) ans)
|
||||
(substring s i len))))))))))
|
||||
(else (lp (cons s ans) ""))))))
|
||||
|
|
62
scsh/fr.scm
62
scsh/fr.scm
|
@ -54,43 +54,12 @@
|
|||
;;; This has the effect you want with field parsing. For example, if you split
|
||||
;;; a string with the empty pattern, you will explode the string into its
|
||||
;;; individual characters:
|
||||
;;; ((suffix-splitter "") "foo") -> #("" "f" "o" "o")
|
||||
;;; ((suffix-splitter (rx "")) "foo") -> #("" "f" "o" "o")
|
||||
;;; However, even though this boundary case is handled correctly, we don't
|
||||
;;; recommend using it. Say what you mean -- just use a field splitter:
|
||||
;;; ((field-splitter ".") "foo") -> #("f" "o" "o")
|
||||
;;; ((field-splitter (rx any)) "foo") -> #("f" "o" "o")
|
||||
|
||||
|
||||
|
||||
;;; (join-strings string-list [delimiter grammar]) => string
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Paste strings together using the delimiter string.
|
||||
;;;
|
||||
;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
|
||||
;;;
|
||||
;;; DELIMITER defaults to a single space " "
|
||||
;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix.
|
||||
|
||||
;;; (join-strings strings [delim grammar])
|
||||
|
||||
(define (join-strings strings . args)
|
||||
(if (pair? strings)
|
||||
(let-optionals args ((delim " ") (grammar 'infix))
|
||||
(check-arg string? delim join-strings)
|
||||
(let ((strings (reverse strings)))
|
||||
(let lp ((strings (cdr strings))
|
||||
(ans (case grammar
|
||||
((infix) (list (car strings)))
|
||||
((suffix) (list (car strings) delim))
|
||||
(else (error "Illegal grammar" grammar)))))
|
||||
(if (pair? strings)
|
||||
(lp (cdr strings)
|
||||
(cons (car strings) (cons delim ans)))
|
||||
|
||||
; All done
|
||||
(apply string-append ans)))))
|
||||
|
||||
"")) ; Special-cased for infix grammar.
|
||||
|
||||
;;; FIELD PARSERS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; This section defines routines to split a string into fields.
|
||||
|
@ -100,15 +69,11 @@
|
|||
|
||||
(define (->delim-matcher x)
|
||||
(if (procedure? x) x ; matcher proc
|
||||
(let ((re (cond ((regexp? x) x) ; regexp pattern
|
||||
((string? x) (make-regexp x)) ; regexp string
|
||||
(else (error "Illegal pattern/parser" x)))))
|
||||
|
||||
;; The matcher proc.
|
||||
(lambda (s i)
|
||||
(cond ((regexp-exec re s i) =>
|
||||
(cond ((regexp-search x s i) =>
|
||||
(lambda (m) (values (match:start m 0) (match:end m 0))))
|
||||
(else (values #f #f)))))))
|
||||
(else (values #f #f))))))
|
||||
|
||||
;;; (infix-splitter [re num-fields handle-delim]) -> parser
|
||||
;;; (suffix-splitter [re num-fields handle-delim]) -> parser
|
||||
|
@ -154,7 +119,8 @@
|
|||
match-delim cons-field
|
||||
num-fields nfields-exact?))))))))
|
||||
|
||||
(define default-field-matcher (->delim-matcher "[^ \t\n]+"))
|
||||
;;; Default field spec is runs of non-whitespace chars.
|
||||
(define default-field-matcher (->delim-matcher (rx (+ (~ white)))))
|
||||
|
||||
;;; (field-splitter [field-spec num-fields])
|
||||
|
||||
|
@ -313,8 +279,8 @@
|
|||
|
||||
;;; Now, build the exported procedures: {infix,suffix,sloppy-suffix}-splitter.
|
||||
|
||||
(define default-suffix-matcher (->delim-matcher "[ \t\n]+|$"))
|
||||
(define default-infix-matcher (->delim-matcher "[ \t\n]+"))
|
||||
(define default-suffix-matcher (->delim-matcher (rx (| (+ white) eos))))
|
||||
(define default-infix-matcher (->delim-matcher (rx (+ white))))
|
||||
|
||||
(define infix-splitter
|
||||
(make-field-parser-generator default-infix-matcher infix-field-loop))
|
||||
|
@ -408,21 +374,17 @@
|
|||
;;; Repeatedly do (APPLY PROC M STATE) to generate new state values,
|
||||
;;; where M is a regexp match structure made from matching against STRING.
|
||||
|
||||
;(define (regexp-reduce string start regexp proc . state)
|
||||
; (let ((end (string-length string))
|
||||
; (regexp (if (string? regexp)
|
||||
; (make-regexp regexp)
|
||||
; regexp)))
|
||||
;
|
||||
;(define (regexp-fold string start regexp proc . state)
|
||||
; (let ((end (string-length string)))
|
||||
; (let lp ((i start) (state state) (last-null? #f))
|
||||
; (let ((j (if last-null? (+ i 1) i)))
|
||||
; (cond ((and (<= j end) (regexp-exec regexp string j)) =>
|
||||
; (cond ((and (<= j end) (regexp-search regexp string j)) =>
|
||||
; (lambda (m)
|
||||
; (receive state (apply proc m state)
|
||||
; (lp (match:end m) state (= (match:start m) (match:end m))))))
|
||||
; (else (apply values state)))))))
|
||||
;
|
||||
;(define (all-regexp-matches regexp string)
|
||||
; (reverse (regexp-reduce string 0 regexp
|
||||
; (reverse (regexp-fold string 0 regexp
|
||||
; (lambda (m ans) (cons (match:substring m 0) ans))
|
||||
; '())))
|
||||
|
|
119
scsh/glob.scm
119
scsh/glob.scm
|
@ -76,8 +76,8 @@
|
|||
|
||||
(else (let* ((dots? (char=? #\. (string-ref pat 0))) ; Match dot files?
|
||||
(candidates (maybe-directory-files fname dots?))
|
||||
(re (make-regexp (glob->regexp pat))))
|
||||
(values (filter (lambda (f) (regexp-exec re f)) candidates)
|
||||
(re (glob->regexp pat)))
|
||||
(values (filter (lambda (f) (regexp-search? re f)) candidates)
|
||||
#t))))) ; These guys exist for sure.
|
||||
|
||||
;;; The initial special-case above isn't really for the fast-path; it's
|
||||
|
@ -87,43 +87,96 @@
|
|||
|
||||
;;; Translate a brace-free glob pattern to a regular expression.
|
||||
|
||||
(define (glob->regexp pat)
|
||||
(let ((pat-len (string-length pat)))
|
||||
(let lp ((result '(#\^))
|
||||
(i 0)
|
||||
(state 'normal))
|
||||
(define glob->regexp
|
||||
(let ((dot-star (re-repeat 0 #f re-any))) ; ".*" or (* any)
|
||||
(lambda (pat)
|
||||
(let ((pat-len (string-length pat))
|
||||
|
||||
(str-cons (lambda (chars res) ; Reverse CHARS and cons the
|
||||
(if (pair? chars) ; result string-re onto RES.
|
||||
(cons (re-string (list->string (reverse chars)))
|
||||
res)
|
||||
res))))
|
||||
|
||||
;; We accumulate chars into CHARS, and coalesce into a single string
|
||||
;; with STR-CONS when we run across a non-char.
|
||||
(let lp ((chars '())
|
||||
(res (list re-bos))
|
||||
(i 0))
|
||||
(if (= i pat-len)
|
||||
|
||||
(if (eq? state 'normal)
|
||||
(list->string (reverse (cons #\$ result)))
|
||||
(error "Illegal glob pattern" pat))
|
||||
|
||||
(re-seq (reverse (str-cons chars res)))
|
||||
|
||||
(let ((c (string-ref pat i))
|
||||
(i (+ i 1)))
|
||||
(case state
|
||||
((char-set)
|
||||
(lp (cons c result)
|
||||
i
|
||||
(if (char=? c #\]) 'normal 'char-set)))
|
||||
(case c
|
||||
((#\\) (if (< i pat-len)
|
||||
(lp (cons (string-ref pat i) chars)
|
||||
res (+ i 1))
|
||||
(error "Ill-formed glob pattern -- ends in backslash" pat)))
|
||||
|
||||
((escape)
|
||||
(lp (case c
|
||||
((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[)
|
||||
(cons c (cons #\\ result)))
|
||||
(else (cons c result)))
|
||||
i
|
||||
'normal))
|
||||
((#\*) (lp '()
|
||||
(cons dot-star (str-cons chars res))
|
||||
i))
|
||||
((#\?) (lp '()
|
||||
(cons re-any (str-cons chars res))
|
||||
i))
|
||||
|
||||
;; Normal
|
||||
(else (case c
|
||||
((#\\) (lp result i 'escape))
|
||||
((#\*) (lp (cons #\* (cons #\. result)) i 'normal))
|
||||
((#\?) (lp (cons #\. result) i 'normal))
|
||||
((#\[) (lp (cons c result) i 'char-set))
|
||||
((#\$ #\^ #\. #\+ #\| #\( #\))
|
||||
(lp (cons c (cons #\\ result)) i 'normal))
|
||||
(else (lp (cons c result) i 'normal))))))))))
|
||||
((#\[) (receive (cset i) (parse-glob-bracket pat i)
|
||||
(lp '()
|
||||
(cons (re-char-set cset)
|
||||
(str-cons chars res))
|
||||
i)))
|
||||
|
||||
(else (lp (cons c chars) res i))))))))))
|
||||
|
||||
|
||||
;;; A glob bracket expression is [...] or [^...].
|
||||
;;; The body is a sequence of <char> and <char>-<char> ranges.
|
||||
;;; A <char> is any character except right-bracket, carat, hypen or backslash,
|
||||
;;; or a backslash followed by any character at all.
|
||||
|
||||
(define (parse-glob-bracket pat i)
|
||||
(let ((pat-len (string-length pat)))
|
||||
(receive (negate? i) (if (and (< i pat-len) (char=? #\^ (string-ref pat i)))
|
||||
(values #t (+ i 1))
|
||||
(values #f i))
|
||||
|
||||
(let lp ((elts '()) (i i))
|
||||
(if (>= i pat-len)
|
||||
(error "Ill-formed glob pattern -- no terminating close-bracket" pat)
|
||||
|
||||
(let ((c (string-ref pat i))
|
||||
(i (+ i 1)))
|
||||
(case c
|
||||
((#\])
|
||||
(let ((cset (fold (lambda (elt cset)
|
||||
(char-set-union
|
||||
cset
|
||||
(if (char? elt)
|
||||
(char-set elt)
|
||||
(ascii-range->char-set (char->ascii (car elt))
|
||||
(+ 1 (char->ascii (cdr elt)))))))
|
||||
char-set:empty
|
||||
elts)))
|
||||
(values (re-char-set (if negate?
|
||||
(char-set-invert cset)
|
||||
cset))
|
||||
i)))
|
||||
|
||||
((#\\)
|
||||
(if (>= i pat-len)
|
||||
(error "Ill-formed glob pattern -- ends in backslash" pat)
|
||||
(lp (cons (string-ref pat i) elts) (+ i 1))))
|
||||
|
||||
((#\-)
|
||||
(cond ((>= i pat-len)
|
||||
(error "Ill-formed glob pattern -- unterminated range." pat))
|
||||
((or (null? elts) (not (char? (car elts))))
|
||||
(error "Ill-formed glob pattern -- range has no beginning." pat))
|
||||
(else (lp (cons (cons (car elts) (string-ref pat i)) elts)
|
||||
(+ i 1)))))
|
||||
|
||||
(else (lp (cons c elts) i)))))))))
|
||||
|
||||
|
||||
;;; Is the glob pattern free of *'s, ?'s and [...]'s?
|
||||
|
|
|
@ -296,7 +296,7 @@
|
|||
(define reaped-procs '()) ; Reaped, but not yet waited.
|
||||
|
||||
(define (filter-weak-ptr-list pred lis)
|
||||
(foldr (lambda (wptr result) (let ((val (weak-pointer-ref wptr)))
|
||||
(fold-right (lambda (wptr result) (let ((val (weak-pointer-ref wptr)))
|
||||
(if (and val (pred val))
|
||||
(cons wptr result)
|
||||
result)))
|
||||
|
|
|
@ -264,7 +264,7 @@
|
|||
;;; (read-paragraph [port handle-delim])
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define blank-line-regexp (make-regexp "^[ \t]*\n$"))
|
||||
(define blank-line-regexp (rx bos (* white) #\newline eos))
|
||||
|
||||
(define (read-paragraph . args)
|
||||
(let-optionals args ((port (current-input-port))
|
||||
|
@ -275,14 +275,14 @@
|
|||
(cond ((eof-object? line)
|
||||
(if (eq? handle-delim 'split) (values line line) line))
|
||||
|
||||
((regexp-exec blank-line-regexp line) (lp))
|
||||
((regexp-search? blank-line-regexp line) (lp))
|
||||
|
||||
;; Then, read in non-blank lines.
|
||||
(else
|
||||
(let lp ((lines (list line)))
|
||||
(let ((line (read-line port 'concat)))
|
||||
(if (and (string? line)
|
||||
(not (regexp-exec blank-line-regexp line)))
|
||||
(not (regexp-search? blank-line-regexp line)))
|
||||
|
||||
(lp (cons line lines))
|
||||
|
||||
|
|
69
scsh/re.c
69
scsh/re.c
|
@ -1,69 +0,0 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by cig.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> /* For malloc. */
|
||||
#include "libcig.h"
|
||||
|
||||
/* Make sure foreign-function stubs interface to the C funs correctly: */
|
||||
#include "re1.h"
|
||||
|
||||
scheme_value df_re_byte_len(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_byte_len(const char *, int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(2, nargs, "re_byte_len");
|
||||
r1 = re_byte_len(cig_string_body(args[1]), &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_FIXNUM(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_compile(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_compile(const char *, scheme_value );
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
|
||||
cig_check_nargs(3, nargs, "re_compile");
|
||||
r1 = re_compile(cig_string_body(args[2]), args[1]);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_exec(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_exec(scheme_value , const char *, int , scheme_value , scheme_value , int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(6, nargs, "re_exec");
|
||||
r1 = re_exec(args[5], cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_match(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *re_match(const char *, const char *, int , scheme_value , scheme_value , int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(6, nargs, "re_match");
|
||||
r1 = re_match(cig_string_body(args[5]), cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
283
scsh/re.scm
283
scsh/re.scm
|
@ -1,283 +0,0 @@
|
|||
;;; Regular expression matching for scsh
|
||||
;;; Copyright (c) 1994 by Olin Shivers.
|
||||
|
||||
(foreign-source
|
||||
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
||||
"#include \"re1.h\""
|
||||
"" ""
|
||||
)
|
||||
|
||||
;;; Match data for regexp matches.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record regexp-match
|
||||
string ; The string against which we matched.
|
||||
start ; 10 elt vec
|
||||
end) ; 10 elt vec
|
||||
|
||||
(define (match:start match . maybe-index)
|
||||
(vector-ref (regexp-match:start match)
|
||||
(:optional maybe-index 0)))
|
||||
|
||||
(define (match:end match . maybe-index)
|
||||
(vector-ref (regexp-match:end match)
|
||||
(:optional maybe-index 0)))
|
||||
|
||||
(define (match:substring match . maybe-index)
|
||||
(let* ((i (:optional maybe-index 0))
|
||||
(start (vector-ref (regexp-match:start match) i)))
|
||||
(and start (substring (regexp-match:string match)
|
||||
start
|
||||
(vector-ref (regexp-match:end match) i)))))
|
||||
|
||||
;;; Compiling regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record %regexp
|
||||
string ; The string form of the regexp.
|
||||
bytes ; The compiled representation, stuffed into a Scheme string.
|
||||
((disclose self) (list "Regexp" (%regexp:string self))))
|
||||
|
||||
(define regexp? %regexp?)
|
||||
|
||||
|
||||
(define (make-regexp pattern)
|
||||
(receive (err len) (%regexp-compiled-length pattern)
|
||||
(if err (error err make-regexp pattern)
|
||||
(let ((buf (make-string len)))
|
||||
(%regexp-compile pattern buf)
|
||||
(make-%regexp pattern buf)))))
|
||||
|
||||
(define-foreign %regexp-compiled-length (re_byte_len (string pattern))
|
||||
static-string ; Error msg or #f
|
||||
integer) ; number of bytes needed to compile REGEXP.
|
||||
|
||||
(define-foreign %regexp-compile (re_compile (string pattern)
|
||||
(string-desc bytes))
|
||||
static-string) ; Error msg or #f
|
||||
|
||||
|
||||
(define (->regexp x)
|
||||
(cond ((string? x) (make-regexp x))
|
||||
((regexp? x) x)
|
||||
(else (error "Not a regexp or string." x))))
|
||||
|
||||
|
||||
;;; Executing compiled regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (regexp-exec regexp str . maybe-start)
|
||||
(let ((start (:optional maybe-start 0))
|
||||
(start-vec (make-vector 10))
|
||||
(end-vec (make-vector 10)))
|
||||
(receive (err match?)
|
||||
(%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec)
|
||||
(if err (error err regexp-exec regexp str start)
|
||||
(and match?
|
||||
(make-regexp-match str start-vec end-vec))))))
|
||||
|
||||
(define-foreign %regexp-exec (re_exec (string-desc compiled-regexp)
|
||||
(string s)
|
||||
(integer start)
|
||||
(vector-desc start-vec)
|
||||
(vector-desc end-vec))
|
||||
static-string ; Error msg or #f
|
||||
bool) ; Matched?
|
||||
|
||||
|
||||
;;; Compile&match regexps in one go
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; I could do this with the separate compile and execute procedures,
|
||||
;;; but I go straight to C just for fun.
|
||||
|
||||
(define (string-match pattern string . maybe-start)
|
||||
(let ((start (:optional maybe-start 0))
|
||||
(start-vec (make-vector 10))
|
||||
(end-vec (make-vector 10)))
|
||||
(receive (err match?) (if (regexp? pattern)
|
||||
(%regexp-exec (%regexp:bytes pattern)
|
||||
string start start-vec end-vec)
|
||||
(%string-match pattern string start
|
||||
start-vec end-vec))
|
||||
(if err (error err string-match pattern string start)
|
||||
(and match? (make-regexp-match string start-vec end-vec))))))
|
||||
|
||||
(define-foreign %string-match (re_match (string pattern)
|
||||
(string s)
|
||||
(integer start)
|
||||
(vector-desc start-vec)
|
||||
(vector-desc end-vec))
|
||||
static-string ; Error string or #f if all is ok.
|
||||
bool) ; match?
|
||||
|
||||
|
||||
|
||||
;;; Substitutions
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define (regexp-substitute port match . items)
|
||||
(let* ((str (regexp-match:string match))
|
||||
(sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(range (lambda (item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values 0 (vector-ref sv 0)))
|
||||
((eq? 'post item) (values (vector-ref ev 0)
|
||||
(string-length str)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute))))))
|
||||
(if port
|
||||
|
||||
;; Output port case.
|
||||
(for-each (lambda (item)
|
||||
(if (string? item) (write-string item port)
|
||||
(receive (si ei) (range item)
|
||||
(write-string str port si ei))))
|
||||
items)
|
||||
|
||||
;; Here's the string case. Make two passes -- one to
|
||||
;; compute the length of the target string, one to fill it in.
|
||||
(let* ((len (reduce (lambda (i item)
|
||||
(+ i (if (string? item) (string-length item)
|
||||
(receive (si ei) (range item) (- ei si)))))
|
||||
0 items))
|
||||
(ans (make-string len)))
|
||||
|
||||
(reduce (lambda (index item)
|
||||
(cond ((string? item)
|
||||
(string-replace! ans index item)
|
||||
(+ index (string-length item)))
|
||||
(else (receive (si ei) (range item)
|
||||
(substring-replace! ans index str si ei)
|
||||
(+ index (- ei si))))))
|
||||
0 items)
|
||||
ans))))
|
||||
|
||||
|
||||
|
||||
(define (regexp-substitute/global port re str . items)
|
||||
(let ((re (->regexp re))
|
||||
(str-len (string-length str))
|
||||
(range (lambda (start sv ev item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values start (vector-ref sv 0)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute/global)))))
|
||||
(num-posts (reduce (lambda (count item)
|
||||
(+ count (if (eq? item 'post) 1 0)))
|
||||
0 items)))
|
||||
|
||||
(if (and port (< num-posts 2))
|
||||
|
||||
;; Output port case, with zero or one POST items.
|
||||
(let recur ((start 0))
|
||||
(if (<= start str-len)
|
||||
(let ((match (regexp-exec re str start)))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(s (vector-ref sv 0))
|
||||
(e (vector-ref ev 0))
|
||||
(empty? (= s e)))
|
||||
(for-each (lambda (item)
|
||||
(cond ((string? item) (write-string item port))
|
||||
|
||||
((procedure? item) (write-string (item match) port))
|
||||
|
||||
((eq? 'post0 item)
|
||||
(if (and empty? (< s str-len))
|
||||
(write-char (string-ref str s) port)))
|
||||
|
||||
((eq? 'post item)
|
||||
(recur (if empty? (+ 1 e) e)))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(write-string str port si ei)))))
|
||||
items))
|
||||
|
||||
(write-string str port start))))) ; No match.
|
||||
|
||||
;; Either we're making a string, or >1 POST.
|
||||
(let* ((pieces (let recur ((start 0))
|
||||
(if (> start str-len) '()
|
||||
(let ((match (regexp-exec re str start))
|
||||
(cached-post #f))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(s (vector-ref sv 0))
|
||||
(e (vector-ref ev 0))
|
||||
(empty? (= s e)))
|
||||
(reduce (lambda (pieces item)
|
||||
(cond ((string? item)
|
||||
(cons item pieces))
|
||||
|
||||
((procedure? item)
|
||||
(cons (item match) pieces))
|
||||
|
||||
((eq? 'post0 item)
|
||||
(if (and empty? (< s str-len))
|
||||
(cons (string (string-ref str s))
|
||||
pieces)
|
||||
pieces))
|
||||
|
||||
((eq? 'post item)
|
||||
(if (not cached-post)
|
||||
(set! cached-post
|
||||
(recur (if empty? (+ e 1) e))))
|
||||
(append cached-post pieces))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(cons (substring str si ei)
|
||||
pieces)))))
|
||||
'() items))
|
||||
|
||||
;; No match. Return str[start,end].
|
||||
(list (if (zero? start) str
|
||||
(substring str start (string-length str)))))))))
|
||||
|
||||
(pieces (reverse pieces)))
|
||||
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
||||
(apply string-append pieces))))))
|
||||
|
||||
|
||||
|
||||
;;; Miscellaneous
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Convert a string into a regex pattern that matches that string exactly --
|
||||
;;; in other words, quote the special chars with backslashes.
|
||||
|
||||
(define (regexp-quote string)
|
||||
(let lp ((i (- (string-length string) 1))
|
||||
(result '()))
|
||||
(if (< i 0) (list->string result)
|
||||
(lp (- i 1)
|
||||
(let* ((c (string-ref string i))
|
||||
(result (cons c result)))
|
||||
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
|
||||
(cons #\\ result)
|
||||
result))))))
|
||||
|
||||
|
||||
;;; Count the number of possible sub-matches in a regexp
|
||||
;;; (i.e., the number of left parens).
|
||||
|
||||
(define (regexp-num-submatches s)
|
||||
(let* ((len (string-length s))
|
||||
(len-1 (- len 1)))
|
||||
(let lp ((i 0) (nsm 0))
|
||||
(if (= i len) nsm
|
||||
(case (string-ref s i)
|
||||
((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm))
|
||||
((#\() (lp (+ i 1) (+ nsm 1)))
|
||||
(else (lp (+ i 1) nsm)))))))
|
194
scsh/re1.c
194
scsh/re1.c
|
@ -1,194 +0,0 @@
|
|||
/* Scheme48 interface to Henry Spencer's regular expression package.
|
||||
** Copyright (c) 1993, 1994 by Olin Shivers.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "regexp.h"
|
||||
#include "cstuff.h"
|
||||
|
||||
/* Make sure our exports match up w/the implementation: */
|
||||
#include "re1.h"
|
||||
|
||||
/* Not multi-threaded reentrant. */
|
||||
static char *regexp_error;
|
||||
|
||||
/* Stash error msg in global. */
|
||||
void regerror(char *msg) {regexp_error = msg;}
|
||||
|
||||
/*
|
||||
** Return NULL normally, error string on error.
|
||||
** Stash number of bytes needed for compiled regexp into `*len'
|
||||
*/
|
||||
|
||||
char *re_byte_len(const char *re, int *len)
|
||||
{
|
||||
int l;
|
||||
|
||||
regexp_error = 0;
|
||||
*len = regcomp_len(re);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return NULL normally, error string on error.
|
||||
** Compile regexp into string described by `cr'.
|
||||
*/
|
||||
|
||||
char *re_compile(const char *re, scheme_value cr)
|
||||
{
|
||||
int len = STRING_LENGTH(cr);
|
||||
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
||||
|
||||
regexp_error = 0;
|
||||
regcomp_comp(re, r, len);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
/* Return NULL normally, error string on error.
|
||||
** Stash match info in start_vec and end_vec.
|
||||
** Returns boolean match/no-match in hit.
|
||||
*/
|
||||
|
||||
char *re_exec(scheme_value cr, const char *string, int start,
|
||||
scheme_value start_vec, scheme_value end_vec, int *hit)
|
||||
{
|
||||
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
||||
|
||||
*hit = 0;
|
||||
|
||||
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
regexp_error = 0;
|
||||
|
||||
if( regexec(r, string+start) ) {
|
||||
int i;
|
||||
for(i=0; i<NSUBEXP; i++) {
|
||||
const char *s = r->startp[i];
|
||||
const char *e = r->endp[i];
|
||||
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
|
||||
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
|
||||
r->startp[i] = 0; /* Why did Sommerfeld */
|
||||
r->endp[i] = 0; /* put these here? */
|
||||
}
|
||||
*hit = 1;
|
||||
}
|
||||
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
char *re_subst(scheme_value cr, const char *match,
|
||||
const char *src, int start,
|
||||
scheme_value start_vec, scheme_value end_vec,
|
||||
scheme_value outbuf, int *len)
|
||||
{
|
||||
int i;
|
||||
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
||||
|
||||
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
for (i=0; i<NSUBEXP; i++) {
|
||||
scheme_value se = VECTOR_REF(start_vec, i);
|
||||
scheme_value ee = VECTOR_REF(end_vec, i);
|
||||
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
|
||||
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
|
||||
}
|
||||
|
||||
regexp_error = 0;
|
||||
regnsub(r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
|
||||
*len = strlen(&STRING_REF(outbuf, 0));
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
char *re_subst_len(scheme_value cr, const char *match,
|
||||
const char *src, int start,
|
||||
scheme_value start_vec, scheme_value end_vec,
|
||||
int *len)
|
||||
{
|
||||
int i;
|
||||
regexp *r = (regexp *) &STRING_REF(cr, 0);
|
||||
|
||||
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
|
||||
return "Illegal start vector"; /* never trigger. */
|
||||
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
|
||||
return "Illegal end vector";
|
||||
|
||||
for (i=0; i<NSUBEXP; i++) {
|
||||
scheme_value se = VECTOR_REF(start_vec, i);
|
||||
scheme_value ee = VECTOR_REF(end_vec, i);
|
||||
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
|
||||
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
|
||||
}
|
||||
|
||||
regexp_error = 0;
|
||||
*len = regsublen(r, src);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
/* Return NULL normally, error string on error.
|
||||
** Stash match info in start_vec and end_vec.
|
||||
** Returns boolean match/no-match in hit.
|
||||
*/
|
||||
|
||||
char *re_match(const char *re, const char *string, int start,
|
||||
scheme_value start_vec, scheme_value end_vec, int *hit)
|
||||
{
|
||||
regexp *prog;
|
||||
|
||||
regexp_error = 0;
|
||||
*hit = 0;
|
||||
prog = regcomp(re);
|
||||
if( !prog ) return regexp_error;
|
||||
|
||||
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) { /* These two tests */
|
||||
Free(prog);
|
||||
return "Illegal start vector";
|
||||
}
|
||||
|
||||
if( VECTOR_LENGTH(end_vec) != NSUBEXP ) { /* should never trigger. */
|
||||
Free(prog);
|
||||
return "Illegal end vector";
|
||||
}
|
||||
|
||||
if( regexec(prog, string+start) ) {
|
||||
int i;
|
||||
for(i=0; i<NSUBEXP; i++) {
|
||||
const char *s = prog->startp[i];
|
||||
const char *e = prog->endp[i];
|
||||
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
|
||||
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
|
||||
}
|
||||
*hit = 1;
|
||||
}
|
||||
|
||||
Free(prog);
|
||||
return regexp_error;
|
||||
}
|
||||
|
||||
|
||||
char *filter_stringvec(const char *re, char const **stringvec, int *nummatch)
|
||||
{
|
||||
regexp *prog;
|
||||
regexp_error = 0;
|
||||
|
||||
if( prog=regcomp(re) ) {
|
||||
char const **p = stringvec;
|
||||
char const **q = p;
|
||||
|
||||
while(*p) {
|
||||
if( regexec(prog, *p) ) *q++ = *p;
|
||||
p++;
|
||||
}
|
||||
Free(prog);
|
||||
*nummatch = q-stringvec;
|
||||
}
|
||||
|
||||
return regexp_error;
|
||||
}
|
11
scsh/re1.h
11
scsh/re1.h
|
@ -1,11 +0,0 @@
|
|||
/* Exports from re1.c */
|
||||
|
||||
char *re_byte_len(const char *re, int *len);
|
||||
char *re_compile(const char *re, scheme_value target);
|
||||
|
||||
char *re_exec(scheme_value cr, const char *string, int start,
|
||||
scheme_value start_vec, scheme_value end_vec, int *hit);
|
||||
|
||||
char *re_match(const char *re, const char *string, int start,
|
||||
scheme_value start_vec, scheme_value end_vec,
|
||||
int *hit);
|
|
@ -0,0 +1,61 @@
|
|||
/* This is an Scheme48/C interface file,
|
||||
** automatically generated by cig.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> /* For malloc. */
|
||||
#include "libcig.h"
|
||||
|
||||
/* Make sure foreign-function stubs interface to the C funs correctly: */
|
||||
#include <sys/types.h>
|
||||
#include "../regexp/regex.h"
|
||||
#include "re1.h"
|
||||
|
||||
scheme_value df_compile_re(long nargs, scheme_value *args)
|
||||
{
|
||||
extern int compile_re(scheme_value , int , regex_t* *);
|
||||
scheme_value ret1;
|
||||
int r1;
|
||||
regex_t* r2;
|
||||
|
||||
cig_check_nargs(3, nargs, "compile_re");
|
||||
r1 = compile_re(args[2], EXTRACT_BOOLEAN(args[1]), &r2);
|
||||
ret1 = ENTER_FIXNUM(r1);
|
||||
AlienVal(VECTOR_REF(*args,0)) = (long) r2;
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_search(long nargs, scheme_value *args)
|
||||
{
|
||||
extern scheme_value re_search(const regex_t *, scheme_value , int , scheme_value , int , scheme_value , scheme_value );
|
||||
scheme_value ret1;
|
||||
scheme_value r1;
|
||||
|
||||
cig_check_nargs(7, nargs, "re_search");
|
||||
r1 = re_search((const regex_t *)AlienVal(args[6]), args[5], EXTRACT_FIXNUM(args[4]), args[3], EXTRACT_FIXNUM(args[2]), args[1], args[0]);
|
||||
ret1 = r1;
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_re_errint2str(long nargs, scheme_value *args)
|
||||
{
|
||||
extern const char *re_errint2str(int , const regex_t *);
|
||||
scheme_value ret1;
|
||||
const char *r1;
|
||||
|
||||
cig_check_nargs(3, nargs, "re_errint2str");
|
||||
r1 = re_errint2str(EXTRACT_FIXNUM(args[2]), (const regex_t *)AlienVal(args[1]));
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_free_re(long nargs, scheme_value *args)
|
||||
{
|
||||
extern void free_re(regex_t* );
|
||||
|
||||
cig_check_nargs(1, nargs, "free_re");
|
||||
free_re((regex_t* )AlienVal(args[0]));
|
||||
return SCHFALSE;
|
||||
}
|
||||
|
|
@ -4,8 +4,8 @@
|
|||
(foreign-source
|
||||
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
||||
"#include <sys/types.h>"
|
||||
"#include \"regex.h\""
|
||||
"#include \"scsh/re1.h\""
|
||||
"#include \"../regexp/regex.h\""
|
||||
"#include \"re1.h\""
|
||||
"" ""
|
||||
)
|
||||
|
||||
|
@ -53,7 +53,7 @@
|
|||
(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
|
||||
|
||||
(define (max-live-posix-submatch tvec)
|
||||
(vfoldl (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
|
||||
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
|
||||
|
||||
(define (compile-posix-re->c-struct re-string sm?)
|
||||
(receive (errcode c-struct) (%compile-re re-string sm?)
|
||||
|
@ -143,7 +143,7 @@
|
|||
|
||||
(define (clean-up-cres)
|
||||
(set! *master-cre-list*
|
||||
(foldl (lambda (elt lis)
|
||||
(fold (lambda (elt lis)
|
||||
(if (weak-pointer-ref (car elt)) ; Still alive
|
||||
(cons elt lis)
|
||||
(begin (%free-re (cdr elt))
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
/* Scheme48 interface to Henry Spencer's Posix regular expression package.
|
||||
** Copyright (c) 1993, 1994, 1998 by Olin Shivers.
|
||||
*/
|
||||
|
||||
/* Todo:
|
||||
** not_eol not_bol support on searchers
|
||||
** error code -> err msg
|
||||
** regex freeing
|
||||
** regexp-string -> regex_t caching
|
||||
** make filter_stringvec return an error code.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include "../regexp/regex.h"
|
||||
#include "../cstuff.h"
|
||||
|
||||
/* Make sure our exports match up w/the implementation: */
|
||||
#include "re1.h"
|
||||
|
||||
/*
|
||||
** Compile regexp into a malloc'd struct.
|
||||
** The flag sm_p is true if we want to compile for submatches.
|
||||
** On success, store pointer to struct into cr and return 0.
|
||||
** On failure, free the struct, store NULL into cr,
|
||||
** and return a non-zero error code.
|
||||
*/
|
||||
|
||||
int compile_re(scheme_value re_str, int sm_p, regex_t **cr)
|
||||
{
|
||||
char *s = &STRING_REF(re_str, 0);
|
||||
int len = STRING_LENGTH(re_str);
|
||||
int err;
|
||||
regex_t *re = Alloc(regex_t);
|
||||
|
||||
if( !re ) return -1;
|
||||
|
||||
re->re_endp = s + len;
|
||||
err = regcomp(re, s, REG_EXTENDED | REG_PEND
|
||||
| (sm_p ? 0 : REG_NOSUB));
|
||||
if( err ) {Free(re); *cr=0;}
|
||||
else *cr=re;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Do a regex search of RE through string STR, beginning at STR[START].
|
||||
** - STR is passed as a Scheme value as it is allowed to contain nul bytes.
|
||||
**
|
||||
** - trans_vec contains the translation from the user's "virtual" submatches to
|
||||
** the actual submatches the engine will report:
|
||||
** - trans_vec[i] = #F means user submatch #i is a dead submatch.
|
||||
** - trans_vec[i] = j means user submatch #i corresponds to paren #j in re.
|
||||
**
|
||||
** Indexing fence-posts are a little complicated due to the fact that you
|
||||
** get an extra match elt back from the matcher -- match 0 is not a
|
||||
** paren-based *sub*match, but rather the match info for the whole thing.
|
||||
**
|
||||
** So, here is how it works:
|
||||
** length(start_vec) = length(end_vec) = length(trans_vec) + 1
|
||||
** because trans_vec doesn't have a translation for submatch 0, which
|
||||
** is SRE submatch #0 => Posix submatch #0. For SRE submatch #i (1, 2, ...),
|
||||
** we want the submatch associated with Posix paren # trans_vec[i-1].
|
||||
**
|
||||
** - MAX_PSM is the maximum paren in which we have submatch interest -- the
|
||||
** max element in TRANS_VEC. Any parens after paren #MAX_PSM are just for
|
||||
** grouping, not for marking submatches. We only have to allocate MAX_PSM+1
|
||||
** elements in the submatch vector we pass into the search engine. If
|
||||
** MAX_PSM = -1, then we don't even want the whole-match match bounds, which
|
||||
** is really good -- the search engine can really fly in this case.
|
||||
**
|
||||
** If we match, map re's submatches over to the exported start_vec and
|
||||
** end_vec match vectors using trans_vec.
|
||||
**
|
||||
** Return 0 on success; #f if no match; non-zero integer error code otherwise.
|
||||
*/
|
||||
|
||||
scheme_value re_search(const regex_t *re, scheme_value str, int start,
|
||||
scheme_value trans_vec, int max_psm,
|
||||
scheme_value start_vec, scheme_value end_vec)
|
||||
{
|
||||
char *s = &STRING_REF(str,0); /* Passed as a scheme_value because */
|
||||
int len = STRING_LENGTH(str); /* it might contain nul bytes. */
|
||||
|
||||
int vlen = VECTOR_LENGTH(start_vec);
|
||||
int retval;
|
||||
|
||||
regmatch_t static_pmatch[10], *pm;
|
||||
|
||||
/* If max_psm+1 > 10, we can't use static_pmatch. */
|
||||
if( max_psm < 10 ) pm = static_pmatch;
|
||||
else {
|
||||
pm = Malloc(regmatch_t, max_psm+1);/* Add 1 for the whole-match info. */
|
||||
if( !pm ) return ENTER_FIXNUM(-1);
|
||||
}
|
||||
|
||||
pm[0].rm_so = start;
|
||||
pm[0].rm_eo = len;
|
||||
|
||||
retval = regexec(re, s, max_psm+1, pm, REG_STARTEND); /* Do it. */
|
||||
|
||||
/* We matched and have match-bound info, so translate it over. */
|
||||
if( !retval && max_psm >= 0 ) {
|
||||
int i;
|
||||
|
||||
VECTOR_REF(start_vec,0) = ENTER_FIXNUM(pm[0].rm_so); /* whole-match */
|
||||
VECTOR_REF(end_vec,0) = ENTER_FIXNUM(pm[0].rm_eo);
|
||||
|
||||
for( i=vlen-1; --i >= 0; ) { /* submatches */
|
||||
scheme_value j_scm = VECTOR_REF(trans_vec,i);
|
||||
if( j_scm != SCHFALSE ) {
|
||||
int j = EXTRACT_FIXNUM(j_scm);
|
||||
int k = pm[j].rm_so,
|
||||
l = pm[j].rm_eo;
|
||||
VECTOR_REF(start_vec,i+1) = (k != -1) ? ENTER_FIXNUM(k) : SCHFALSE;
|
||||
VECTOR_REF(end_vec, i+1) = (l != -1) ? ENTER_FIXNUM(l) : SCHFALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( max_psm >= 10 ) Free(pm);
|
||||
|
||||
if( retval==REG_NOMATCH ) return SCHFALSE;
|
||||
if( ! retval ) return SCHTRUE;
|
||||
return ENTER_FIXNUM(retval);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Filter a vector of strings by regexp RE_STR.
|
||||
** Stringvec is a NULL-terminated vector of strings;
|
||||
** filter it in-place, copying the survivors back to compact them.
|
||||
** Put the number of survivors in nummatch.
|
||||
*/
|
||||
|
||||
int filter_stringvec(scheme_value re_str, char const **stringvec)
|
||||
{
|
||||
int re_len = STRING_LENGTH(re_str);/* Passed as a scheme_value because */
|
||||
char *re_chars = &STRING_REF(re_str,0);/* it might contain nul bytes. */
|
||||
regex_t re;
|
||||
|
||||
char const **p, **q;
|
||||
|
||||
/* REG_NOSUB -- We just want to know if it matches or not. */
|
||||
re.re_endp = re_chars + re_len;
|
||||
if( regcomp(&re, re_chars, REG_EXTENDED | REG_PEND | REG_NOSUB) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(p=q=stringvec; *p; p++) {
|
||||
char const *s = *p;
|
||||
if( ! regexec(&re, s, 0, 0, 0) ) *q++ = s;
|
||||
}
|
||||
|
||||
regfree(&re);
|
||||
return q-stringvec;
|
||||
}
|
||||
|
||||
|
||||
const char *re_errint2str(int errcode, const regex_t *re)
|
||||
{
|
||||
int size = regerror(errcode, re, 0, 0);
|
||||
char *s = Malloc(char,size);
|
||||
if(s) regerror(errcode, re, s, size);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
void free_re(regex_t *re)
|
||||
{
|
||||
regfree(re);
|
||||
Free(re);
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
/* Exports from re1.c */
|
||||
|
||||
int compile_re(scheme_value sre, int sm_p, regex_t **cr);
|
||||
|
||||
scheme_value re_search(const regex_t *re, scheme_value str, int start,
|
||||
scheme_value trans_vec, int max_psm,
|
||||
scheme_value start_vec, scheme_value end_vec);
|
||||
|
||||
/* Filter a vector of strings by a regexp. */
|
||||
int filter_stringvec(scheme_value re_str, char const **stringvec);
|
||||
|
||||
/* Error code -> error msg */
|
||||
const char *re_errint2str(int errcode, const regex_t *re);
|
||||
|
||||
void free_re(regex_t *re); /* Free the malloc'd regexp. */
|
|
@ -208,7 +208,7 @@
|
|||
port->sexp-list
|
||||
port->string-list
|
||||
port->list
|
||||
reduce-port
|
||||
port-fold reduce-port
|
||||
port->fdes
|
||||
read-string
|
||||
read-string!
|
||||
|
@ -283,7 +283,7 @@
|
|||
directory-files
|
||||
glob
|
||||
glob-quote
|
||||
file-match
|
||||
; file-match
|
||||
|
||||
create-temp-file
|
||||
temp-file-iterate
|
||||
|
@ -418,6 +418,7 @@
|
|||
exec-path-list))
|
||||
|
||||
|
||||
;;; Kill me?
|
||||
(define-interface scsh-regexp-interface
|
||||
(export string-match
|
||||
regexp-match?
|
||||
|
@ -427,14 +428,14 @@
|
|||
make-regexp
|
||||
->regexp
|
||||
regexp?
|
||||
regexp-exec
|
||||
regexp-search
|
||||
regexp-substitute
|
||||
regexp-substitute/global
|
||||
regexp-quote))
|
||||
|
||||
|
||||
(define-interface scsh-string-interface
|
||||
(export substitute-env-vars index rindex))
|
||||
(export substitute-env-vars string-index string-index-right))
|
||||
|
||||
(define-interface scsh-file-names-interface
|
||||
(export file-name-as-directory
|
||||
|
@ -567,13 +568,15 @@
|
|||
|
||||
|
||||
(define-interface scsh-utilities-interface
|
||||
(export del delete index rindex reduce filter first any first? nth
|
||||
any? every? mapv mapv! vector-every? copy-vector initialize-vector
|
||||
(export del delete filter first first? nth
|
||||
fold fold-right
|
||||
any every
|
||||
mapv mapv! vector-every? copy-vector initialize-vector vector-append
|
||||
vfold vfold-right
|
||||
check-arg conjoin disjoin negate compose reverse! call/cc
|
||||
deprecated-proc
|
||||
deposit-bit-field
|
||||
real->exact-integer
|
||||
string-replace! substring-replace!))
|
||||
real->exact-integer))
|
||||
|
||||
;;; semi-standard network magic numbers
|
||||
;;; should be available on all platforms
|
||||
|
@ -683,14 +686,15 @@
|
|||
char-ascii?
|
||||
|
||||
char-set?
|
||||
copy-char-set
|
||||
char-set-copy
|
||||
char-set=
|
||||
char-set<=
|
||||
char-set-size
|
||||
|
||||
set-char-set!
|
||||
char-set-adjoin char-set-delete
|
||||
char-set-adjoin! char-set-delete!
|
||||
char-set-for-each
|
||||
reduce-char-set
|
||||
char-set-fold reduce-char-set
|
||||
|
||||
char-set
|
||||
chars->char-set
|
||||
|
@ -702,11 +706,19 @@
|
|||
char-set-members
|
||||
char-set-contains?
|
||||
|
||||
char-set-every?
|
||||
char-set-any
|
||||
|
||||
char-set-invert
|
||||
char-set-union
|
||||
char-set-intersection
|
||||
char-set-difference
|
||||
|
||||
char-set-invert!
|
||||
char-set-union!
|
||||
char-set-intersection!
|
||||
char-set-difference!
|
||||
|
||||
char-set:lower-case
|
||||
char-set:upper-case
|
||||
char-set:alphabetic
|
||||
|
@ -758,7 +770,8 @@
|
|||
skip-char-set))
|
||||
|
||||
(define-interface awk-interface
|
||||
(export (awk :syntax)))
|
||||
(export (awk :syntax)
|
||||
(awk/posix-string :syntax)))
|
||||
|
||||
(define-interface scsh-dbm-interface
|
||||
(export dbm-open
|
||||
|
|
|
@ -27,12 +27,16 @@
|
|||
|
||||
|
||||
(define-structure error-package (export error warn)
|
||||
(open signals))
|
||||
(open signals)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structure scsh-utilities scsh-utilities-interface
|
||||
(open bitwise error-package let-opt scheme)
|
||||
(files utilities))
|
||||
(open bitwise error-package loopholes let-opt scheme)
|
||||
(files utilities)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
;;; This guy goes into the FOR-SYNTAX part of scsh's syntax exports.
|
||||
|
@ -44,27 +48,36 @@
|
|||
scsh-utilities ; check-arg
|
||||
scheme
|
||||
)
|
||||
(files syntax-helpers))
|
||||
(files syntax-helpers)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
;;; The bufpol/{block, line, none} values
|
||||
(define-structure buffered-io-flags buffered-io-flags-interface
|
||||
(open defenum-package scheme)
|
||||
(files (machine bufpol)))
|
||||
(files (machine bufpol))
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structure char-set-package char-set-interface
|
||||
(open error-package
|
||||
ascii
|
||||
define-record-types ; JAR's record macro.
|
||||
scsh-utilities ; For DEPRECATED-PROC
|
||||
scheme)
|
||||
(files char-set))
|
||||
(files char-set)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structures ((tty-flags tty-flags-interface)
|
||||
(scsh-internal-tty-flags scsh-internal-tty-flags-interface))
|
||||
(open scheme ascii bitwise)
|
||||
(files (machine tty-consts)))
|
||||
(files (machine tty-consts))
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structure scsh-version scsh-version-interface
|
||||
|
@ -79,7 +92,9 @@
|
|||
$current-input-port
|
||||
$current-output-port
|
||||
$error-output-port)
|
||||
(open ports))
|
||||
(open ports)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
;;; The scsh-level-0 package is for implementation convenience.
|
||||
;;; The scsh startup and top-level modules need access to scsh
|
||||
|
@ -124,7 +139,8 @@
|
|||
(scsh-level-0-internals (export set-command-line-args!
|
||||
init-scsh-hindbrain
|
||||
init-scsh-vars))
|
||||
(scsh-regexp-package scsh-regexp-interface))
|
||||
; (scsh-regexp-package scsh-regexp-interface)
|
||||
)
|
||||
(for-syntax (open scsh-syntax-helpers scheme))
|
||||
(open externals
|
||||
structure-refs
|
||||
|
@ -160,6 +176,13 @@
|
|||
|
||||
interrupts ; signal handler code
|
||||
|
||||
re-level-0
|
||||
rx-syntax
|
||||
|
||||
string-lib
|
||||
|
||||
loopholes ; For my bogus CALL-TERMINALLY implementation.
|
||||
|
||||
scheme
|
||||
)
|
||||
|
||||
|
@ -188,7 +211,7 @@
|
|||
filesys
|
||||
fileinfo
|
||||
glob
|
||||
filemtch
|
||||
; filemtch
|
||||
time ; New in release 0.2.
|
||||
(machine time_dep)
|
||||
network ; New in release 0.3.
|
||||
|
@ -198,19 +221,25 @@
|
|||
pty ; New in release 0.4.
|
||||
sighandlers ; New in release 0.5.
|
||||
scsh
|
||||
re
|
||||
; re
|
||||
rdelim
|
||||
))
|
||||
)
|
||||
; (optimize auto-integrate)
|
||||
)
|
||||
|
||||
(define-structure defrec-package (export (define-record :syntax))
|
||||
(open records scheme)
|
||||
(for-syntax (open scheme error-package receiving))
|
||||
(files defrec))
|
||||
(files defrec)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
(define-structure defenum-package (export (define-enum-constant :syntax)
|
||||
(define-enum-constants :syntax))
|
||||
(open scheme)
|
||||
(files enumconst))
|
||||
(files enumconst)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
;;; This code opens so many modules of gruesome, low-level S48 internals
|
||||
;;; that these two modules are segregated into separate packages, each
|
||||
|
@ -229,7 +258,9 @@
|
|||
filenames ; translate
|
||||
scheme-level-2-internal ; usual-resumer
|
||||
scheme)
|
||||
(files startup))
|
||||
(files startup)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
(define-structure scsh-top-package (export parse-switches-and-execute repl)
|
||||
(open command-processor
|
||||
|
@ -249,54 +280,78 @@
|
|||
; with-current-output-port exit
|
||||
scsh-level-0-internals ; set-command-line-args! init-scsh-vars
|
||||
scheme)
|
||||
(files top meta-arg))
|
||||
(files top meta-arg)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structure field-reader-package scsh-field-reader-interface
|
||||
(open receiving ; receive
|
||||
char-set-package
|
||||
scsh-utilities
|
||||
scsh-utilities ; nth
|
||||
error-package ; error
|
||||
scsh-level-0 ; delimited readers
|
||||
scsh-regexp-package
|
||||
; scsh-regexp-package
|
||||
re-exports
|
||||
string-lib ; join-strings
|
||||
let-opt ; optional-arg parsing & defaulting
|
||||
scheme
|
||||
)
|
||||
(files fr))
|
||||
(files fr)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structures
|
||||
((awk-expander-package (export expand-awk))
|
||||
((awk-expander-package (export expand-awk expand-awk/obsolete))
|
||||
(awk-support-package (export next-range next-:range
|
||||
next-range: next-:range:)))
|
||||
(open receiving ; receive
|
||||
scsh-utilities
|
||||
error-package ; error
|
||||
scsh-regexp-package
|
||||
; scsh-regexp-package
|
||||
; re-exports
|
||||
sre-syntax-tools
|
||||
scheme
|
||||
)
|
||||
(files awk))
|
||||
(files awk)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
|
||||
(define-structure awk-package awk-interface
|
||||
(open awk-support-package scsh-regexp-package receiving scheme)
|
||||
(open awk-support-package ; These packages provide all the stuff
|
||||
re-exports ; that appears in the code produced by
|
||||
receiving ; an awk expansion.
|
||||
scheme)
|
||||
(for-syntax (open awk-expander-package scheme))
|
||||
(begin (define-syntax awk expand-awk)))
|
||||
(begin (define-syntax awk expand-awk)
|
||||
(define-syntax awk/posix-string expand-awk/obsolete)))
|
||||
|
||||
;;; Exports an AWK macro that is just AWK/POSIX-STRING.
|
||||
(define-structure obsolete-awk-package (export (awk :syntax))
|
||||
(open awk-package)
|
||||
(begin (define-syntax awk
|
||||
(syntax-rules () ((awk body ...) (awk/posix-string body ....))))))
|
||||
|
||||
(define-structure scsh
|
||||
(compound-interface (interface-of scsh-level-0)
|
||||
(interface-of scsh-startup-package)
|
||||
scsh-regexp-interface
|
||||
; scsh-regexp-interface
|
||||
re-exports-interface
|
||||
re-old-funs-interface
|
||||
scsh-field-reader-interface ; new in 0.3
|
||||
; scsh-dbm-interface
|
||||
(export repl)
|
||||
awk-interface)
|
||||
awk-interface
|
||||
)
|
||||
|
||||
(open structure-refs
|
||||
scsh-level-0
|
||||
scsh-level-0-internals
|
||||
scsh-regexp-package
|
||||
re-exports
|
||||
re-old-funs
|
||||
; scsh-regexp-package
|
||||
scsh-startup-package
|
||||
; dbm
|
||||
awk-package
|
||||
|
@ -304,7 +359,9 @@
|
|||
scheme)
|
||||
|
||||
(access scsh-top-package)
|
||||
(begin (define repl (structure-ref scsh-top-package repl))))
|
||||
(begin (define repl (structure-ref scsh-top-package repl)))
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
||||
(define-structure scsh-here-string-hax (export)
|
||||
(open reading
|
||||
|
@ -312,11 +369,6 @@
|
|||
scsh ; Just need the delimited readers.
|
||||
features ; make-immutable!
|
||||
scheme)
|
||||
(files here))
|
||||
|
||||
(define-structure test-package (export test-proc)
|
||||
(open scsh-regexp-package scheme)
|
||||
(begin (define (test-proc p)
|
||||
(regexp-substitute p
|
||||
(string-match "(foo)(.*)(bar)" "Hello foo Olin bar quux")
|
||||
'post 3 1 2 'pre))))
|
||||
(files here)
|
||||
(optimize auto-integrate)
|
||||
)
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
;;; and dynamic env to be gc'd away, since this procedure never returns.
|
||||
|
||||
(define (call-terminally thunk)
|
||||
(with-continuation #f (lambda () (thunk) (exit 0))))
|
||||
(with-continuation (loophole :escape #f) ; Bogus
|
||||
(lambda () (thunk) (exit 0))))
|
||||
;; Alternatively: (with-continuation #f thunk)
|
||||
|
||||
;;; More portably, but less usefully:
|
||||
|
@ -125,7 +126,7 @@
|
|||
|
||||
;; Main loop.
|
||||
(let split ((i 0))
|
||||
(cond ((index clist #\: i) =>
|
||||
(cond ((string-index clist #\: i) =>
|
||||
(lambda (colon)
|
||||
(cons (substring clist i colon)
|
||||
(split (+ colon 1)))))
|
||||
|
@ -207,7 +208,7 @@
|
|||
|
||||
(define (with-env* alist-delta thunk)
|
||||
(let* ((old-env #f)
|
||||
(new-env (reduce (lambda (alist key/val)
|
||||
(new-env (fold (lambda (key/val alist)
|
||||
(alist-update (car key/val) (cdr key/val) alist))
|
||||
(env->alist)
|
||||
alist-delta)))
|
||||
|
@ -435,12 +436,12 @@
|
|||
;;; (port->list reader port)
|
||||
;;; Repeatedly applies READER to PORT, accumulating results into a list.
|
||||
;;; On EOF, returns the list of items thus collected.
|
||||
;;; (reduce-port port reader op . seeds)
|
||||
;;; (port-fold port reader op . seeds)
|
||||
;;; Repeatedly read things from PORT with READER. Each time you read
|
||||
;;; some value V, compute a new set of seeds with (apply OP V SEEDS).
|
||||
;;; (More than 1 seed means OP must return multiple values).
|
||||
;;; On eof, return the seeds.
|
||||
;;; PORT->LIST is just (REDUCE-PORT PORT READ CONS '())
|
||||
;;; On eof, return the seeds: (apply value SEEDS).
|
||||
;;; PORT->LIST is just (PORT-FOLD PORT READ CONS '())
|
||||
|
||||
(define (run/port+proc* thunk)
|
||||
(receive (r w) (pipe)
|
||||
|
@ -505,13 +506,16 @@
|
|||
(define (port->string-list port)
|
||||
(port->list read-line port))
|
||||
|
||||
(define (reduce-port port reader op . seeds)
|
||||
(letrec ((reduce (lambda seeds
|
||||
(define (port-fold port reader op . seeds)
|
||||
(letrec ((fold (lambda seeds
|
||||
(let ((x (reader port)))
|
||||
(if (eof-object? x) (apply values seeds)
|
||||
(call-with-values (lambda () (apply op x seeds))
|
||||
reduce))))))
|
||||
(apply reduce seeds)))
|
||||
fold))))))
|
||||
(apply fold seeds)))
|
||||
|
||||
(define reduce-port
|
||||
(deprecated-proc port-fold 'reduce-port "Use port-fold instead."))
|
||||
|
||||
;;; Not defined:
|
||||
;;; (field-reader field-delims record-delims)
|
||||
|
@ -671,7 +675,7 @@
|
|||
(define (exec-path/env prog env . arglist)
|
||||
(flush-all-ports)
|
||||
(let ((prog (stringify prog)))
|
||||
(if (index prog #\/)
|
||||
(if (string-index prog #\/)
|
||||
|
||||
;; Contains a slash -- no path search.
|
||||
(%exec prog (cons prog arglist) env)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,380 @@
|
|||
(define-interface string-lib-interface
|
||||
(export
|
||||
;; string-map proc s [start end] -> s
|
||||
(string-map (proc ((proc (:char) :char)
|
||||
:string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-map! proc s [start end] -> unspecific
|
||||
(string-map! (proc ((proc (:char) :values)
|
||||
:string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-fold kons knil s [start end] -> value
|
||||
;; string-fold-right kons knil s [start end] -> value
|
||||
((string-fold string-fold-right)
|
||||
(proc ((proc (:char :value) :value)
|
||||
:value :string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-unfold p f g seed -> string
|
||||
(string-unfold (proc ((proc (:value) :boolean)
|
||||
(proc (:value) :char)
|
||||
(proc (:value) :value)
|
||||
:value)
|
||||
:string))
|
||||
|
||||
; Enough is enough.
|
||||
; ;; string-unfoldn p f g seed ... -> string
|
||||
; (string-unfoldn (proc ((procedure :values :boolean)
|
||||
; (procedure :values :char)
|
||||
; (procedure :values :values)
|
||||
; &rest :value)
|
||||
; :string))
|
||||
|
||||
;; string-tabulate proc len -> string
|
||||
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-for-each proc s [start end] -> unspecific
|
||||
;; string-iter proc s [start end] -> unspecific
|
||||
((string-for-each string-iter)
|
||||
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-every? pred s [start end]
|
||||
;; string-any pred s [start end]
|
||||
(string-every?
|
||||
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||
:boolean))
|
||||
(string-any
|
||||
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-compare string1 string2 lt-proc eq-proc gt-proc
|
||||
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc
|
||||
((string-compare string-compare-ci)
|
||||
(proc (:string :string (proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values))
|
||||
:values))
|
||||
|
||||
;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt
|
||||
;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt
|
||||
((substring-compare substring-compare-ci)
|
||||
(proc (:string :exact-integer :exact-integer
|
||||
:string :exact-integer :exact-integer
|
||||
(proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values)
|
||||
(proc (:exact-integer) :values))
|
||||
:values))
|
||||
|
||||
;; string< string1 string2
|
||||
((string= string< string> string<= string>= string<>
|
||||
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
|
||||
(proc (&rest :string) :value))
|
||||
|
||||
;; substring< string1 start1 end1 string2 start2 end2
|
||||
((substring= substring<> substring-ci= substring-ci<>
|
||||
substring< substring> substring-ci< substring-ci>
|
||||
substring<= substring>= substring-ci<= substring-ci>=)
|
||||
(proc (:string :exact-integer :exact-integer
|
||||
:string :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-upper-case? string [start end]
|
||||
;; string-lower-case? string [start end]
|
||||
((string-upper-case? string-lower-case?)
|
||||
(proc (:string &opt :exact-integer :exact-integer) :boolean))
|
||||
|
||||
;; capitalize-string string [start end]
|
||||
;; capitalize-words string [start end]
|
||||
;; string-downcase string [start end]
|
||||
;; string-upcase string [start end]
|
||||
;; capitalize-string! string [start end]
|
||||
;; capitalize-words! string [start end]
|
||||
;; string-downcase! string [start end]
|
||||
;; string-upcase! string [start end]
|
||||
((capitalize-string capitalize-words string-downcase string-upcase)
|
||||
(proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
((capitalize-string! capitalize-words! string-downcase! string-upcase!)
|
||||
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||
|
||||
;; string-take string nchars
|
||||
;; string-drop string nchars
|
||||
((string-take string-drop) (proc (:string :exact-integer) :string))
|
||||
|
||||
;; string-padl string k [char start end]
|
||||
;; string-padr string k [char start end]
|
||||
((string-padl string-padr)
|
||||
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-trim string [char/char-set/pred start end]
|
||||
;; string-triml string [char/char-set/pred start end]
|
||||
;; string-trimr string [char/char-set/pred start end]
|
||||
((string-trim string-triml string-trimr)
|
||||
(proc (:string &opt :value :exact-integer :exact-integer)
|
||||
:string))
|
||||
|
||||
;; string-filter char/char-set/pred string [start end]
|
||||
;; string-delete char/char-set/pred string [start end]
|
||||
((string-filter string-delete)
|
||||
(proc (:value :string &opt :exact-integer :exact-integer) :string))
|
||||
|
||||
;; string-index string char/char-set/pred [start end]
|
||||
;; string-index-right string char/char-set/pred [end start]
|
||||
;; string-skip string char/char-set/pred [start end]
|
||||
;; string-skip-right string char/char-set/pred [end start]
|
||||
((string-index string-index-right string-skip string-skip-right)
|
||||
(proc (:string :value &opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-prefix-count string1 string2
|
||||
;; string-suffix-count string1 string2
|
||||
;; string-prefix-count-ci string1 string2
|
||||
;; string-suffix-count-ci string1 string2
|
||||
((string-prefix-count string-prefix-count-ci
|
||||
string-suffix-count string-suffix-count-ci)
|
||||
(proc (:string :string) :exact-integer))
|
||||
|
||||
;; substring-prefix-count string1 start1 end1 string2 start2 end2
|
||||
;; substring-suffix-count string1 start1 end1 string2 start2 end2
|
||||
;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2
|
||||
;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2
|
||||
((substring-prefix-count substring-prefix-count-ci
|
||||
substring-suffix-count substring-suffix-count-ci)
|
||||
(proc (:string :exact-integer :exact-integer
|
||||
:string :exact-integer :exact-integer)
|
||||
:exact-integer))
|
||||
|
||||
|
||||
;; string-prefix? string1 string2
|
||||
;; string-suffix? string1 string2
|
||||
;; string-prefix-ci? string1 string2
|
||||
;; string-suffix-ci? string1 string2
|
||||
((string-prefix? string-prefix-ci?
|
||||
string-suffix? string-suffix-ci?)
|
||||
(proc (:string :string) :boolean))
|
||||
|
||||
;; substring-prefix? string1 start1 end1 string2 start2 end2
|
||||
;; substring-suffix? string1 start1 end1 string2 start2 end2
|
||||
;; substring-prefix-ci? string1 start1 end1 string2 start2 end2
|
||||
;; substring-suffix-ci? string1 start1 end1 string2 start2 end2
|
||||
((substring-prefix? substring-prefix-ci?
|
||||
substring-suffix? substring-suffix-ci?)
|
||||
(proc (:string :exact-integer :exact-integer
|
||||
:string :exact-integer :exact-integer)
|
||||
:boolean))
|
||||
|
||||
;; substring? pattern string [start end]
|
||||
;; substring-ci? pattern string [start end]
|
||||
((substring? substring-ci?)
|
||||
(proc (:string :string &opt :exact-integer :exact-integer)
|
||||
:value))
|
||||
|
||||
;; string-fill! string char [start end]
|
||||
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-copy! to tstart from [fstart fend]
|
||||
(string-copy! (proc (:string :exact-integer :string
|
||||
&opt :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-copy s [start end] -> string
|
||||
;; substring s start [end] -> string
|
||||
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
(substring (proc (:string :exact-integer &opt :exact-integer) :string))
|
||||
|
||||
;; string-reverse s [start end]
|
||||
;; string-reverse! s [start end]
|
||||
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||
|
||||
;; reverse-list->string char-list
|
||||
;; string->list s [start end]
|
||||
;; string-concat string-list
|
||||
;; string-concat/shared string-list
|
||||
;; string-append/shared s ...
|
||||
(reverse-list->string (proc (:value) :string))
|
||||
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
|
||||
((string-concat string-concat/shared) (proc (:value) :string))
|
||||
(string-append/shared (proc (&rest :string) :string))
|
||||
|
||||
;; xsubstring s from [to start end]
|
||||
;; string-xcopy! target tstart s from [to start end]
|
||||
(xsubstring (proc (:string :exact-integer &opt
|
||||
:exact-integer :exact-integer :exact-integer)
|
||||
:string))
|
||||
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
|
||||
:exact-integer :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
|
||||
;; string-null? s
|
||||
(string-null? (proc (:string) :boolean))
|
||||
|
||||
(join-strings (proc (:value &opt :string :symbol) :string))
|
||||
|
||||
;; Here are the R4RS procs
|
||||
(string? (proc (:value) :boolean))
|
||||
(make-string (proc (:exact-integer &opt :char) :string))
|
||||
(string (proc (&rest :char) :string))
|
||||
(string-length (proc (:string) :exact-integer))
|
||||
(string-ref (proc (:string :exact-integer) :char))
|
||||
(string-set! (proc (:string :exact-integer :char) :unspecific))
|
||||
|
||||
; Not provided by string-lib.
|
||||
;((string=? string-ci=? string<? string-ci<?
|
||||
; string>? string-ci>? string<=? string-ci<=?
|
||||
; string>=? string-ci>=?) (proc (:string :string) :boolean))
|
||||
|
||||
;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!,
|
||||
;; and STRING->LIST. The string-lib types are different -- extended.
|
||||
;(substring (proc (:string :exact-integer :exact-integer) :string))
|
||||
;(string-copy (proc (:string) :string))
|
||||
;(string-fill! (proc (:string :char) :unspecific))
|
||||
;(string->list (proc (:string) :value))
|
||||
|
||||
(string-append (proc (&rest :string) :string))
|
||||
(list->string (proc (:value) :string))
|
||||
))
|
||||
|
||||
|
||||
(define-interface string-lib-internals-interface
|
||||
(export
|
||||
(parse-final-start+end (proc ((procedure :values :values) :string :value)
|
||||
(some-values :exact-integer :exact-integer)))
|
||||
(parse-start+end (proc ((procedure :values :values) :string :value)
|
||||
(some-values :exact-integer :exact-integer :value)))
|
||||
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
|
||||
:unspecific))
|
||||
(make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean))
|
||||
:vector))))
|
||||
|
||||
|
||||
(define-structures ((string-lib string-lib-interface)
|
||||
(string-lib-internals string-lib-internals-interface))
|
||||
(open char-set-package
|
||||
receiving
|
||||
error-package
|
||||
let-opt
|
||||
structure-refs
|
||||
scsh-utilities ; FOLD-RIGHT
|
||||
scheme)
|
||||
(access scheme ; Original SUBSTRING
|
||||
silly) ; Primitive reverse-list->string
|
||||
(files stringlib)
|
||||
(optimize auto-integrate))
|
||||
|
||||
|
||||
|
||||
;;; CPP Lib
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Character->Character Partial functions
|
||||
|
||||
;;; Many of these types are pretty weak, but there is no way to
|
||||
;;; specify that a parameter must be a particular record type.
|
||||
;;; Every little bit helps, though.
|
||||
|
||||
(define-interface ccp-lib-interface
|
||||
(export
|
||||
;; ccp? x -> boolean
|
||||
(ccp? (proc (:value) :boolean))
|
||||
|
||||
;; ccp-domain ccp -> char-set
|
||||
(ccp-domain (proc (:value) :value)) ; Not very informative.
|
||||
|
||||
;; ccp-copy ccp -> ccp
|
||||
(ccp-copy (proc (:value) :value))
|
||||
|
||||
;; ccp= ccp1 ccp2 ...
|
||||
;; ccp<= ccp1 ccp2 ...
|
||||
((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative.
|
||||
|
||||
;; ccp-fold kons knil ccp -> value
|
||||
(ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value))
|
||||
|
||||
;; ccp-for-each proc ccp
|
||||
(ccp-for-each (proc ((proc (:char :char) :values)) :unspecific))
|
||||
|
||||
;; ccp->alist ccp -> alist
|
||||
(ccp->alist (proc (:value) :value))
|
||||
|
||||
;; ccp-restrict ccp cset -> ccp
|
||||
;; ccp-restrict! ccp cset -> ccp
|
||||
((ccp-restrict ccp-restrict!) (proc (:value :value) :value))
|
||||
|
||||
;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp
|
||||
;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp
|
||||
;; ccp-delete ccp from-char1 ... -> ccp
|
||||
;; ccp-delete! ccp from-char1 ... -> ccp
|
||||
((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value))
|
||||
((ccp-delete ccp-delete!) (proc (:value &rest :char) :value))
|
||||
|
||||
;; ccp-extend ccp1 ... -> ccp
|
||||
;; ccp-extend! ccp1 ... -> ccp
|
||||
((ccp-extend ccp-extend!) (proc (&rest :value) :value))
|
||||
|
||||
;; ccp-compose ccp1 ... -> ccp
|
||||
(ccp-compose (proc (&rest :value) :value))
|
||||
|
||||
;; alist->ccp char/char-alist [ccp] -> ccp
|
||||
;; alist->ccp! char/char-alist [ccp] -> ccp
|
||||
((alist->ccp alist->ccp!) (proc (:value &opt :value) :value))
|
||||
|
||||
;; proc->ccp proc [domain ccp] -> ccp
|
||||
;; proc->ccp! proc [domain ccp] -> ccp
|
||||
((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value)
|
||||
:value))
|
||||
|
||||
;; constant-ccp char [domain ccp] -> ccp
|
||||
;; constant-ccp! char domain ccp -> ccp
|
||||
((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value))
|
||||
|
||||
;; ccp/mappings from1 to1 ... -> ccp
|
||||
;; extend-ccp/mappings ccp from1 to1 ... -> ccp
|
||||
;; extend-ccp/mappings! ccp from1 to1 ... -> ccp
|
||||
(ccp/mappings (proc (&rest :value) :value))
|
||||
((extend-ccp/mappings extend-ccp/mappings!)
|
||||
(proc (:value &rest :value) :value))
|
||||
|
||||
;; construct-ccp ccp elt ... -> ccp
|
||||
;; construct-ccp! ccp elt ... -> ccp
|
||||
((construct-ccp construct-ccp!) (proc (:value &rest :value) :value))
|
||||
|
||||
;; ccp-unfold p f g seed -> ccp
|
||||
(ccp-unfold (proc ((proc (:value) :boolean)
|
||||
(procedure :value (some-values :char :char))
|
||||
(proc (:value) :value)
|
||||
:value)
|
||||
:value))
|
||||
|
||||
;; tr ccp string [start end] -> string
|
||||
;; ccp-map ccp string [start end] -> string
|
||||
;; ccp-map! ccp string [start end]
|
||||
;; ccp-app ccp char -> char or false
|
||||
((tr ccp-map)
|
||||
(proc (:value :string &opt :exact-integer :exact-integer) :string))
|
||||
(ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific))
|
||||
(ccp-app (proc (:value :char) :value))
|
||||
|
||||
;; Primitive CCP's.
|
||||
ccp:0 ccp:1 ccp:upcase ccp:downcase
|
||||
))
|
||||
|
||||
(define-structure ccp-lib ccp-lib-interface
|
||||
(open char-set-package
|
||||
ascii
|
||||
defrec-package
|
||||
string-lib
|
||||
let-opt
|
||||
receiving
|
||||
scsh-utilities ; every
|
||||
error-package
|
||||
scheme)
|
||||
(files ccp)
|
||||
(optimize auto-integrate))
|
|
@ -855,21 +855,6 @@ scheme_value df_scm_sort_filevec(long nargs, scheme_value *args)
|
|||
return SCHFALSE;
|
||||
}
|
||||
|
||||
scheme_value df_filter_stringvec(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char *filter_stringvec(const char *, char const ** , int *);
|
||||
scheme_value ret1;
|
||||
char *r1;
|
||||
int r2;
|
||||
|
||||
cig_check_nargs(3, nargs, "filter_stringvec");
|
||||
r1 = filter_stringvec(cig_string_body(args[2]), (char const ** )AlienVal(args[1]), &r2);
|
||||
ret1 = VECTOR_REF(*args,0);
|
||||
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
|
||||
VECTOR_REF(*args,1) = ENTER_FIXNUM(r2);
|
||||
return ret1;
|
||||
}
|
||||
|
||||
scheme_value df_scm_envvec(long nargs, scheme_value *args)
|
||||
{
|
||||
extern char** scm_envvec(int *);
|
||||
|
|
|
@ -850,23 +850,26 @@
|
|||
;;; I do this one in C, I'm not sure why:
|
||||
;;; It is used by MATCH-FILES.
|
||||
|
||||
(define-foreign %filter-C-strings!
|
||||
(filter_stringvec (string pattern) ((C "char const ** ~a") cvec))
|
||||
static-string ; error message -- #f if no error.
|
||||
integer) ; number of files that pass the filter.
|
||||
;;; 99/7: No one is using this function, so I'm commenting it out.
|
||||
;;; Later, we could tune up the globber or regexp file-matcher to use
|
||||
;;; it (but should shift it into the rx directory). But I should also go
|
||||
;;; to a file-at-a-time cursor model for directory fetching. -Olin
|
||||
|
||||
;(define-foreign %filter-C-strings!
|
||||
; (filter_stringvec (string-desc pattern) ((C "char const ** ~a") cvec))
|
||||
; integer) ; number of files that pass the filter.
|
||||
|
||||
(define (match-files regexp . maybe-dir)
|
||||
(let ((dir (:optional maybe-dir ".")))
|
||||
(check-arg string? dir match-files)
|
||||
(receive (err cvec numfiles)
|
||||
(%open-dir (ensure-file-name-is-nondirectory dir))
|
||||
(if err (errno-error err match-files regexp dir))
|
||||
(receive (err numfiles) (%filter-C-strings! regexp cvec)
|
||||
(if err (error err match-files))
|
||||
(%sort-file-vector cvec numfiles)
|
||||
(let ((files (C-string-vec->Scheme&free cvec numfiles)))
|
||||
(vector->list files))))))
|
||||
;(define (match-files regexp . maybe-dir)
|
||||
; (let ((dir (:optional maybe-dir ".")))
|
||||
; (check-arg string? dir match-files)
|
||||
; (receive (err cvec numfiles)
|
||||
; (%open-dir (ensure-file-name-is-nondirectory dir))
|
||||
; (if err (errno-error err match-files regexp dir))
|
||||
; (receive (numfiles) (%filter-C-strings! regexp cvec)
|
||||
; ;(if err (error err match-files))
|
||||
; (%sort-file-vector cvec numfiles)
|
||||
; (let ((files (C-string-vec->Scheme&free cvec numfiles)))
|
||||
; (vector->list files))))))
|
||||
|
||||
|
||||
;;; Environment manipulation
|
||||
|
@ -875,7 +878,7 @@
|
|||
;;; (var . val) / "var=val" rep conversion:
|
||||
|
||||
(define (split-env-string var=val)
|
||||
(let ((i (index var=val #\=)))
|
||||
(let ((i (string-index var=val #\=)))
|
||||
(if i (values (substring var=val 0 i)
|
||||
(substring var=val (+ i 1) (string-length var=val)))
|
||||
(error "No \"=\" in environment string" var=val))))
|
||||
|
|
|
@ -16,30 +16,18 @@
|
|||
(define (delete pred lis)
|
||||
(filter (lambda (x) (not (pred x))) lis))
|
||||
|
||||
(define (index str c . maybe-start)
|
||||
(let ((start (max 0 (:optional maybe-start 0)))
|
||||
(len (string-length str)))
|
||||
(do ((i start (+ 1 i)))
|
||||
((or (>= i len)
|
||||
(char=? c (string-ref str i)))
|
||||
(and (< i len) i)))))
|
||||
(define (fold kons knil lis)
|
||||
(let lp ((lis lis) (ans knil))
|
||||
(if (pair? lis)
|
||||
(lp (cdr lis) (kons (car lis) ans))
|
||||
ans)))
|
||||
|
||||
(define (rindex str c . maybe-start)
|
||||
(let* ((len (string-length str))
|
||||
(start (min (:optional maybe-start len)
|
||||
len)))
|
||||
(do ((i (- start 1) (- i 1)))
|
||||
((or (< i 0)
|
||||
(char=? c (string-ref str i)))
|
||||
(and (>= i 0) i)))))
|
||||
|
||||
;;; (f (f (f zero x1) x2) x3)
|
||||
;;; [Richard's does (f x3 (f x2 (f x1 zero)))
|
||||
(define (reduce f zero l)
|
||||
(letrec ((lp (lambda (val rest)
|
||||
(if (pair? rest) (lp (f val (car rest)) (cdr rest))
|
||||
val))))
|
||||
(lp zero l)))
|
||||
(define (fold-right kons knil lis)
|
||||
(let recur ((lis lis))
|
||||
(if (pair? lis)
|
||||
(let ((head (car lis))) ; Won't need LIS after RECUR call.
|
||||
(kons head (recur (cdr lis))))
|
||||
knil)))
|
||||
|
||||
(define (filter pred list)
|
||||
(letrec ((filter (lambda (list)
|
||||
|
@ -62,8 +50,6 @@
|
|||
(lp (cdr list))))))))
|
||||
(lp list)))
|
||||
|
||||
(define any first)
|
||||
|
||||
;;; Returns the first true value produced by PRED, not the list element
|
||||
;;; that satisfied PRED.
|
||||
|
||||
|
@ -74,26 +60,27 @@
|
|||
(lp (cdr list)))))))
|
||||
(lp list)))
|
||||
|
||||
(define any? first?)
|
||||
(define any first?)
|
||||
|
||||
(define (every? pred list)
|
||||
(letrec ((lp (lambda (list)
|
||||
(define (every pred list)
|
||||
(or (not (pair? list))
|
||||
(and (pred (car list))
|
||||
(lp (cdr list)))))))
|
||||
(lp list)))
|
||||
(let lp ((head (car list)) (tail (cdr list)))
|
||||
(if (pair? tail)
|
||||
(and (pred head) (lp (car tail) (cdr tail)))
|
||||
(pred head))))) ; Tail-call the last PRED call.
|
||||
|
||||
|
||||
(define (mapv f v)
|
||||
(let* ((len (vector-length v))
|
||||
(ans (make-vector len)))
|
||||
(do ((i 0 (+ i 1)))
|
||||
((= i len) ans)
|
||||
((>= i len) ans)
|
||||
(vector-set! ans i (f (vector-ref v i))))))
|
||||
|
||||
(define (mapv! f v)
|
||||
(let ((len (vector-length v)))
|
||||
(do ((i 0 (+ i 1)))
|
||||
((= i len) v)
|
||||
((>= i len) v)
|
||||
(vector-set! v i (f (vector-ref v i))))))
|
||||
|
||||
(define (vector-every? pred v)
|
||||
|
@ -115,9 +102,42 @@
|
|||
((< i 0) v)
|
||||
(vector-set! v i (init i)))))
|
||||
|
||||
(define (vector-append . vecs)
|
||||
(let* ((vlen (fold (lambda (v len) (+ (vector-length v) len)) 0 vecs))
|
||||
(ans (make-vector vlen)))
|
||||
(let lp1 ((vecs vecs) (to 0))
|
||||
(if (pair? vecs)
|
||||
(let* ((vec (car vecs))
|
||||
(len (vector-length vec)))
|
||||
(let lp2 ((from 0) (to to))
|
||||
(cond ((< from len)
|
||||
(vector-set! ans to (vector-ref vec from))
|
||||
(lp2 (+ from 1) (+ to 1)))
|
||||
(else (lp1 (cdr vecs) to)))))))
|
||||
ans))
|
||||
|
||||
|
||||
(define (vfold kons knil v)
|
||||
(let ((len (vector-length v)))
|
||||
(do ((i 0 (+ i 1))
|
||||
(ans knil (kons (vector-ref v i) ans)))
|
||||
((>= i len) ans))))
|
||||
|
||||
(define (vfold-right kons knil v)
|
||||
(do ((i (- (vector-length v) 1) (- i 1))
|
||||
(ans knil (kons (vector-ref v i) ans)))
|
||||
((< i 0) ans)))
|
||||
|
||||
|
||||
;;; We loophole the call to ERROR -- the point is that perhaps the
|
||||
;;; user will interact with a breakpoint, and proceed with a new
|
||||
;;; value, which we will then pass to a new invocation of CHECK-ARG
|
||||
;;; for approval.
|
||||
(define (check-arg pred val caller)
|
||||
(if (pred val) val
|
||||
(check-arg pred (error "Bad argument" val pred caller) caller)))
|
||||
(check-arg pred
|
||||
(loophole :value (error "Bad argument" val pred caller))
|
||||
caller)))
|
||||
|
||||
(define (conjoin f g)
|
||||
(lambda args (and (apply f args) (apply g args))))
|
||||
|
@ -170,31 +190,3 @@
|
|||
(let ((f (round x)))
|
||||
(if (inexact? f) (inexact->exact f) f)))
|
||||
|
||||
|
||||
;;; Copy string SOURCE into TARGET[start,...]
|
||||
|
||||
(define (string-replace! target start source)
|
||||
(let ((len (string-length source)))
|
||||
(do ((i (+ start len -1) (- i 1))
|
||||
(j (- len 1) (- j 1)))
|
||||
((< j 0) target)
|
||||
(string-set! target i (string-ref source j)))))
|
||||
|
||||
|
||||
;;; Copy SOURCE[source-start, source-end) into TARGET[start,)
|
||||
|
||||
(define (substring-replace! target start source source-start source-end)
|
||||
(do ((i (+ start (- source-end source-start) -1) (- i 1))
|
||||
(j (- source-end 1) (- j 1)))
|
||||
((< j source-start) target)
|
||||
(string-set! target i (string-ref source j))))
|
||||
|
||||
|
||||
;;; Compute (... (f (f (f zero c0) c1) c2) ...)
|
||||
|
||||
(define (string-reduce f zero s)
|
||||
(let ((len (string-length s)))
|
||||
(let lp ((v zero) (i 0))
|
||||
(if (= i len)
|
||||
v
|
||||
(lp (f v (string-ref s i)) (+ i 1))))))
|
||||
|
|
Loading…
Reference in New Issue