Checking in a slew of systemic changes:

- Switching scsh over to the SRE regexp system.
    - Upgraded string & character handling
    - Switching code over to list-lib SRFI names. Some switchover to
      probably new string-lib SRFI names, though that SRFI hasn't happened,
      so it's probably a waste of time at the moment to stress out about it.
    - General switching from "reduce-" lexeme to "fold-" lexeme.
	-Olin

A per-file breakdown of changes:
    ccp.scm		New
    char-set.scm	New features
    stringlib.scm	New
    stringpack.scm	New

    awk.scm		SRE
    filemtch.scm	SRE string-lib
    fname.scm		string-lib
    fr.scm		SRE string-lib
    glob.scm		SRE
    procobj.scm		list-lib
    rdelim.scm		SRE
    re.c		SRE killed
    re.scm		SRE killed
    re1.c		SRE moved to rx/ dir
    re1.h		SRE moved to rx/ dir
    rx/...		SRE General fitting-it-in to the upgraded scsh.
    scsh-interfaces.scm
    scsh-package.scm
    scsh.scm		type-check loophole added; string-lib; list-lib
		    	reduce-port -> foldl-port

    syscalls.scm	string-lib; diked out unused MATCH-FILES code
    syscalls.c

    utilities.scm	string-lib; {any?,every?} -> {any,every}
    procobj		list-lib
This commit is contained in:
shivers 1999-07-11 20:38:42 +00:00
parent 1bb7a2e494
commit 96d9b0e068
25 changed files with 3188 additions and 978 deletions

View File

@ -1,10 +1,27 @@
;;; An awk loop, after the design of David Albertz and Olin Shivers.
;;; Copyright (c) 1994 by Olin Shivers.
;;; This uses the new RX SRE syntax. Defines a Clinger-Rees expander for
;;; the old, pre-SRE syntax AWK, and one for the new SRE-syntax AWK.
;;; Imports:
;;; - Requires RECEIVE from RECEIVING package.
;;; - Would require DESTRUCTURE from DESTRUCTURING package, but it appears
;;; to be broken, so we hack it w/cars and cdrs.
;;; - Requires STRING-MATCH from SCSH package.
;;; - Requires STRING-MATCH and STRING-MATCH? from RE-EXPORTS package.
;;; - Requires regexp manipulation stuff from SRE-SYNTAX-TOOLS
;;; - Requires ERROR from ERROR-PACKAGE.
;;; - Requires ANY and FILTER frm SCSH-UTILITIES.
;;;
;;; Needs error-package receiving sre-syntax-tools scsh-utilities
;;;
;;; Exports:
;;; (expand-awk exp r c) Clinger-Rees macro expander, new syntax
;;; (expand-awk/obsolete exp r c) Clinger-Rees macro expander, old syntax
;;;
;;; next-range next-:range These four functions are used in the
;;; next-range: next-:range: code output by the expander.
;;; Examples:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -15,13 +32,14 @@
;;;
;;; ;;; Count the number of non-comment lines of code in my Scheme source.
;;; (awk (read-line) (line) ((nlines 0))
;;; ("^[ \t]*;" nlines) ; A comment line.
;;; ((: bos (* white) ";") nlines) ; A comment line.
;;; (else (+ nlines 1))) ; Not a comment line.
;;;
;;; ;;; Read numbers, counting the evens and odds.
;;; ;;; Read numbers, counting the evens and odds,
;;; ;;; and printing out sign information.
;;; (awk (read) (val) ((evens 0) (odds 0))
;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about
;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too.
;;; ((zero? val) (display "zero ") (values evens odds))
;;; ((> val 0) (display "pos ") (values evens odds))
;;; (else (display "neg ") (values evens odds))
;;;
;;; ((even? val) (values (+ evens 1) odds))
@ -33,6 +51,17 @@
;;; .
;;; .
;;; <clausen>)
;;;
;;; <clause> ::= (ELSE body ...)
;;; | (:RANGE test1 test2 body ...) ; RANGE :RANGE RANGE: :RANGE:
;;; | (AFTER body ...)
;;; | (test => proc)
;;; | (test ==> vars body ...)
;;; | (test body ...)
;;;
;;; test ::= integer | sre | (WHEN exp) | exp
;;; (sre/exp ambiguities resolved in favor of SRE)
;;; This macro is written using Clinger/Rees explicit-renaming low-level
;;; macros. So it is pretty ugly. It takes a little care to generate
@ -141,17 +170,23 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (expand-awk exp r c)
;;; If STRING-REGEXPS? is true, we use the old, obsolete syntax, where
;;; a test form that is a string, such as "shivers|bdc", is treated as
;;; a regular expression in the Posix string syntax. Otherwise, we use the
;;; new SRE syntax, where strings are treated as SRE constants.
(define (expand-awk exp r c) (really-expand-awk exp r c #f))
(define (expand-awk/obsolete exp r c) (really-expand-awk exp r c #t))
(define (really-expand-awk exp r c string-regexps?)
(let* ((%lambda (r 'lambda)) ; Bind a mess of keywords.
(%let (r 'let))
(%receive (r 'receive))
(%values (r 'values))
(%if (r 'if))
(%eof-object? (r 'eof-object?))
(%after (r 'after))
(%else (r 'else))
(%+ (r '+))
(%make-regexp (r 'make-regexp))
(%rx (r 'rx))
(gensym (let ((i 0))
(lambda (s)
@ -169,7 +204,9 @@
;; Rip the form apart.
(reader-exp (cadr exp))
(rec/field-vars (caddr exp))
;; Replace #F's with gensym'd variables in the record/field vars.
(rec/field-vars (map (lambda (v) (or v (r (gensym "anon-rfval"))))
(caddr exp)))
(rec-var (car rec/field-vars)) ; The var bound to the record.
(rest (cdddr exp))) ; Stuff after the rec&field-vars.
@ -178,10 +215,18 @@
(values #f (car rest) (cdr rest)) ; form.
(values (car rest) (cadr rest) (cddr rest)))
;; If we are doing the old, obsolete Posix-string syntax, map
;; the clause tests over to the new syntax.
(let* ((clauses (if string-regexps?
(map (lambda (clause)
(hack-clause-for-posix-string-syntax clause r c))
clauses)
clauses))
;; Some analysis: what have we got?
;; Range clauses, else clauses, line num tests,...
(let* ((recnum-tests? ; Do any of the clauses test the record
(any? (lambda (clause) ; count? (I.e., any integer tests?)
(recnum-tests? ; Do any of the clauses test the record
(any (lambda (clause) ; count? (I.e., any integer tests?)
(let ((test (car clause)))
(or (integer? test)
(and (range? clause)
@ -191,25 +236,25 @@
;; If any ELSE clauses, bind this to the var in which we
;; will keep the else state, otherwise #f.
(else-var (and (any? (lambda (clause)
(else-var (and (any (lambda (clause)
(c (car clause) %else))
clauses)
(r 'else)))
(r 'else-state)))
;; We compile all of the regexp patterns into regexp
;; We compile all of the *static* regexp patterns into regexp
;; data structures outside the AWK loop. So we need to
;; make a list of all the regexps that are used as tests.
(patterns (apply append
(map (lambda (clause)
(let ((test (car clause)))
(cond ((string? test) (list test))
(cond ((sre-form? test r c) (list test))
((range? clause)
(let ((t1 (cadr clause))
(t2 (caddr clause)))
(append (if (string? t1)
(append (if (sre-form? t1 r c)
(list t1)
'())
(if (string? t2)
(if (sre-form? t2 r c)
(list t2)
'()))))
(else '()))))
@ -223,16 +268,28 @@
(if (member pat ans) ans (cons pat ans)))
'())))
;; An alist matching regexp patterns with the vars to which
;; we will bind their compiled regexp data structure.
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
(pats-static? (map (lambda (sre)
(static-regexp? (parse-sre sre r c)))
patterns))
;; A LET-list binding the regexp vars to their compiled regexps.
(regexp-inits (map (lambda (p/v)
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
pats/vars))
;; An alist matching each pattern with the exp that refers
;; to it -- a var if it's static, a Scheme (RX ...) exp otw.
(pats/refs (map (lambda (pat static?)
(cons pat
(if static?
(r (gensym "re."))
`(,%rx ,pat))))
patterns pats-static?))
;; A LET-list binding the regexp vars to their
;; compiled static regexps.
(regexp-inits (apply append
(map (lambda (p/r static?)
(if static?
`((,(cdr p/r) (,%rx ,(car p/r))))
'()))
pats/refs
pats-static?)))
;; Make a list of state vars for the range clauses.
;; For each range clause, we need a boolean var to track
@ -279,7 +336,7 @@
(loop-body (awk-loop-body lp-var rec-var else-var
rec-counter range-vars svars
clauses pats/vars r c))
clauses pats/refs r c))
;; Variables that have to be updated per-iteration, as a LET list.
;; Note that we are careful not to increment the record counter
@ -302,12 +359,32 @@
`(,%if (,%eof-object? ,rec-var) ,after-exp
,loop-body))))))))
;;; This maps a clause in the old, obsolete syntax over to a clause
;;; in the new, SRE syntax.
(define (hack-clause-for-posix-string-syntax clause r c)
(let ((hack-simple-test (lambda (test)
(cond ((string? test)
`(,(r 'posix-string) ,test))
((integer? test) test)
(else `(,(r 'when) ,test)))))
(test (car clause)))
(cond ((range-keyword? test r c)
`(,test ,(hack-simple-test (cadr clause))
,(hack-simple-test (caddr clause))
. ,(cdddr clause)))
((or (c test (r 'else))
(c test (r 'after)))
clause)
(else `(,(hack-simple-test test) . ,(cdr clause))))))
;;; Expand into the body of the awk loop -- the code that tests & executes
;;; each clause, and then jumps to the top of the loop.
(define (awk-loop-body lp-var rec-var else-var rec-counter
range-vars svars clauses pats/vars r c)
range-vars svars clauses pats/refs r c)
(let ((clause-vars (if else-var (cons else-var svars) svars))
(loop-vars (append (if rec-counter (list rec-counter) '())
range-vars
@ -325,7 +402,7 @@
(let ((tail (expand (cdr clauses) (cdr range-vars))))
(expand-range-clause clause tail (car range-vars)
rec-var else-var rec-counter svars
pats/vars
pats/refs
r c)))
((c test %after) ; An AFTER clause. Skip it.
@ -339,7 +416,7 @@
(let ((tail (expand (cdr clauses) range-vars)))
(expand-simple-clause clause tail
rec-var else-var rec-counter svars
pats/vars r c)))))
pats/refs r c)))))
;; No clauses -- just jump to top of loop.
`(,lp-var . ,loop-vars)))))
@ -347,49 +424,92 @@
;;; Make a Scheme expression out of a test form.
;;; Integer i => (= i <record-counter>)
;;; String s => (regexp-exec s <record>)
;;; SRE s => (regexp-search <re> <record>)
;;; (when e) => e
;;; Expression e => e
;;;
;;; If FOR-VALUE? is true, then we do regexp searches with REGEXP-SEARCH,
;;; otherwise, we use the cheaper REGEXP-SEARCH?.
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
(define (->simple-clause-test test-form for-value? rec-var rec-counter pats/refs r c)
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
((string? test-form)
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
(else (error "Impossible AWK error -- unknown regexp"
test-form pats/vars)))))
`(,(r 'regexp-exec) ,re-var ,rec-var)))
((sre-form? test-form r c)
`(,(r (if for-value? 'regexp-search 'regexp-search?))
,(cdr (assoc test-form pats/refs))
,rec-var))
((and (pair? test-form)
(c (r 'when) (car test-form)))
(if (= 2 (length test-form)) (cadr test-form)
(error "Illegal WHEN test in AWK" test-form)))
(else test-form)))
(define (expand-simple-clause clause tail
rec-var else-var rec-counter svars
pats/vars r c)
pats/refs r c)
(let* ((%let (r 'let))
(%= (r '=))
(%string-match (r 'string-match))
(%arrow (r '=>))
(%long-arrow (r '==>))
(%if (r 'if))
(%mss (r 'match:substring))
(test (car clause))
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
(mktest (lambda (for-value?)
(->simple-clause-test test for-value? rec-var
rec-counter pats/refs r c)))
;; Is clause of the form (test => proc)
(arrow? (and (= 3 (length clause))
(c (cadr clause) %arrow)))
;; How about (test ==> (var ...) body ...)?
(long-arrow? (and (< 3 (length clause))
(c (cadr clause) %long-arrow)))
(null-clause-list (null-clause-action else-var svars r))
;; The core form conditionally executes the body.
;; It returns the new else var and the new state vars, if any.
(core (if arrow?
(core (cond (arrow?
(let* ((tv (r 'tval)) ; APP is the actual
(app `(,(caddr clause) ,tv))) ; body: (proc tv).
(app `(,(caddr clause) ,tv)) ; body: (proc tv).
(test (mktest #t)))
`(,%let ((,tv ,test))
(,%if ,tv
,(clause-action (list app) else-var svars r c)
. ,null-clause-list)))
. ,null-clause-list))))
`(,%if ,test ,(clause-action (cdr clause) else-var svars r c)
. ,null-clause-list)))
(long-arrow?
(let* ((tv (r 'tval))
(test (mktest #t))
(bindings ; List of LET bindings for submatches.
(let lp ((i 0)
(vars (caddr clause))
(bindings '()))
(if (pair? vars)
(let ((var (car vars)))
(lp (+ i 1) (cdr vars)
(if var
`((,var (,%mss ,tv ,i)) . ,bindings)
bindings))) ; #F = "don't-care"
bindings))))
`(,%let ((,tv ,test))
(,%if ,tv
(,%let ,bindings ; Bind submatches.
. ,(deblock (clause-action (cdddr clause)
else-var svars
r c)
r c))
. ,null-clause-list))))
(else
`(,%if ,(mktest #f) ,(clause-action (cdr clause)
else-var svars r c)
. ,null-clause-list))))
(loop-vars (if else-var (cons else-var svars) svars)))
@ -402,7 +522,7 @@
(define (expand-range-clause clause tail range-var
rec-var else-var rec-counter svars
pats/vars r c)
pats/refs r c)
(let* ((start-test (cadr clause))
(stop-test (caddr clause))
(body (cdddr clause))
@ -419,10 +539,10 @@
(else (error "Unrecognised range keyword!" clause)))))
;; Convert the start and stop test forms to code.
(start-test (->simple-clause-test start-test rec-var
rec-counter pats/vars r))
(stop-test (->simple-clause-test stop-test rec-var
rec-counter pats/vars r))
(start-test (->simple-clause-test start-test #f rec-var
rec-counter pats/refs r c))
(stop-test (->simple-clause-test stop-test #f rec-var
rec-counter pats/refs r c))
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
(stop-thunk `(,%lambda () ,stop-test))
@ -444,7 +564,6 @@
(tail-exps (deblock tail r c))
(%if (r 'if))
(%receive (r 'receive))
(%let (r 'let))
;; We are hard-wiring the else var to #t after this, so the core

576
scsh/ccp.scm Normal file
View File

@ -0,0 +1,576 @@
;;; Char->char partial maps -*- Scheme -*-
;;; Copyright (C) 1998 by Olin Shivers.
;;; CCPs are an efficient data structure for doing simple string transforms,
;;; similar to the kinds of things you would do with the tr(1) program.
;;;
;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit
;;; character types would need a more sophisticated data structure, tuned
;;; for sparseness. I would suggest something like this:
;;; (define-record ccp
;;; domain ; The domain char-set
;;; map ; Sorted vector of (char . string) pairs
;;; ; specifying the map.
;;; id?) ; If true, mappings not specified by MAP are
;;; ; identity mapping. If false, MAP must
;;; ; specify a mapping for every char in DOMAIN.
;;;
;;; A (char . string) elements in MAP specifies a mapping for the contiguous
;;; sequence of L chars beginning with CHAR (in the sequence of the underlying
;;; char type representation), where L is the length of STRING. These MAP elements
;;; are sorted by CHAR, so that binary search can be used to get from an input
;;; character C to the right MAP element quickly.
;;;
;;; This representation should be reasonably compact for standard mappings on,
;;; say, a Unicode CCP. An implementation might wish to have a cache field
;;; in the record for storing the full 8kb bitset when performing ccp-map
;;; operations. Or, an implementation might want to store the Latin-1 subset
;;; of the map in a dense format, and keep the remainder in a sparse format.
(define num-chars (char-set-size char-set:full)) ; AKA 256.
(define-record ccp
domain ; The domain char-set
dshared? ; Is the domain value shared or linear?
map ; 256-elt string
mshared?) ; Is the map string shared or linear?
;;; Accessors and setters that manage the linear bookkeeping
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (ccp-domain ccp)
(set-ccp:dshared? ccp #t)
(ccp:domain ccp))
;;; CCP is a linear ccp. PROC is a domain->domain function; it must be
;;; linear in its parameter and result.
;;;
;;; Updates the domain of the CCP with PROC, returns the resulting
;;; CCP; reuses the old one to construct the new one.
(define (restrict-linear-ccp-domain ccp proc)
(let ((new-d (proc (if (ccp:dshared? ccp)
(begin (set-ccp:dshared? ccp #f)
(char-set-copy (ccp:domain ccp)))
(ccp:domain ccp)))))
(set-ccp:domain ccp new-d)
ccp))
;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function.
;;; It is passed a linear domain and cmap string. It may side-effect
;;; the cmap string, and returns the resulting updated domain.
;;; We return the resulting CCP, reusing the parameter to construct it.
(define (linear-update-ccp ccp proc)
(let* ((cmap (if (ccp:mshared? ccp)
(begin (set-ccp:mshared? ccp #f)
(string-copy (ccp:map ccp)))
(ccp:map ccp)))
(new-d (proc (if (ccp:dshared? ccp)
(begin (set-ccp:dshared? ccp #f)
(char-set-copy (ccp:domain ccp)))
(ccp:domain ccp))
cmap)))
(set-ccp:domain ccp new-d)
ccp))
;;; Return CCP's map field, and mark it as shared. CCP functions that
;;; restrict a ccp's domain share map strings, so they use this guy.
(define (ccp:map/shared ccp)
(set-ccp:mshared? ccp #t)
(ccp:map ccp))
(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f
(string-copy (ccp:map ccp)) #f))
;;; N-ary equality relation for partial maps
(define (ccp= ccp1 . rest)
(let ((domain (ccp:domain ccp1))
(cmap (ccp:map ccp1)))
(every (lambda (ccp2)
(and (char-set= domain (ccp:domain ccp2))
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap i)
(string-ref cmap2 i))))
domain))))
rest)))
;;; N-ary subset relation for partial maps
(define (ccp<= ccp1 . rest)
(let lp ((domain1 (ccp:domain ccp1))
(cmap1 (ccp:map ccp1))
(rest rest))
(or (not (pair? rest))
(let* ((ccp2 (car rest))
(domain2 (ccp:domain ccp2))
(cmap2 (ccp:map ccp2))
(rest (cdr rest)))
(and (char-set<= domain1 domain2)
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap1 i)
(string-ref cmap2 i))))
domain1))
(lp domain2 cmap2 rest))))))
;;; CCP iterators
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (ccp-fold kons knil ccp)
(let ((cmap (ccp:map ccp)))
(char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v))
knil
(ccp:domain ccp))))
(define (ccp-for-each proc ccp)
(let ((cmap (ccp:map ccp)))
(char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c))))
(ccp:domain ccp))))
(define (ccp->alist ccp)
(ccp-fold (lambda (from to alist) (cons (cons from to) alist))
'()
ccp))
;;; CCP-RESTRICT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Restrict a ccp's domain.
(define (ccp-restrict ccp cset)
(make-ccp (char-set-intersection cset (ccp:domain ccp))
#f
(ccp:map/shared ccp)
#t))
(define (ccp-restrict! ccp cset)
(restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset))))
;;; CCP-ADJOIN ccp from-char1 to-char1 ...
;;; CCP-DELETE ccp char1 ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Add & delete mappings to/from a ccp.
(define (ccp-delete ccp . chars)
(make-ccp (apply char-set-delete (ccp:domain ccp) chars)
#f
(ccp:map/shared ccp)
#t))
(define (ccp-delete! ccp . chars)
(restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars))))
(define (ccp-adjoin ccp . chars)
(let ((cmap (string-copy (ccp:map ccp))))
(make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars)
#f
cmap
#f)))
(define (ccp-adjoin! ccp . chars)
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars))))
(define (install-ccp-adjoin! cmap domain chars)
(let lp ((chars chars) (d domain))
(if (pair? chars)
(let ((from (car chars))
(to (cadr chars))
(chars (cddr chars)))
(string-set! cmap (char->ascii from) to)
(lp chars (char-set-adjoin! d from)))
d)))
;;; CCP-EXTEND ccp1 ...
;;; CCP-EXTEND! ccp1 ccp2 ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Extend ccp1 with ccp2, etc.
(define (ccp-extend . ccps)
(if (pair? ccps)
(let ((ccp0 (car ccps))
(ccps (cdr ccps)))
(if (pair? ccps)
(let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap.
;; The FOLD installs each ccp in CCPS into CMAP and produces
;; the new domain.
(make-ccp (fold (lambda (ccp d)
(install-ccp-extension! cmap d ccp))
(char-set-copy (ccp:domain ccp0))
ccps)
#f cmap #f))
ccp0)) ; Only 1 parameter
ccp:0)) ; 0 parameters
(define (ccp-extend! ccp0 . ccps)
(linear-update-ccp ccp0
(lambda (domain cmap)
(fold (lambda (ccp d) (install-ccp-extension! cmap d ccp))
domain
ccps))))
;;; Side-effect CMAP, linear-update and return DOMAIN.
(define (install-ccp-extension! cmap domain ccp)
(let ((cmap1 (ccp:map ccp))
(domain1 (ccp:domain ccp)))
(char-set-for-each (lambda (c)
(let ((i (char->ascii c)))
(string-set! cmap i (string-ref cmap1 i))))
domain1)
(char-set-union! domain domain1)))
;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; For each character C-IN in the original domain, we push it
;;; through the pipeline of CCPs. If we ever land outside the
;;; domain of a ccp, we punt C-IN. If we push it all the way
;;; through, we add C-IN to our result domain, and add the mapping
;;; into the cmap we are assembling.
;;;
;;; Looping this way avoids building up intermediate temporary
;;; CCPs. If CCP's were small bitsets, we might be better off
;;; slicing the double-nested loops the other way around.
(define (ccp-compose . ccps)
(cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1
((not (pair? (cdr ccps))) (car ccps)) ; 1 arg
(else
(let* ((v (list->vector ccps))
(vlen-2 (- (vector-length v) 2))
(cmap (make-string num-chars))
(d1 (ccp:domain (vector-ref v (+ vlen-2 1))))
(d (char-set-fold (lambda (c-in d)
(let lp ((c c-in) (i vlen-2))
(if (>= i 0)
(let ((ccp (vector-ref v i)))
(if (char-set-contains? (ccp:domain ccp) c)
(lp (string-ref (ccp:map ccp)
(char->ascii c))
(- i 1))
;; Lose: remove c-in from d.
(char-set-delete! d c-in)))
;; Win: C-IN -> C
(begin (string-set! cmap
(char->ascii c-in)
c)
d))))
(char-set-copy d1)
d1)))
(make-ccp d #f cmap #f)))))
;;; ALIST->CPP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (alist->ccp cc-alist . maybe-base-ccp)
(let ((base (:optional maybe-base-ccp ccp:0)))
(if (pair? cc-alist)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-alist! cmap
(char-set-copy (ccp:domain base))
cc-alist)
#f cmap #f))
base)))
(define (alist->ccp! alist base)
(linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist))))
;;; Side-effect CMAP, linear-update and return DOMAIN.
(define (install-ccp-alist! cmap domain alist)
(fold (lambda (from/to d) (let ((from (car from/to))
(to (cdr from/to)))
(string-set! cmap (char->ascii from) to)
(char-set-adjoin! domain from)))
domain
alist))
;;; PROC->CCP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (proc->ccp proc [domain base-ccp])
(define (proc->ccp proc . args)
(let-optionals args ((proc-domain char-set:full)
(base ccp:0))
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base))
proc proc-domain)
#f cmap #f))))
(define (proc->ccp! proc proc-domain base)
(linear-update-ccp base
(lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain))))
(define (install-ccp-proc! cmap domain proc proc-domain)
(char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c)))
proc-domain)
(char-set-union! domain proc-domain))
;;; CONSTANT-CCP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (constant-ccp char [domain base-ccp])
;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR.
;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0.
(define (constant-ccp char . args)
(let-optionals args ((char-domain char-set:full) (base ccp:0))
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base))
char char-domain)
#f cmap #f))))
(define (constant-ccp! char char-domain base)
(linear-update-ccp base
(lambda (d cmap) (install-constant-ccp! cmap d char char-domain))))
;;; Install the constant mapping into CMAP0 by side-effect,
;;; linear-update & return DOMAIN0 with the constant-mapping's domain.
(define (install-constant-ccp! cmap0 domain0 char char-domain)
(char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char))
char-domain)
(char-set-union! domain0 char-domain))
;;; CCP/MAPPINGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp
;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp
;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp
;;; Each FROM element is either a string or a (lo-char . hi-char) range.
;;; Each TO element is either a string or a lo-char. Strings are replicated
;;; to match the length of the corresponding FROM element.
;;; CCP/MAPPINGS's base CCP is CCP:0
;;;
;;; Tedious code.
;;; Internal utility.
;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect.
;;; Return the new domain.
(define (install-ccp-mapping-pair! cmap domain from to)
;; Tedium -- four possibilities here:
;; str->str, str->lo-char,
;; range->str, range->lo-char.
(if (string? from)
(if (string? to)
;; "abc" -> "ABC"
(let ((len1 (string-length from))
(len2 (string-length to)))
(let lp2 ((i (- len1 1))
(j (modulo (- len2 1) len1))
(d domain))
(if (>= i 0)
(let ((c (string-ref from i)))
(string-set! cmap
(char->ascii c)
(string-ref to i))
(lp2 (- i 1)
(- (if (> j 0) j len2) 1)
(char-set-adjoin! d c)))
d)))
;; "abc" -> #\A
(let lp2 ((i (- (string-length from) 1))
(j (char->ascii to))
(d domain))
(if (>= i 0)
(let ((c (string-ref from i)))
(string-set! cmap
(char->ascii c)
(ascii->char j))
(lp2 (- i 1)
(- j 1)
(char-set-adjoin! d c)))
d)))
(let ((from-start (char->ascii (car from)))
(from-end (char->ascii (cdr from))))
(if (string? to)
(let ((len2-1 (- (string-length to) 1)))
;; (#\a . #\c) -> "ABC"
(let lp2 ((i from-start) (j 0) (d domain))
(if (<= i from-end)
(let ((c (string-ref to j)))
(string-set! cmap i c)
(lp2 (+ i 1)
(if (= j len2-1) 0 (+ j 1))
(char-set-adjoin! d c)))
d)))
;; (#\a . #\c) -> #\A
(do ((i from-start (+ i 1))
(j (char->ascii to) (+ j 1))
(d domain (begin (string-set! cmap i (ascii->char j))
(char-set-adjoin d (ascii->char i)))))
((> i from-end) d))))))
;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN.
(define (install-mapping-pairs cmap domain args)
(let lp ((domain domain) (args args))
(if (pair? args)
(lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args))
(cddr args))
domain)))
(define (ccp/mappings . args)
(let ((cmap (make-string num-chars)))
(make-ccp (install-mapping-pairs (make-string num-chars)
(char-set-copy char-set:empty)
args)
#f cmap #f)))
(define (extend-ccp/mappings base . args)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args)
#f cmap #f)))
(define (extend-ccp/mappings! base . args)
(linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args))))
;;; CONSTRUCT-CCP! ccp elt ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The kitchen-sink constructor; static typing be damned.
;;; ELTS are interpreted as follows:
;;; (lo-char . hi-char) to-string|lo-char ; ccp/range
;;; from-string to-string|lo-char ; ccp/range
;;; ccp ; ccp-extend
;;; alist ; alist->ccp
;;; domain char ; ccp-constant
;;; domain proc ; proc->ccp
(define (construct-ccp! ccp . elts)
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts))))
(define (construct-ccp base . elts)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts)
#f cmap #f)))
;;; Install the mappings into CMAP by side-effect,
;;; linear-update & return DOMAIN with the final domain.
(define (install-ccp-construct! cmap domain elts)
(let lp ((d domain) (elts elts))
;(format #t "d=~s elts=~s\n" d elts)
(if (not (pair? elts)) d
(let ((elt (car elts))
(elts (cdr elts)))
(cond ((pair? elt)
(cond ((pair? (car elt)) ; ELT is an alist.
(lp (install-ccp-alist! cmap d elt) elts))
((char? (car elt)) ; ELT is (lo-char . hi-char) range.
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
(cdr elts)))
(else (error "Illegal elt to construct-ccp" elt))))
((string? elt)
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
(cdr elts)))
((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts))
((char-set? elt)
(let ((elt2 (car elts))
(elts (cdr elts)))
(lp (cond ((char? elt2)
(install-constant-ccp! cmap d elt2 elt))
((procedure? elt2)
(install-ccp-proc! cmap d elt2 elt))
(else (error "Illegal elt-pair to construct-ccp"
elt elt2)))
elts)))
(else (error "Illegal elt to construct-ccp" elt)))))))
;;; CCP unfold
(define (ccp-unfold p f g seed)
(let lp ((seed seed) (ccp (ccp-copy ccp:0)))
(if (p seed) ccp
(lp (g seed)
(receive (from to) (f seed)
(lp (g seed) (ccp-adjoin! ccp from to)))))))
;;; Using CCPs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; TR ccp string [start end] -> string
;;; CCP-MAP ccp string [start end] -> string
;;; CCP-MAP! ccp string [start end] -> undefined
;;; CCP-APP ccp char -> char or false
;;; If a char in S is not in CCP's domain, it is dropped from the result.
;;; You can use this to map and delete chars from a string.
(define (tr ccp s . maybe-start+end)
(let-optionals maybe-start+end ((start 0) (end (string-length s)))
;; Count up the chars in S that are in the domain,
;; and allocate the answer string ANS:
(let* ((len (- end start))
(domain (ccp:domain ccp))
(ans-len (string-fold (lambda (c numchars)
(if (char-set-contains? domain c)
(+ numchars 1)
numchars))
0 s start end))
(ans (make-string ans-len)))
;; Apply the map, installing the resulting chars into ANS:
(string-fold (lambda (c i) (cond ((ccp-app ccp c) =>
(lambda (c)
(string-set! ans i c)
(+ i 1)))
(else i))) ; Not in domain -- drop it.
0 s start end)
ans)))
(define (ccp-map ccp s . maybe-start+end)
(apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end))
(define (ccp-map! ccp s . maybe-start+end)
(apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end))
(define (ccp-app ccp char)
(and (char-set-contains? (ccp:domain ccp) char)
(string-ref (ccp:map ccp) (char->ascii char))))
;;; Primitive CCPs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define id-cmap
(let ((m (make-string num-chars)))
(do ((i (- num-chars 1) (- i 1)))
((< i 0))
(string-set! m i (ascii->char i)))
m))
(define ccp:0 (make-ccp char-set:empty #t id-cmap #t))
(define ccp:1 (make-ccp char-set:full #t id-cmap #t))
(define ccp:upcase (proc->ccp char-upcase char-set:full))
(define ccp:downcase (proc->ccp char-downcase char-set:full))

View File

@ -19,12 +19,11 @@
;;; copy of the record fields.
;;; New dfns:
;;; (char-set= cs1 cs2)
;;; (char-set<= cs1 cs2)
;;; (reduce-char-set kons knil cs)
;;; (set-char-set! cs char in?)
;;; (char-set= cs1 cs2 ...)
;;; (char-set<= cs1 cs2 ...)
;;; (char-set-fold kons knil cs)
;;; (char-set-for-each f cs)
;;; (copy-char-set cs)
;;; (char-set-copy cs)
;;; (char-set-size cs)
;;; char-set:printing (char-printing? c)
;;; char-set:blank (char-blank? c)
@ -33,6 +32,12 @@
;;; char-set:ascii (char-ascii? c)
;;; char-set:empty
;;; char-set:full
;;; char-set-every? pred cs
;;; char-set-any pred cs
;;; char-set-adjoin cset char -> cset
;;; char-set-adjoin! cset char -> cset
;;; char-set-delete cset char -> cset
;;; char-set-delete! cset char -> cset
(define char:newline (ascii->char 13))
(define char:tab (ascii->char 9))
@ -41,6 +46,8 @@
(define char:return (ascii->char 10))
(define char:space (ascii->char 32))
(define (string-copy s) (substring s 0 (string-length s)))
(define (string-fill-range! str lower upper ch)
(do ((index lower (+ index 1)))
((>= index upper) str)
@ -61,24 +68,26 @@
char-set?
(s char-set:s))
(define (copy-char-set cs) (make-char-set (string-copy (char-set:s cs))))
(define (char-set-copy cs) (make-char-set (string-copy (char-set:s cs))))
(define (char-set= cs1 cs2)
(let ((s1 (char-set:s cs1))
(s2 (char-set:s cs2)))
(let lp ((i 255))
(or (< i 0)
(and (char=? (string-ref s1 i) (string-ref s2 i))
(lp (- i 1)))))))
;;; The = and <= code is ugly because it's n-ary.
(define (char-set<= cs1 cs2)
(let ((s1 (char-set:s cs1))
(s2 (char-set:s cs2)))
(let lp ((i 255))
(or (< i 0)
(define (char-set= cs1 . rest)
(let ((s1 (char-set:s cs1)))
(every (lambda (cs) (string=? s1 (char-set:s cs)))
rest)))
(define (char-set<= cs1 . rest)
(let lp ((s1 (char-set:s cs1))
(rest rest))
(or (not (pair? rest))
(let ((s2 (char-set:s (car rest)))
(rest (cdr rest)))
(let lp2 ((i 255))
(if (< i 0) (lp s2 rest)
(and (<= (char->ascii (string-ref s1 i))
(char->ascii (string-ref s2 i)))
(lp (- i 1)))))))
(lp2 (- i 1)))))))))
(define (char-set-size cs)
@ -88,20 +97,34 @@
(lp (- i 1)
(if (= 0 (char->ascii (string-ref s i))) size (+ size 1)))))))
(define (set-char-set! cs char in?)
(string-set! (char-set:s cs)
(char->ascii char)
(ascii->char (if in? 1 0))))
(define (set-char-set cs in? . chars)
(let ((s (string-copy (char-set:s cs)))
(val (if in? (ascii->char 1) (ascii->char 0))))
(for-each (lambda (c) (string-set! s (char->ascii c) val))
chars)
(make-char-set s)))
(define (char-set-for-each f cs)
(define (set-char-set! cs in? . chars)
(let ((s (char-set:s cs))
(val (if in? (ascii->char 1) (ascii->char 0))))
(for-each (lambda (c) (string-set! s (char->ascii c) val))
chars))
cs)
(define (char-set-adjoin cs . chars) (apply set-char-set cs #t chars))
(define (char-set-adjoin! cs . chars) (apply set-char-set! cs #t chars))
(define (char-set-delete cs . chars) (apply set-char-set cs #f chars))
(define (char-set-delete! cs . chars) (apply set-char-set! cs #f chars))
(define (char-set-for-each proc cs)
(let ((s (char-set:s cs)))
(let lp ((i 255))
(cond ((>= i 0)
(if (not (= 0 (char->ascii (string-ref s i))))
(f (ascii->char i)))
(proc (ascii->char i)))
(lp (- i 1)))))))
(define (reduce-char-set kons knil cs)
(define (char-set-fold kons knil cs)
(let ((s (char-set:s cs)))
(let lp ((i 255) (ans knil))
(if (< i 0) ans
@ -110,6 +133,28 @@
ans
(kons (ascii->char i) ans)))))))
(define reduce-char-set (deprecated-proc char-set-fold 'char-set-fold
"Use char-set-fold instead."))
(define (char-set-every? pred cs)
(let ((s (char-set:s cs)))
(let lp ((i 255))
(or (< i 0)
(if (= 0 (char->ascii (string-ref s i)))
(lp (- i 1))
(and (pred (ascii->char i))
(lp (- i 1))))))))
(define (char-set-any pred cs)
(let ((s (char-set:s cs)))
(let lp ((i 255))
(and (>= i 0)
(if (= 0 (char->ascii (string-ref s i)))
(lp (- i 1))
(or (pred (ascii->char i))
(lp (- i 1))))))))
(define (char-set . chars)
(chars->char-set chars))
@ -181,58 +226,88 @@
(define (char-set-member? . args)
(error "CHAR-SET-MEMBER? is no longer provided. Use CHAR-SET-CONTAINS? instead."))
;;; Set algebra
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (char-set-invert cs)
(predicate->char-set (lambda (char)
(not (char-set-contains? cs char)))))
;;; The union, intersection, and difference code is ugly,
;;; because the ops are n-ary.
(define (char-set-union . csets)
(if (pair? csets)
(apply char-set-union! (char-set-copy (car csets)) (cdr csets))
char-set:empty))
(define (char-set-intersection . csets)
(if (pair? csets)
(apply char-set-intersection! (char-set-copy (car csets)) (cdr csets))
char-set:full))
(define (char-set-difference cs1 . csets)
(if (pair? csets)
(apply char-set-difference! (char-set-copy cs1) csets)
cs1))
;;; Linear set-algebraic ops
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; These guys are allowed, but not required, to side-effect their first
;;; argument when computing their result. In other words, you must use them
;;; as if they were completely functional, just like their non-! counterparts,
;;; and you must additionally ensure that their first arguments are "dead"
;;; at the point of call. In return, we promise a more efficient result, plus
;;; allowing you to always assume char-sets are unchangeable values.
;;; Apply P to each index and it's char in S: (P I C).
;;; Used by the intersection & difference.
(define (string-iter s p)
(define (string-iter p s)
(let lp ((i (- (string-length s) 1)))
(cond ((>= i 0)
(p i (string-ref s i))
(lp (- i 1))))))
(define (char-set-union . csets)
(if (pair? csets)
(let ((cset (copy-char-set (car csets))))
(for-each (lambda (cs)
(char-set-for-each (lambda (c) (set-char-set! cset c #t))
cs))
(cdr csets))
(define (char-set-invert! cset)
(let ((s (char-set:s cset)))
(string-iter (lambda (i c)
(string-set! s i (ascii->char (- 1 (char->ascii c)))))
s))
cset)
char-set:empty))
(define (char-set-intersection . csets)
(if (pair? csets)
(let* ((cset (copy-char-set (car csets)))
(s (char-set:s cset)))
(for-each (lambda (cs)
(string-iter (char-set:s cs)
(lambda (i c)
(if (= 0 (char->ascii c))
(string-set! s i (ascii->char 0))))))
(cdr csets))
cset)
char-set:full))
(define (char-set-union! cset1 . csets)
(let ((s (char-set:s cset1)))
(for-each (lambda (cset)
(char-set-for-each (lambda (c)
(string-set! s (char->ascii c)
(ascii->char 1)))
cset))
csets))
cset1)
(define (char-set-intersection! cset1 . csets)
(let ((s (char-set:s cset1)))
(for-each (lambda (cset)
(string-iter (lambda (i c)
(if (zero? (char->ascii c))
(string-set! s i (ascii->char 0))))
(char-set:s cset)))
csets))
cset1)
(define (char-set-difference! cset1 . csets)
(let ((s (char-set:s cset1)))
(for-each (lambda (cset)
(char-set-for-each (lambda (c)
(string-set! s (char->ascii c)
(ascii->char 0)))
cset))
csets))
cset1)
(define (char-set-difference cs1 . csets)
(if (pair? csets)
(let* ((cset (copy-char-set cs1))
(s (char-set:s cset)))
(for-each (lambda (cs)
(string-iter (char-set:s cs)
(lambda (i c)
(if (= 1 (char->ascii c))
(string-set! s i (ascii->char 0))))))
csets)
cset)
cs1))
;;;; System Character Sets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define char-set:lower-case (ascii-range->char-set #x61 #x7B))
(define char-set:upper-case (ascii-range->char-set #x41 #x5B))

View File

@ -11,17 +11,28 @@
;;; root Search starts from here. Usefully "." (cwd)
;;; dots? => if true, dot files will be matched.
;;; if false, dot files will not be matched.
;;; pattern-list := a list of regular expressions or predicates
;;; Each member of the list corresponds
;;; to one or more levels in a directory.
;;; (A member with embedded "/" characters
;;; corresponds to multiple levels.)
;;; Example: ("foo" "bar" "\\.c$")
;;; pattern-list := a list of
;;; - strings
;;; These are split at /'s and then
;;; treated as Posix regexp strings.
;;; - regexps (typically made with RX macro)
;;; - predicates
;;; Each member of the list corresponds to one
;;; or more levels in a directory. (A string
;;; with embedded "/" characters corresponds
;;; to multiple levels.)
;;; Example:
;;; (file-match "." #f "foo" "bar" "\\.c$")
;;; means match files that end in ".c"
;;; if they reside in a directory with
;;; a name that contains "bar", which
;;; itself must reside in a directory
;;; with a name that contains "foo".
;;; Here are two more equivalent specs
;;; for the example above:
;;; (file-match "." #f "foo/bar/\\.c$")
;;; (file-match "." #f (rx "foo") (rx "bar")
;;; (rx ".c" eos))
;;; If a member in the list is a predicate,
;;; the predicate must be a procedure of
;;; one argument. This procedure is applied
@ -40,16 +51,19 @@
;;; when FILE-DIRECTORY? is applied to the bogus symlink.
(define (file-match root dot-files? . patterns)
(let ((patterns (apply append (map split-pat patterns))))
(let ((patterns (apply append
(map (lambda (p) (if (string? p)
(map posix-string->regexp (split-pat p))
p))
patterns))))
(let recur ((root root)
(patterns patterns))
(if (pair? patterns)
(let* ((pattern (car patterns))
(patterns (cdr patterns))
(dir (file-name-as-directory root))
(matcher (cond ((string? pattern)
(let ((re (make-regexp pattern)))
(lambda (f) (regexp-exec re f))))
(matcher (cond ((regexp? pattern)
(lambda (f) (regexp-search? re f)))
;; This arm makes a file-matcher using
;; predicate PATTERN. If PATTERN signals
@ -87,7 +101,8 @@
(if (procedure? pat) (list pat)
(let lp ((i (string-length pat))
(ans '()))
(cond ((rindex pat #\/ i) =>
(lambda (j) (lp (cons (substring pat (+ j 1) i) ans) j)))
(cond ((string-index-right pat #\/ i) =>
(lambda (j) (lp j (cons (substring pat (+ j 1) i) ans))))
(else
(cons (substring pat 0 i) ans))))))

View File

@ -68,7 +68,7 @@
;;; Returns FNAME's directory component in *directory form.*
(define (file-name-directory fname)
(cond ((rindex fname #\/) =>
(cond ((string-index-right fname #\/) =>
(lambda (rslash)
(if (last-non-slash fname)
(substring fname 0 (+ 1 rslash))
@ -77,7 +77,7 @@
(define (file-name-nondirectory fname)
(cond ((rindex fname #\/) =>
(cond ((string-index-right fname #\/) =>
(lambda (rslash)
(if (last-non-slash fname)
(substring fname (+ 1 rslash) (string-length fname))
@ -90,7 +90,7 @@
(len (string-length fname)))
(let split ((start 0))
(cond ((>= start len) '())
((index fname #\/ start) =>
((string-index fname #\/ start) =>
(lambda (slash)
(cons (substring fname start slash)
(split (+ slash 1)))))
@ -128,7 +128,7 @@
;;; /usr/shivers/.login are not considered extensions.
(define (file-name-extension-index fname)
(let ((dot (rindex fname #\.)))
(let ((dot (string-index-right fname #\.)))
(if (and dot
(> dot 0)
(not (char=? #\/ (string-ref fname (- dot 1)))))
@ -154,7 +154,7 @@
(let* ((user (substring fname 1 end))
(ui (name->user-info user)))
(user-info:home-dir ui))))))
(cond ((index fname #\/ 1) =>
(cond ((string-index fname #\/ 1) =>
(lambda (slash)
(string-append (tilde->homedir slash) "/"
(substring fname (+ slash 1) len))))
@ -256,7 +256,7 @@
(let ((len (string-length s)))
(cond
((zero? len) (apply string-append (reverse! ans)))
((index s #\$) =>
((string-index s #\$) =>
(lambda (i)
(let ((ans (cons (substring s 0 i) ans))
(s (substring s (+ i 1) len))
@ -264,13 +264,13 @@
(if (zero? len) (lp ans "")
(let ((next-char (string-ref s 0)))
(cond ((char=? #\{ next-char)
(cond ((index s #\}) =>
(cond ((string-index s #\}) =>
(lambda (i)
(lp (cons (getenv (substring s 1 i)) ans)
(substring s (+ i 1) len))))
(else (error "Unbalanced ${ delimiter in string" s))))
(else
(let ((i (or (index s #\/) len)))
(let ((i (or (string-index s #\/) len)))
(lp (cons (getenv (substring s 0 i)) ans)
(substring s i len))))))))))
(else (lp (cons s ans) ""))))))

View File

@ -54,43 +54,12 @@
;;; This has the effect you want with field parsing. For example, if you split
;;; a string with the empty pattern, you will explode the string into its
;;; individual characters:
;;; ((suffix-splitter "") "foo") -> #("" "f" "o" "o")
;;; ((suffix-splitter (rx "")) "foo") -> #("" "f" "o" "o")
;;; However, even though this boundary case is handled correctly, we don't
;;; recommend using it. Say what you mean -- just use a field splitter:
;;; ((field-splitter ".") "foo") -> #("f" "o" "o")
;;; ((field-splitter (rx any)) "foo") -> #("f" "o" "o")
;;; (join-strings string-list [delimiter grammar]) => string
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Paste strings together using the delimiter string.
;;;
;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
;;;
;;; DELIMITER defaults to a single space " "
;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix.
;;; (join-strings strings [delim grammar])
(define (join-strings strings . args)
(if (pair? strings)
(let-optionals args ((delim " ") (grammar 'infix))
(check-arg string? delim join-strings)
(let ((strings (reverse strings)))
(let lp ((strings (cdr strings))
(ans (case grammar
((infix) (list (car strings)))
((suffix) (list (car strings) delim))
(else (error "Illegal grammar" grammar)))))
(if (pair? strings)
(lp (cdr strings)
(cons (car strings) (cons delim ans)))
; All done
(apply string-append ans)))))
"")) ; Special-cased for infix grammar.
;;; FIELD PARSERS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This section defines routines to split a string into fields.
@ -100,15 +69,11 @@
(define (->delim-matcher x)
(if (procedure? x) x ; matcher proc
(let ((re (cond ((regexp? x) x) ; regexp pattern
((string? x) (make-regexp x)) ; regexp string
(else (error "Illegal pattern/parser" x)))))
;; The matcher proc.
(lambda (s i)
(cond ((regexp-exec re s i) =>
(cond ((regexp-search x s i) =>
(lambda (m) (values (match:start m 0) (match:end m 0))))
(else (values #f #f)))))))
(else (values #f #f))))))
;;; (infix-splitter [re num-fields handle-delim]) -> parser
;;; (suffix-splitter [re num-fields handle-delim]) -> parser
@ -154,7 +119,8 @@
match-delim cons-field
num-fields nfields-exact?))))))))
(define default-field-matcher (->delim-matcher "[^ \t\n]+"))
;;; Default field spec is runs of non-whitespace chars.
(define default-field-matcher (->delim-matcher (rx (+ (~ white)))))
;;; (field-splitter [field-spec num-fields])
@ -313,8 +279,8 @@
;;; Now, build the exported procedures: {infix,suffix,sloppy-suffix}-splitter.
(define default-suffix-matcher (->delim-matcher "[ \t\n]+|$"))
(define default-infix-matcher (->delim-matcher "[ \t\n]+"))
(define default-suffix-matcher (->delim-matcher (rx (| (+ white) eos))))
(define default-infix-matcher (->delim-matcher (rx (+ white))))
(define infix-splitter
(make-field-parser-generator default-infix-matcher infix-field-loop))
@ -408,21 +374,17 @@
;;; Repeatedly do (APPLY PROC M STATE) to generate new state values,
;;; where M is a regexp match structure made from matching against STRING.
;(define (regexp-reduce string start regexp proc . state)
; (let ((end (string-length string))
; (regexp (if (string? regexp)
; (make-regexp regexp)
; regexp)))
;
;(define (regexp-fold string start regexp proc . state)
; (let ((end (string-length string)))
; (let lp ((i start) (state state) (last-null? #f))
; (let ((j (if last-null? (+ i 1) i)))
; (cond ((and (<= j end) (regexp-exec regexp string j)) =>
; (cond ((and (<= j end) (regexp-search regexp string j)) =>
; (lambda (m)
; (receive state (apply proc m state)
; (lp (match:end m) state (= (match:start m) (match:end m))))))
; (else (apply values state)))))))
;
;(define (all-regexp-matches regexp string)
; (reverse (regexp-reduce string 0 regexp
; (reverse (regexp-fold string 0 regexp
; (lambda (m ans) (cons (match:substring m 0) ans))
; '())))

View File

@ -76,8 +76,8 @@
(else (let* ((dots? (char=? #\. (string-ref pat 0))) ; Match dot files?
(candidates (maybe-directory-files fname dots?))
(re (make-regexp (glob->regexp pat))))
(values (filter (lambda (f) (regexp-exec re f)) candidates)
(re (glob->regexp pat)))
(values (filter (lambda (f) (regexp-search? re f)) candidates)
#t))))) ; These guys exist for sure.
;;; The initial special-case above isn't really for the fast-path; it's
@ -87,43 +87,96 @@
;;; Translate a brace-free glob pattern to a regular expression.
(define (glob->regexp pat)
(let ((pat-len (string-length pat)))
(let lp ((result '(#\^))
(i 0)
(state 'normal))
(define glob->regexp
(let ((dot-star (re-repeat 0 #f re-any))) ; ".*" or (* any)
(lambda (pat)
(let ((pat-len (string-length pat))
(str-cons (lambda (chars res) ; Reverse CHARS and cons the
(if (pair? chars) ; result string-re onto RES.
(cons (re-string (list->string (reverse chars)))
res)
res))))
;; We accumulate chars into CHARS, and coalesce into a single string
;; with STR-CONS when we run across a non-char.
(let lp ((chars '())
(res (list re-bos))
(i 0))
(if (= i pat-len)
(if (eq? state 'normal)
(list->string (reverse (cons #\$ result)))
(error "Illegal glob pattern" pat))
(re-seq (reverse (str-cons chars res)))
(let ((c (string-ref pat i))
(i (+ i 1)))
(case state
((char-set)
(lp (cons c result)
i
(if (char=? c #\]) 'normal 'char-set)))
(case c
((#\\) (if (< i pat-len)
(lp (cons (string-ref pat i) chars)
res (+ i 1))
(error "Ill-formed glob pattern -- ends in backslash" pat)))
((escape)
(lp (case c
((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[)
(cons c (cons #\\ result)))
(else (cons c result)))
i
'normal))
((#\*) (lp '()
(cons dot-star (str-cons chars res))
i))
((#\?) (lp '()
(cons re-any (str-cons chars res))
i))
;; Normal
(else (case c
((#\\) (lp result i 'escape))
((#\*) (lp (cons #\* (cons #\. result)) i 'normal))
((#\?) (lp (cons #\. result) i 'normal))
((#\[) (lp (cons c result) i 'char-set))
((#\$ #\^ #\. #\+ #\| #\( #\))
(lp (cons c (cons #\\ result)) i 'normal))
(else (lp (cons c result) i 'normal))))))))))
((#\[) (receive (cset i) (parse-glob-bracket pat i)
(lp '()
(cons (re-char-set cset)
(str-cons chars res))
i)))
(else (lp (cons c chars) res i))))))))))
;;; A glob bracket expression is [...] or [^...].
;;; The body is a sequence of <char> and <char>-<char> ranges.
;;; A <char> is any character except right-bracket, carat, hypen or backslash,
;;; or a backslash followed by any character at all.
(define (parse-glob-bracket pat i)
(let ((pat-len (string-length pat)))
(receive (negate? i) (if (and (< i pat-len) (char=? #\^ (string-ref pat i)))
(values #t (+ i 1))
(values #f i))
(let lp ((elts '()) (i i))
(if (>= i pat-len)
(error "Ill-formed glob pattern -- no terminating close-bracket" pat)
(let ((c (string-ref pat i))
(i (+ i 1)))
(case c
((#\])
(let ((cset (fold (lambda (elt cset)
(char-set-union
cset
(if (char? elt)
(char-set elt)
(ascii-range->char-set (char->ascii (car elt))
(+ 1 (char->ascii (cdr elt)))))))
char-set:empty
elts)))
(values (re-char-set (if negate?
(char-set-invert cset)
cset))
i)))
((#\\)
(if (>= i pat-len)
(error "Ill-formed glob pattern -- ends in backslash" pat)
(lp (cons (string-ref pat i) elts) (+ i 1))))
((#\-)
(cond ((>= i pat-len)
(error "Ill-formed glob pattern -- unterminated range." pat))
((or (null? elts) (not (char? (car elts))))
(error "Ill-formed glob pattern -- range has no beginning." pat))
(else (lp (cons (cons (car elts) (string-ref pat i)) elts)
(+ i 1)))))
(else (lp (cons c elts) i)))))))))
;;; Is the glob pattern free of *'s, ?'s and [...]'s?

View File

@ -296,7 +296,7 @@
(define reaped-procs '()) ; Reaped, but not yet waited.
(define (filter-weak-ptr-list pred lis)
(foldr (lambda (wptr result) (let ((val (weak-pointer-ref wptr)))
(fold-right (lambda (wptr result) (let ((val (weak-pointer-ref wptr)))
(if (and val (pred val))
(cons wptr result)
result)))

View File

@ -264,7 +264,7 @@
;;; (read-paragraph [port handle-delim])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define blank-line-regexp (make-regexp "^[ \t]*\n$"))
(define blank-line-regexp (rx bos (* white) #\newline eos))
(define (read-paragraph . args)
(let-optionals args ((port (current-input-port))
@ -275,14 +275,14 @@
(cond ((eof-object? line)
(if (eq? handle-delim 'split) (values line line) line))
((regexp-exec blank-line-regexp line) (lp))
((regexp-search? blank-line-regexp line) (lp))
;; Then, read in non-blank lines.
(else
(let lp ((lines (list line)))
(let ((line (read-line port 'concat)))
(if (and (string? line)
(not (regexp-exec blank-line-regexp line)))
(not (regexp-search? blank-line-regexp line)))
(lp (cons line lines))

View File

@ -1,69 +0,0 @@
/* This is an Scheme48/C interface file,
** automatically generated by cig.
*/
#include <stdio.h>
#include <stdlib.h> /* For malloc. */
#include "libcig.h"
/* Make sure foreign-function stubs interface to the C funs correctly: */
#include "re1.h"
scheme_value df_re_byte_len(long nargs, scheme_value *args)
{
extern char *re_byte_len(const char *, int *);
scheme_value ret1;
char *r1;
int r2;
cig_check_nargs(2, nargs, "re_byte_len");
r1 = re_byte_len(cig_string_body(args[1]), &r2);
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
VECTOR_REF(*args,1) = ENTER_FIXNUM(r2);
return ret1;
}
scheme_value df_re_compile(long nargs, scheme_value *args)
{
extern char *re_compile(const char *, scheme_value );
scheme_value ret1;
char *r1;
cig_check_nargs(3, nargs, "re_compile");
r1 = re_compile(cig_string_body(args[2]), args[1]);
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
return ret1;
}
scheme_value df_re_exec(long nargs, scheme_value *args)
{
extern char *re_exec(scheme_value , const char *, int , scheme_value , scheme_value , int *);
scheme_value ret1;
char *r1;
int r2;
cig_check_nargs(6, nargs, "re_exec");
r1 = re_exec(args[5], cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
return ret1;
}
scheme_value df_re_match(long nargs, scheme_value *args)
{
extern char *re_match(const char *, const char *, int , scheme_value , scheme_value , int *);
scheme_value ret1;
char *r1;
int r2;
cig_check_nargs(6, nargs, "re_match");
r1 = re_match(cig_string_body(args[5]), cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2);
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2);
return ret1;
}

View File

@ -1,283 +0,0 @@
;;; Regular expression matching for scsh
;;; Copyright (c) 1994 by Olin Shivers.
(foreign-source
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
"#include \"re1.h\""
"" ""
)
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record regexp-match
string ; The string against which we matched.
start ; 10 elt vec
end) ; 10 elt vec
(define (match:start match . maybe-index)
(vector-ref (regexp-match:start match)
(:optional maybe-index 0)))
(define (match:end match . maybe-index)
(vector-ref (regexp-match:end match)
(:optional maybe-index 0)))
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
(start (vector-ref (regexp-match:start match) i)))
(and start (substring (regexp-match:string match)
start
(vector-ref (regexp-match:end match) i)))))
;;; Compiling regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record %regexp
string ; The string form of the regexp.
bytes ; The compiled representation, stuffed into a Scheme string.
((disclose self) (list "Regexp" (%regexp:string self))))
(define regexp? %regexp?)
(define (make-regexp pattern)
(receive (err len) (%regexp-compiled-length pattern)
(if err (error err make-regexp pattern)
(let ((buf (make-string len)))
(%regexp-compile pattern buf)
(make-%regexp pattern buf)))))
(define-foreign %regexp-compiled-length (re_byte_len (string pattern))
static-string ; Error msg or #f
integer) ; number of bytes needed to compile REGEXP.
(define-foreign %regexp-compile (re_compile (string pattern)
(string-desc bytes))
static-string) ; Error msg or #f
(define (->regexp x)
(cond ((string? x) (make-regexp x))
((regexp? x) x)
(else (error "Not a regexp or string." x))))
;;; Executing compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (regexp-exec regexp str . maybe-start)
(let ((start (:optional maybe-start 0))
(start-vec (make-vector 10))
(end-vec (make-vector 10)))
(receive (err match?)
(%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec)
(if err (error err regexp-exec regexp str start)
(and match?
(make-regexp-match str start-vec end-vec))))))
(define-foreign %regexp-exec (re_exec (string-desc compiled-regexp)
(string s)
(integer start)
(vector-desc start-vec)
(vector-desc end-vec))
static-string ; Error msg or #f
bool) ; Matched?
;;; Compile&match regexps in one go
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; I could do this with the separate compile and execute procedures,
;;; but I go straight to C just for fun.
(define (string-match pattern string . maybe-start)
(let ((start (:optional maybe-start 0))
(start-vec (make-vector 10))
(end-vec (make-vector 10)))
(receive (err match?) (if (regexp? pattern)
(%regexp-exec (%regexp:bytes pattern)
string start start-vec end-vec)
(%string-match pattern string start
start-vec end-vec))
(if err (error err string-match pattern string start)
(and match? (make-regexp-match string start-vec end-vec))))))
(define-foreign %string-match (re_match (string pattern)
(string s)
(integer start)
(vector-desc start-vec)
(vector-desc end-vec))
static-string ; Error string or #f if all is ok.
bool) ; match?
;;; Substitutions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (regexp-substitute port match . items)
(let* ((str (regexp-match:string match))
(sv (regexp-match:start match))
(ev (regexp-match:end match))
(range (lambda (item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
(values (vector-ref sv item)
(vector-ref ev item)))
((eq? 'pre item) (values 0 (vector-ref sv 0)))
((eq? 'post item) (values (vector-ref ev 0)
(string-length str)))
(else (error "Illegal substitution item."
item
regexp-substitute))))))
(if port
;; Output port case.
(for-each (lambda (item)
(if (string? item) (write-string item port)
(receive (si ei) (range item)
(write-string str port si ei))))
items)
;; Here's the string case. Make two passes -- one to
;; compute the length of the target string, one to fill it in.
(let* ((len (reduce (lambda (i item)
(+ i (if (string? item) (string-length item)
(receive (si ei) (range item) (- ei si)))))
0 items))
(ans (make-string len)))
(reduce (lambda (index item)
(cond ((string? item)
(string-replace! ans index item)
(+ index (string-length item)))
(else (receive (si ei) (range item)
(substring-replace! ans index str si ei)
(+ index (- ei si))))))
0 items)
ans))))
(define (regexp-substitute/global port re str . items)
(let ((re (->regexp re))
(str-len (string-length str))
(range (lambda (start sv ev item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
(values (vector-ref sv item)
(vector-ref ev item)))
((eq? 'pre item) (values start (vector-ref sv 0)))
(else (error "Illegal substitution item."
item
regexp-substitute/global)))))
(num-posts (reduce (lambda (count item)
(+ count (if (eq? item 'post) 1 0)))
0 items)))
(if (and port (< num-posts 2))
;; Output port case, with zero or one POST items.
(let recur ((start 0))
(if (<= start str-len)
(let ((match (regexp-exec re str start)))
(if match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match))
(s (vector-ref sv 0))
(e (vector-ref ev 0))
(empty? (= s e)))
(for-each (lambda (item)
(cond ((string? item) (write-string item port))
((procedure? item) (write-string (item match) port))
((eq? 'post0 item)
(if (and empty? (< s str-len))
(write-char (string-ref str s) port)))
((eq? 'post item)
(recur (if empty? (+ 1 e) e)))
(else (receive (si ei)
(range start sv ev item)
(write-string str port si ei)))))
items))
(write-string str port start))))) ; No match.
;; Either we're making a string, or >1 POST.
(let* ((pieces (let recur ((start 0))
(if (> start str-len) '()
(let ((match (regexp-exec re str start))
(cached-post #f))
(if match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match))
(s (vector-ref sv 0))
(e (vector-ref ev 0))
(empty? (= s e)))
(reduce (lambda (pieces item)
(cond ((string? item)
(cons item pieces))
((procedure? item)
(cons (item match) pieces))
((eq? 'post0 item)
(if (and empty? (< s str-len))
(cons (string (string-ref str s))
pieces)
pieces))
((eq? 'post item)
(if (not cached-post)
(set! cached-post
(recur (if empty? (+ e 1) e))))
(append cached-post pieces))
(else (receive (si ei)
(range start sv ev item)
(cons (substring str si ei)
pieces)))))
'() items))
;; No match. Return str[start,end].
(list (if (zero? start) str
(substring str start (string-length str)))))))))
(pieces (reverse pieces)))
(if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces))))))
;;; Miscellaneous
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Convert a string into a regex pattern that matches that string exactly --
;;; in other words, quote the special chars with backslashes.
(define (regexp-quote string)
(let lp ((i (- (string-length string) 1))
(result '()))
(if (< i 0) (list->string result)
(lp (- i 1)
(let* ((c (string-ref string i))
(result (cons c result)))
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
(cons #\\ result)
result))))))
;;; Count the number of possible sub-matches in a regexp
;;; (i.e., the number of left parens).
(define (regexp-num-submatches s)
(let* ((len (string-length s))
(len-1 (- len 1)))
(let lp ((i 0) (nsm 0))
(if (= i len) nsm
(case (string-ref s i)
((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm))
((#\() (lp (+ i 1) (+ nsm 1)))
(else (lp (+ i 1) nsm)))))))

View File

@ -1,194 +0,0 @@
/* Scheme48 interface to Henry Spencer's regular expression package.
** Copyright (c) 1993, 1994 by Olin Shivers.
*/
#include <stdlib.h>
#include "regexp.h"
#include "cstuff.h"
/* Make sure our exports match up w/the implementation: */
#include "re1.h"
/* Not multi-threaded reentrant. */
static char *regexp_error;
/* Stash error msg in global. */
void regerror(char *msg) {regexp_error = msg;}
/*
** Return NULL normally, error string on error.
** Stash number of bytes needed for compiled regexp into `*len'
*/
char *re_byte_len(const char *re, int *len)
{
int l;
regexp_error = 0;
*len = regcomp_len(re);
return regexp_error;
}
/*
** Return NULL normally, error string on error.
** Compile regexp into string described by `cr'.
*/
char *re_compile(const char *re, scheme_value cr)
{
int len = STRING_LENGTH(cr);
regexp *r = (regexp *) &STRING_REF(cr, 0);
regexp_error = 0;
regcomp_comp(re, r, len);
return regexp_error;
}
/* Return NULL normally, error string on error.
** Stash match info in start_vec and end_vec.
** Returns boolean match/no-match in hit.
*/
char *re_exec(scheme_value cr, const char *string, int start,
scheme_value start_vec, scheme_value end_vec, int *hit)
{
regexp *r = (regexp *) &STRING_REF(cr, 0);
*hit = 0;
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
return "Illegal start vector"; /* never trigger. */
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
return "Illegal end vector";
regexp_error = 0;
if( regexec(r, string+start) ) {
int i;
for(i=0; i<NSUBEXP; i++) {
const char *s = r->startp[i];
const char *e = r->endp[i];
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
r->startp[i] = 0; /* Why did Sommerfeld */
r->endp[i] = 0; /* put these here? */
}
*hit = 1;
}
return regexp_error;
}
char *re_subst(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
scheme_value outbuf, int *len)
{
int i;
regexp *r = (regexp *) &STRING_REF(cr, 0);
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
return "Illegal start vector"; /* never trigger. */
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
return "Illegal end vector";
for (i=0; i<NSUBEXP; i++) {
scheme_value se = VECTOR_REF(start_vec, i);
scheme_value ee = VECTOR_REF(end_vec, i);
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
}
regexp_error = 0;
regnsub(r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf));
*len = strlen(&STRING_REF(outbuf, 0));
return regexp_error;
}
char *re_subst_len(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
int *len)
{
int i;
regexp *r = (regexp *) &STRING_REF(cr, 0);
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */
return "Illegal start vector"; /* never trigger. */
if( VECTOR_LENGTH(end_vec) != NSUBEXP )
return "Illegal end vector";
for (i=0; i<NSUBEXP; i++) {
scheme_value se = VECTOR_REF(start_vec, i);
scheme_value ee = VECTOR_REF(end_vec, i);
r->startp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0;
r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0;
}
regexp_error = 0;
*len = regsublen(r, src);
return regexp_error;
}
/* Return NULL normally, error string on error.
** Stash match info in start_vec and end_vec.
** Returns boolean match/no-match in hit.
*/
char *re_match(const char *re, const char *string, int start,
scheme_value start_vec, scheme_value end_vec, int *hit)
{
regexp *prog;
regexp_error = 0;
*hit = 0;
prog = regcomp(re);
if( !prog ) return regexp_error;
if( VECTOR_LENGTH(start_vec) != NSUBEXP ) { /* These two tests */
Free(prog);
return "Illegal start vector";
}
if( VECTOR_LENGTH(end_vec) != NSUBEXP ) { /* should never trigger. */
Free(prog);
return "Illegal end vector";
}
if( regexec(prog, string+start) ) {
int i;
for(i=0; i<NSUBEXP; i++) {
const char *s = prog->startp[i];
const char *e = prog->endp[i];
VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE;
VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE;
}
*hit = 1;
}
Free(prog);
return regexp_error;
}
char *filter_stringvec(const char *re, char const **stringvec, int *nummatch)
{
regexp *prog;
regexp_error = 0;
if( prog=regcomp(re) ) {
char const **p = stringvec;
char const **q = p;
while(*p) {
if( regexec(prog, *p) ) *q++ = *p;
p++;
}
Free(prog);
*nummatch = q-stringvec;
}
return regexp_error;
}

View File

@ -1,11 +0,0 @@
/* Exports from re1.c */
char *re_byte_len(const char *re, int *len);
char *re_compile(const char *re, scheme_value target);
char *re_exec(scheme_value cr, const char *string, int start,
scheme_value start_vec, scheme_value end_vec, int *hit);
char *re_match(const char *re, const char *string, int start,
scheme_value start_vec, scheme_value end_vec,
int *hit);

61
scsh/rx/re-low.c Normal file
View File

@ -0,0 +1,61 @@
/* This is an Scheme48/C interface file,
** automatically generated by cig.
*/
#include <stdio.h>
#include <stdlib.h> /* For malloc. */
#include "libcig.h"
/* Make sure foreign-function stubs interface to the C funs correctly: */
#include <sys/types.h>
#include "../regexp/regex.h"
#include "re1.h"
scheme_value df_compile_re(long nargs, scheme_value *args)
{
extern int compile_re(scheme_value , int , regex_t* *);
scheme_value ret1;
int r1;
regex_t* r2;
cig_check_nargs(3, nargs, "compile_re");
r1 = compile_re(args[2], EXTRACT_BOOLEAN(args[1]), &r2);
ret1 = ENTER_FIXNUM(r1);
AlienVal(VECTOR_REF(*args,0)) = (long) r2;
return ret1;
}
scheme_value df_re_search(long nargs, scheme_value *args)
{
extern scheme_value re_search(const regex_t *, scheme_value , int , scheme_value , int , scheme_value , scheme_value );
scheme_value ret1;
scheme_value r1;
cig_check_nargs(7, nargs, "re_search");
r1 = re_search((const regex_t *)AlienVal(args[6]), args[5], EXTRACT_FIXNUM(args[4]), args[3], EXTRACT_FIXNUM(args[2]), args[1], args[0]);
ret1 = r1;
return ret1;
}
scheme_value df_re_errint2str(long nargs, scheme_value *args)
{
extern const char *re_errint2str(int , const regex_t *);
scheme_value ret1;
const char *r1;
cig_check_nargs(3, nargs, "re_errint2str");
r1 = re_errint2str(EXTRACT_FIXNUM(args[2]), (const regex_t *)AlienVal(args[1]));
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
return ret1;
}
scheme_value df_free_re(long nargs, scheme_value *args)
{
extern void free_re(regex_t* );
cig_check_nargs(1, nargs, "free_re");
free_re((regex_t* )AlienVal(args[0]));
return SCHFALSE;
}

View File

@ -4,8 +4,8 @@
(foreign-source
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
"#include <sys/types.h>"
"#include \"regex.h\""
"#include \"scsh/re1.h\""
"#include \"../regexp/regex.h\""
"#include \"re1.h\""
"" ""
)
@ -53,7 +53,7 @@
(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
(define (max-live-posix-submatch tvec)
(vfoldl (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
(define (compile-posix-re->c-struct re-string sm?)
(receive (errcode c-struct) (%compile-re re-string sm?)
@ -143,7 +143,7 @@
(define (clean-up-cres)
(set! *master-cre-list*
(foldl (lambda (elt lis)
(fold (lambda (elt lis)
(if (weak-pointer-ref (car elt)) ; Still alive
(cons elt lis)
(begin (%free-re (cdr elt))

173
scsh/rx/re1.c Normal file
View File

@ -0,0 +1,173 @@
/* Scheme48 interface to Henry Spencer's Posix regular expression package.
** Copyright (c) 1993, 1994, 1998 by Olin Shivers.
*/
/* Todo:
** not_eol not_bol support on searchers
** error code -> err msg
** regex freeing
** regexp-string -> regex_t caching
** make filter_stringvec return an error code.
*/
#include <stdlib.h>
#include <sys/types.h>
#include "../regexp/regex.h"
#include "../cstuff.h"
/* Make sure our exports match up w/the implementation: */
#include "re1.h"
/*
** Compile regexp into a malloc'd struct.
** The flag sm_p is true if we want to compile for submatches.
** On success, store pointer to struct into cr and return 0.
** On failure, free the struct, store NULL into cr,
** and return a non-zero error code.
*/
int compile_re(scheme_value re_str, int sm_p, regex_t **cr)
{
char *s = &STRING_REF(re_str, 0);
int len = STRING_LENGTH(re_str);
int err;
regex_t *re = Alloc(regex_t);
if( !re ) return -1;
re->re_endp = s + len;
err = regcomp(re, s, REG_EXTENDED | REG_PEND
| (sm_p ? 0 : REG_NOSUB));
if( err ) {Free(re); *cr=0;}
else *cr=re;
return err;
}
/* Do a regex search of RE through string STR, beginning at STR[START].
** - STR is passed as a Scheme value as it is allowed to contain nul bytes.
**
** - trans_vec contains the translation from the user's "virtual" submatches to
** the actual submatches the engine will report:
** - trans_vec[i] = #F means user submatch #i is a dead submatch.
** - trans_vec[i] = j means user submatch #i corresponds to paren #j in re.
**
** Indexing fence-posts are a little complicated due to the fact that you
** get an extra match elt back from the matcher -- match 0 is not a
** paren-based *sub*match, but rather the match info for the whole thing.
**
** So, here is how it works:
** length(start_vec) = length(end_vec) = length(trans_vec) + 1
** because trans_vec doesn't have a translation for submatch 0, which
** is SRE submatch #0 => Posix submatch #0. For SRE submatch #i (1, 2, ...),
** we want the submatch associated with Posix paren # trans_vec[i-1].
**
** - MAX_PSM is the maximum paren in which we have submatch interest -- the
** max element in TRANS_VEC. Any parens after paren #MAX_PSM are just for
** grouping, not for marking submatches. We only have to allocate MAX_PSM+1
** elements in the submatch vector we pass into the search engine. If
** MAX_PSM = -1, then we don't even want the whole-match match bounds, which
** is really good -- the search engine can really fly in this case.
**
** If we match, map re's submatches over to the exported start_vec and
** end_vec match vectors using trans_vec.
**
** Return 0 on success; #f if no match; non-zero integer error code otherwise.
*/
scheme_value re_search(const regex_t *re, scheme_value str, int start,
scheme_value trans_vec, int max_psm,
scheme_value start_vec, scheme_value end_vec)
{
char *s = &STRING_REF(str,0); /* Passed as a scheme_value because */
int len = STRING_LENGTH(str); /* it might contain nul bytes. */
int vlen = VECTOR_LENGTH(start_vec);
int retval;
regmatch_t static_pmatch[10], *pm;
/* If max_psm+1 > 10, we can't use static_pmatch. */
if( max_psm < 10 ) pm = static_pmatch;
else {
pm = Malloc(regmatch_t, max_psm+1);/* Add 1 for the whole-match info. */
if( !pm ) return ENTER_FIXNUM(-1);
}
pm[0].rm_so = start;
pm[0].rm_eo = len;
retval = regexec(re, s, max_psm+1, pm, REG_STARTEND); /* Do it. */
/* We matched and have match-bound info, so translate it over. */
if( !retval && max_psm >= 0 ) {
int i;
VECTOR_REF(start_vec,0) = ENTER_FIXNUM(pm[0].rm_so); /* whole-match */
VECTOR_REF(end_vec,0) = ENTER_FIXNUM(pm[0].rm_eo);
for( i=vlen-1; --i >= 0; ) { /* submatches */
scheme_value j_scm = VECTOR_REF(trans_vec,i);
if( j_scm != SCHFALSE ) {
int j = EXTRACT_FIXNUM(j_scm);
int k = pm[j].rm_so,
l = pm[j].rm_eo;
VECTOR_REF(start_vec,i+1) = (k != -1) ? ENTER_FIXNUM(k) : SCHFALSE;
VECTOR_REF(end_vec, i+1) = (l != -1) ? ENTER_FIXNUM(l) : SCHFALSE;
}
}
}
if( max_psm >= 10 ) Free(pm);
if( retval==REG_NOMATCH ) return SCHFALSE;
if( ! retval ) return SCHTRUE;
return ENTER_FIXNUM(retval);
}
/* Filter a vector of strings by regexp RE_STR.
** Stringvec is a NULL-terminated vector of strings;
** filter it in-place, copying the survivors back to compact them.
** Put the number of survivors in nummatch.
*/
int filter_stringvec(scheme_value re_str, char const **stringvec)
{
int re_len = STRING_LENGTH(re_str);/* Passed as a scheme_value because */
char *re_chars = &STRING_REF(re_str,0);/* it might contain nul bytes. */
regex_t re;
char const **p, **q;
/* REG_NOSUB -- We just want to know if it matches or not. */
re.re_endp = re_chars + re_len;
if( regcomp(&re, re_chars, REG_EXTENDED | REG_PEND | REG_NOSUB) ) {
return 0;
}
for(p=q=stringvec; *p; p++) {
char const *s = *p;
if( ! regexec(&re, s, 0, 0, 0) ) *q++ = s;
}
regfree(&re);
return q-stringvec;
}
const char *re_errint2str(int errcode, const regex_t *re)
{
int size = regerror(errcode, re, 0, 0);
char *s = Malloc(char,size);
if(s) regerror(errcode, re, s, size);
return s;
}
void free_re(regex_t *re)
{
regfree(re);
Free(re);
}

15
scsh/rx/re1.h Normal file
View File

@ -0,0 +1,15 @@
/* Exports from re1.c */
int compile_re(scheme_value sre, int sm_p, regex_t **cr);
scheme_value re_search(const regex_t *re, scheme_value str, int start,
scheme_value trans_vec, int max_psm,
scheme_value start_vec, scheme_value end_vec);
/* Filter a vector of strings by a regexp. */
int filter_stringvec(scheme_value re_str, char const **stringvec);
/* Error code -> error msg */
const char *re_errint2str(int errcode, const regex_t *re);
void free_re(regex_t *re); /* Free the malloc'd regexp. */

View File

@ -208,7 +208,7 @@
port->sexp-list
port->string-list
port->list
reduce-port
port-fold reduce-port
port->fdes
read-string
read-string!
@ -283,7 +283,7 @@
directory-files
glob
glob-quote
file-match
; file-match
create-temp-file
temp-file-iterate
@ -418,6 +418,7 @@
exec-path-list))
;;; Kill me?
(define-interface scsh-regexp-interface
(export string-match
regexp-match?
@ -427,14 +428,14 @@
make-regexp
->regexp
regexp?
regexp-exec
regexp-search
regexp-substitute
regexp-substitute/global
regexp-quote))
(define-interface scsh-string-interface
(export substitute-env-vars index rindex))
(export substitute-env-vars string-index string-index-right))
(define-interface scsh-file-names-interface
(export file-name-as-directory
@ -567,13 +568,15 @@
(define-interface scsh-utilities-interface
(export del delete index rindex reduce filter first any first? nth
any? every? mapv mapv! vector-every? copy-vector initialize-vector
(export del delete filter first first? nth
fold fold-right
any every
mapv mapv! vector-every? copy-vector initialize-vector vector-append
vfold vfold-right
check-arg conjoin disjoin negate compose reverse! call/cc
deprecated-proc
deposit-bit-field
real->exact-integer
string-replace! substring-replace!))
real->exact-integer))
;;; semi-standard network magic numbers
;;; should be available on all platforms
@ -683,14 +686,15 @@
char-ascii?
char-set?
copy-char-set
char-set-copy
char-set=
char-set<=
char-set-size
set-char-set!
char-set-adjoin char-set-delete
char-set-adjoin! char-set-delete!
char-set-for-each
reduce-char-set
char-set-fold reduce-char-set
char-set
chars->char-set
@ -702,11 +706,19 @@
char-set-members
char-set-contains?
char-set-every?
char-set-any
char-set-invert
char-set-union
char-set-intersection
char-set-difference
char-set-invert!
char-set-union!
char-set-intersection!
char-set-difference!
char-set:lower-case
char-set:upper-case
char-set:alphabetic
@ -758,7 +770,8 @@
skip-char-set))
(define-interface awk-interface
(export (awk :syntax)))
(export (awk :syntax)
(awk/posix-string :syntax)))
(define-interface scsh-dbm-interface
(export dbm-open

View File

@ -27,12 +27,16 @@
(define-structure error-package (export error warn)
(open signals))
(open signals)
(optimize auto-integrate)
)
(define-structure scsh-utilities scsh-utilities-interface
(open bitwise error-package let-opt scheme)
(files utilities))
(open bitwise error-package loopholes let-opt scheme)
(files utilities)
(optimize auto-integrate)
)
;;; This guy goes into the FOR-SYNTAX part of scsh's syntax exports.
@ -44,27 +48,36 @@
scsh-utilities ; check-arg
scheme
)
(files syntax-helpers))
(files syntax-helpers)
(optimize auto-integrate)
)
;;; The bufpol/{block, line, none} values
(define-structure buffered-io-flags buffered-io-flags-interface
(open defenum-package scheme)
(files (machine bufpol)))
(files (machine bufpol))
(optimize auto-integrate)
)
(define-structure char-set-package char-set-interface
(open error-package
ascii
define-record-types ; JAR's record macro.
scsh-utilities ; For DEPRECATED-PROC
scheme)
(files char-set))
(files char-set)
(optimize auto-integrate)
)
(define-structures ((tty-flags tty-flags-interface)
(scsh-internal-tty-flags scsh-internal-tty-flags-interface))
(open scheme ascii bitwise)
(files (machine tty-consts)))
(files (machine tty-consts))
(optimize auto-integrate)
)
(define-structure scsh-version scsh-version-interface
@ -79,7 +92,9 @@
$current-input-port
$current-output-port
$error-output-port)
(open ports))
(open ports)
(optimize auto-integrate)
)
;;; The scsh-level-0 package is for implementation convenience.
;;; The scsh startup and top-level modules need access to scsh
@ -124,7 +139,8 @@
(scsh-level-0-internals (export set-command-line-args!
init-scsh-hindbrain
init-scsh-vars))
(scsh-regexp-package scsh-regexp-interface))
; (scsh-regexp-package scsh-regexp-interface)
)
(for-syntax (open scsh-syntax-helpers scheme))
(open externals
structure-refs
@ -160,6 +176,13 @@
interrupts ; signal handler code
re-level-0
rx-syntax
string-lib
loopholes ; For my bogus CALL-TERMINALLY implementation.
scheme
)
@ -188,7 +211,7 @@
filesys
fileinfo
glob
filemtch
; filemtch
time ; New in release 0.2.
(machine time_dep)
network ; New in release 0.3.
@ -198,19 +221,25 @@
pty ; New in release 0.4.
sighandlers ; New in release 0.5.
scsh
re
; re
rdelim
))
)
; (optimize auto-integrate)
)
(define-structure defrec-package (export (define-record :syntax))
(open records scheme)
(for-syntax (open scheme error-package receiving))
(files defrec))
(files defrec)
(optimize auto-integrate)
)
(define-structure defenum-package (export (define-enum-constant :syntax)
(define-enum-constants :syntax))
(open scheme)
(files enumconst))
(files enumconst)
(optimize auto-integrate)
)
;;; This code opens so many modules of gruesome, low-level S48 internals
;;; that these two modules are segregated into separate packages, each
@ -229,7 +258,9 @@
filenames ; translate
scheme-level-2-internal ; usual-resumer
scheme)
(files startup))
(files startup)
(optimize auto-integrate)
)
(define-structure scsh-top-package (export parse-switches-and-execute repl)
(open command-processor
@ -249,54 +280,78 @@
; with-current-output-port exit
scsh-level-0-internals ; set-command-line-args! init-scsh-vars
scheme)
(files top meta-arg))
(files top meta-arg)
(optimize auto-integrate)
)
(define-structure field-reader-package scsh-field-reader-interface
(open receiving ; receive
char-set-package
scsh-utilities
scsh-utilities ; nth
error-package ; error
scsh-level-0 ; delimited readers
scsh-regexp-package
; scsh-regexp-package
re-exports
string-lib ; join-strings
let-opt ; optional-arg parsing & defaulting
scheme
)
(files fr))
(files fr)
(optimize auto-integrate)
)
(define-structures
((awk-expander-package (export expand-awk))
((awk-expander-package (export expand-awk expand-awk/obsolete))
(awk-support-package (export next-range next-:range
next-range: next-:range:)))
(open receiving ; receive
scsh-utilities
error-package ; error
scsh-regexp-package
; scsh-regexp-package
; re-exports
sre-syntax-tools
scheme
)
(files awk))
(files awk)
(optimize auto-integrate)
)
(define-structure awk-package awk-interface
(open awk-support-package scsh-regexp-package receiving scheme)
(open awk-support-package ; These packages provide all the stuff
re-exports ; that appears in the code produced by
receiving ; an awk expansion.
scheme)
(for-syntax (open awk-expander-package scheme))
(begin (define-syntax awk expand-awk)))
(begin (define-syntax awk expand-awk)
(define-syntax awk/posix-string expand-awk/obsolete)))
;;; Exports an AWK macro that is just AWK/POSIX-STRING.
(define-structure obsolete-awk-package (export (awk :syntax))
(open awk-package)
(begin (define-syntax awk
(syntax-rules () ((awk body ...) (awk/posix-string body ....))))))
(define-structure scsh
(compound-interface (interface-of scsh-level-0)
(interface-of scsh-startup-package)
scsh-regexp-interface
; scsh-regexp-interface
re-exports-interface
re-old-funs-interface
scsh-field-reader-interface ; new in 0.3
; scsh-dbm-interface
(export repl)
awk-interface)
awk-interface
)
(open structure-refs
scsh-level-0
scsh-level-0-internals
scsh-regexp-package
re-exports
re-old-funs
; scsh-regexp-package
scsh-startup-package
; dbm
awk-package
@ -304,7 +359,9 @@
scheme)
(access scsh-top-package)
(begin (define repl (structure-ref scsh-top-package repl))))
(begin (define repl (structure-ref scsh-top-package repl)))
(optimize auto-integrate)
)
(define-structure scsh-here-string-hax (export)
(open reading
@ -312,11 +369,6 @@
scsh ; Just need the delimited readers.
features ; make-immutable!
scheme)
(files here))
(define-structure test-package (export test-proc)
(open scsh-regexp-package scheme)
(begin (define (test-proc p)
(regexp-substitute p
(string-match "(foo)(.*)(bar)" "Hello foo Olin bar quux")
'post 3 1 2 'pre))))
(files here)
(optimize auto-integrate)
)

View File

@ -7,7 +7,8 @@
;;; and dynamic env to be gc'd away, since this procedure never returns.
(define (call-terminally thunk)
(with-continuation #f (lambda () (thunk) (exit 0))))
(with-continuation (loophole :escape #f) ; Bogus
(lambda () (thunk) (exit 0))))
;; Alternatively: (with-continuation #f thunk)
;;; More portably, but less usefully:
@ -125,7 +126,7 @@
;; Main loop.
(let split ((i 0))
(cond ((index clist #\: i) =>
(cond ((string-index clist #\: i) =>
(lambda (colon)
(cons (substring clist i colon)
(split (+ colon 1)))))
@ -207,7 +208,7 @@
(define (with-env* alist-delta thunk)
(let* ((old-env #f)
(new-env (reduce (lambda (alist key/val)
(new-env (fold (lambda (key/val alist)
(alist-update (car key/val) (cdr key/val) alist))
(env->alist)
alist-delta)))
@ -435,12 +436,12 @@
;;; (port->list reader port)
;;; Repeatedly applies READER to PORT, accumulating results into a list.
;;; On EOF, returns the list of items thus collected.
;;; (reduce-port port reader op . seeds)
;;; (port-fold port reader op . seeds)
;;; Repeatedly read things from PORT with READER. Each time you read
;;; some value V, compute a new set of seeds with (apply OP V SEEDS).
;;; (More than 1 seed means OP must return multiple values).
;;; On eof, return the seeds.
;;; PORT->LIST is just (REDUCE-PORT PORT READ CONS '())
;;; On eof, return the seeds: (apply value SEEDS).
;;; PORT->LIST is just (PORT-FOLD PORT READ CONS '())
(define (run/port+proc* thunk)
(receive (r w) (pipe)
@ -505,13 +506,16 @@
(define (port->string-list port)
(port->list read-line port))
(define (reduce-port port reader op . seeds)
(letrec ((reduce (lambda seeds
(define (port-fold port reader op . seeds)
(letrec ((fold (lambda seeds
(let ((x (reader port)))
(if (eof-object? x) (apply values seeds)
(call-with-values (lambda () (apply op x seeds))
reduce))))))
(apply reduce seeds)))
fold))))))
(apply fold seeds)))
(define reduce-port
(deprecated-proc port-fold 'reduce-port "Use port-fold instead."))
;;; Not defined:
;;; (field-reader field-delims record-delims)
@ -671,7 +675,7 @@
(define (exec-path/env prog env . arglist)
(flush-all-ports)
(let ((prog (stringify prog)))
(if (index prog #\/)
(if (string-index prog #\/)
;; Contains a slash -- no path search.
(%exec prog (cons prog arglist) env)

1289
scsh/stringlib.scm Normal file

File diff suppressed because it is too large Load Diff

380
scsh/stringpack.scm Normal file
View File

@ -0,0 +1,380 @@
(define-interface string-lib-interface
(export
;; string-map proc s [start end] -> s
(string-map (proc ((proc (:char) :char)
:string
&opt :exact-integer :exact-integer)
:string))
;; string-map! proc s [start end] -> unspecific
(string-map! (proc ((proc (:char) :values)
:string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-fold kons knil s [start end] -> value
;; string-fold-right kons knil s [start end] -> value
((string-fold string-fold-right)
(proc ((proc (:char :value) :value)
:value :string
&opt :exact-integer :exact-integer)
:value))
;; string-unfold p f g seed -> string
(string-unfold (proc ((proc (:value) :boolean)
(proc (:value) :char)
(proc (:value) :value)
:value)
:string))
; Enough is enough.
; ;; string-unfoldn p f g seed ... -> string
; (string-unfoldn (proc ((procedure :values :boolean)
; (procedure :values :char)
; (procedure :values :values)
; &rest :value)
; :string))
;; string-tabulate proc len -> string
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
:string))
;; string-for-each proc s [start end] -> unspecific
;; string-iter proc s [start end] -> unspecific
((string-for-each string-iter)
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
:unspecific))
;; string-every? pred s [start end]
;; string-any pred s [start end]
(string-every?
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:boolean))
(string-any
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:value))
;; string-compare string1 string2 lt-proc eq-proc gt-proc
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc
((string-compare string-compare-ci)
(proc (:string :string (proc (:exact-integer) :values)
(proc (:exact-integer) :values)
(proc (:exact-integer) :values))
:values))
;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt
;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt
((substring-compare substring-compare-ci)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer
(proc (:exact-integer) :values)
(proc (:exact-integer) :values)
(proc (:exact-integer) :values))
:values))
;; string< string1 string2
((string= string< string> string<= string>= string<>
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
(proc (&rest :string) :value))
;; substring< string1 start1 end1 string2 start2 end2
((substring= substring<> substring-ci= substring-ci<>
substring< substring> substring-ci< substring-ci>
substring<= substring>= substring-ci<= substring-ci>=)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:value))
;; string-upper-case? string [start end]
;; string-lower-case? string [start end]
((string-upper-case? string-lower-case?)
(proc (:string &opt :exact-integer :exact-integer) :boolean))
;; capitalize-string string [start end]
;; capitalize-words string [start end]
;; string-downcase string [start end]
;; string-upcase string [start end]
;; capitalize-string! string [start end]
;; capitalize-words! string [start end]
;; string-downcase! string [start end]
;; string-upcase! string [start end]
((capitalize-string capitalize-words string-downcase string-upcase)
(proc (:string &opt :exact-integer :exact-integer) :string))
((capitalize-string! capitalize-words! string-downcase! string-upcase!)
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; string-take string nchars
;; string-drop string nchars
((string-take string-drop) (proc (:string :exact-integer) :string))
;; string-padl string k [char start end]
;; string-padr string k [char start end]
((string-padl string-padr)
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
:string))
;; string-trim string [char/char-set/pred start end]
;; string-triml string [char/char-set/pred start end]
;; string-trimr string [char/char-set/pred start end]
((string-trim string-triml string-trimr)
(proc (:string &opt :value :exact-integer :exact-integer)
:string))
;; string-filter char/char-set/pred string [start end]
;; string-delete char/char-set/pred string [start end]
((string-filter string-delete)
(proc (:value :string &opt :exact-integer :exact-integer) :string))
;; string-index string char/char-set/pred [start end]
;; string-index-right string char/char-set/pred [end start]
;; string-skip string char/char-set/pred [start end]
;; string-skip-right string char/char-set/pred [end start]
((string-index string-index-right string-skip string-skip-right)
(proc (:string :value &opt :exact-integer :exact-integer)
:value))
;; string-prefix-count string1 string2
;; string-suffix-count string1 string2
;; string-prefix-count-ci string1 string2
;; string-suffix-count-ci string1 string2
((string-prefix-count string-prefix-count-ci
string-suffix-count string-suffix-count-ci)
(proc (:string :string) :exact-integer))
;; substring-prefix-count string1 start1 end1 string2 start2 end2
;; substring-suffix-count string1 start1 end1 string2 start2 end2
;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2
;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2
((substring-prefix-count substring-prefix-count-ci
substring-suffix-count substring-suffix-count-ci)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:exact-integer))
;; string-prefix? string1 string2
;; string-suffix? string1 string2
;; string-prefix-ci? string1 string2
;; string-suffix-ci? string1 string2
((string-prefix? string-prefix-ci?
string-suffix? string-suffix-ci?)
(proc (:string :string) :boolean))
;; substring-prefix? string1 start1 end1 string2 start2 end2
;; substring-suffix? string1 start1 end1 string2 start2 end2
;; substring-prefix-ci? string1 start1 end1 string2 start2 end2
;; substring-suffix-ci? string1 start1 end1 string2 start2 end2
((substring-prefix? substring-prefix-ci?
substring-suffix? substring-suffix-ci?)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:boolean))
;; substring? pattern string [start end]
;; substring-ci? pattern string [start end]
((substring? substring-ci?)
(proc (:string :string &opt :exact-integer :exact-integer)
:value))
;; string-fill! string char [start end]
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
:unspecific))
;; string-copy! to tstart from [fstart fend]
(string-copy! (proc (:string :exact-integer :string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-copy s [start end] -> string
;; substring s start [end] -> string
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
(substring (proc (:string :exact-integer &opt :exact-integer) :string))
;; string-reverse s [start end]
;; string-reverse! s [start end]
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; reverse-list->string char-list
;; string->list s [start end]
;; string-concat string-list
;; string-concat/shared string-list
;; string-append/shared s ...
(reverse-list->string (proc (:value) :string))
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
((string-concat string-concat/shared) (proc (:value) :string))
(string-append/shared (proc (&rest :string) :string))
;; xsubstring s from [to start end]
;; string-xcopy! target tstart s from [to start end]
(xsubstring (proc (:string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:string))
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:unspecific))
;; string-null? s
(string-null? (proc (:string) :boolean))
(join-strings (proc (:value &opt :string :symbol) :string))
;; Here are the R4RS procs
(string? (proc (:value) :boolean))
(make-string (proc (:exact-integer &opt :char) :string))
(string (proc (&rest :char) :string))
(string-length (proc (:string) :exact-integer))
(string-ref (proc (:string :exact-integer) :char))
(string-set! (proc (:string :exact-integer :char) :unspecific))
; Not provided by string-lib.
;((string=? string-ci=? string<? string-ci<?
; string>? string-ci>? string<=? string-ci<=?
; string>=? string-ci>=?) (proc (:string :string) :boolean))
;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!,
;; and STRING->LIST. The string-lib types are different -- extended.
;(substring (proc (:string :exact-integer :exact-integer) :string))
;(string-copy (proc (:string) :string))
;(string-fill! (proc (:string :char) :unspecific))
;(string->list (proc (:string) :value))
(string-append (proc (&rest :string) :string))
(list->string (proc (:value) :string))
))
(define-interface string-lib-internals-interface
(export
(parse-final-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer)))
(parse-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer :value)))
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
:unspecific))
(make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean))
:vector))))
(define-structures ((string-lib string-lib-interface)
(string-lib-internals string-lib-internals-interface))
(open char-set-package
receiving
error-package
let-opt
structure-refs
scsh-utilities ; FOLD-RIGHT
scheme)
(access scheme ; Original SUBSTRING
silly) ; Primitive reverse-list->string
(files stringlib)
(optimize auto-integrate))
;;; CPP Lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Character->Character Partial functions
;;; Many of these types are pretty weak, but there is no way to
;;; specify that a parameter must be a particular record type.
;;; Every little bit helps, though.
(define-interface ccp-lib-interface
(export
;; ccp? x -> boolean
(ccp? (proc (:value) :boolean))
;; ccp-domain ccp -> char-set
(ccp-domain (proc (:value) :value)) ; Not very informative.
;; ccp-copy ccp -> ccp
(ccp-copy (proc (:value) :value))
;; ccp= ccp1 ccp2 ...
;; ccp<= ccp1 ccp2 ...
((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative.
;; ccp-fold kons knil ccp -> value
(ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value))
;; ccp-for-each proc ccp
(ccp-for-each (proc ((proc (:char :char) :values)) :unspecific))
;; ccp->alist ccp -> alist
(ccp->alist (proc (:value) :value))
;; ccp-restrict ccp cset -> ccp
;; ccp-restrict! ccp cset -> ccp
((ccp-restrict ccp-restrict!) (proc (:value :value) :value))
;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp
;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp
;; ccp-delete ccp from-char1 ... -> ccp
;; ccp-delete! ccp from-char1 ... -> ccp
((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value))
((ccp-delete ccp-delete!) (proc (:value &rest :char) :value))
;; ccp-extend ccp1 ... -> ccp
;; ccp-extend! ccp1 ... -> ccp
((ccp-extend ccp-extend!) (proc (&rest :value) :value))
;; ccp-compose ccp1 ... -> ccp
(ccp-compose (proc (&rest :value) :value))
;; alist->ccp char/char-alist [ccp] -> ccp
;; alist->ccp! char/char-alist [ccp] -> ccp
((alist->ccp alist->ccp!) (proc (:value &opt :value) :value))
;; proc->ccp proc [domain ccp] -> ccp
;; proc->ccp! proc [domain ccp] -> ccp
((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value)
:value))
;; constant-ccp char [domain ccp] -> ccp
;; constant-ccp! char domain ccp -> ccp
((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value))
;; ccp/mappings from1 to1 ... -> ccp
;; extend-ccp/mappings ccp from1 to1 ... -> ccp
;; extend-ccp/mappings! ccp from1 to1 ... -> ccp
(ccp/mappings (proc (&rest :value) :value))
((extend-ccp/mappings extend-ccp/mappings!)
(proc (:value &rest :value) :value))
;; construct-ccp ccp elt ... -> ccp
;; construct-ccp! ccp elt ... -> ccp
((construct-ccp construct-ccp!) (proc (:value &rest :value) :value))
;; ccp-unfold p f g seed -> ccp
(ccp-unfold (proc ((proc (:value) :boolean)
(procedure :value (some-values :char :char))
(proc (:value) :value)
:value)
:value))
;; tr ccp string [start end] -> string
;; ccp-map ccp string [start end] -> string
;; ccp-map! ccp string [start end]
;; ccp-app ccp char -> char or false
((tr ccp-map)
(proc (:value :string &opt :exact-integer :exact-integer) :string))
(ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific))
(ccp-app (proc (:value :char) :value))
;; Primitive CCP's.
ccp:0 ccp:1 ccp:upcase ccp:downcase
))
(define-structure ccp-lib ccp-lib-interface
(open char-set-package
ascii
defrec-package
string-lib
let-opt
receiving
scsh-utilities ; every
error-package
scheme)
(files ccp)
(optimize auto-integrate))

View File

@ -855,21 +855,6 @@ scheme_value df_scm_sort_filevec(long nargs, scheme_value *args)
return SCHFALSE;
}
scheme_value df_filter_stringvec(long nargs, scheme_value *args)
{
extern char *filter_stringvec(const char *, char const ** , int *);
scheme_value ret1;
char *r1;
int r2;
cig_check_nargs(3, nargs, "filter_stringvec");
r1 = filter_stringvec(cig_string_body(args[2]), (char const ** )AlienVal(args[1]), &r2);
ret1 = VECTOR_REF(*args,0);
{AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);}
VECTOR_REF(*args,1) = ENTER_FIXNUM(r2);
return ret1;
}
scheme_value df_scm_envvec(long nargs, scheme_value *args)
{
extern char** scm_envvec(int *);

View File

@ -850,23 +850,26 @@
;;; I do this one in C, I'm not sure why:
;;; It is used by MATCH-FILES.
(define-foreign %filter-C-strings!
(filter_stringvec (string pattern) ((C "char const ** ~a") cvec))
static-string ; error message -- #f if no error.
integer) ; number of files that pass the filter.
;;; 99/7: No one is using this function, so I'm commenting it out.
;;; Later, we could tune up the globber or regexp file-matcher to use
;;; it (but should shift it into the rx directory). But I should also go
;;; to a file-at-a-time cursor model for directory fetching. -Olin
;(define-foreign %filter-C-strings!
; (filter_stringvec (string-desc pattern) ((C "char const ** ~a") cvec))
; integer) ; number of files that pass the filter.
(define (match-files regexp . maybe-dir)
(let ((dir (:optional maybe-dir ".")))
(check-arg string? dir match-files)
(receive (err cvec numfiles)
(%open-dir (ensure-file-name-is-nondirectory dir))
(if err (errno-error err match-files regexp dir))
(receive (err numfiles) (%filter-C-strings! regexp cvec)
(if err (error err match-files))
(%sort-file-vector cvec numfiles)
(let ((files (C-string-vec->Scheme&free cvec numfiles)))
(vector->list files))))))
;(define (match-files regexp . maybe-dir)
; (let ((dir (:optional maybe-dir ".")))
; (check-arg string? dir match-files)
; (receive (err cvec numfiles)
; (%open-dir (ensure-file-name-is-nondirectory dir))
; (if err (errno-error err match-files regexp dir))
; (receive (numfiles) (%filter-C-strings! regexp cvec)
; ;(if err (error err match-files))
; (%sort-file-vector cvec numfiles)
; (let ((files (C-string-vec->Scheme&free cvec numfiles)))
; (vector->list files))))))
;;; Environment manipulation
@ -875,7 +878,7 @@
;;; (var . val) / "var=val" rep conversion:
(define (split-env-string var=val)
(let ((i (index var=val #\=)))
(let ((i (string-index var=val #\=)))
(if i (values (substring var=val 0 i)
(substring var=val (+ i 1) (string-length var=val)))
(error "No \"=\" in environment string" var=val))))

View File

@ -16,30 +16,18 @@
(define (delete pred lis)
(filter (lambda (x) (not (pred x))) lis))
(define (index str c . maybe-start)
(let ((start (max 0 (:optional maybe-start 0)))
(len (string-length str)))
(do ((i start (+ 1 i)))
((or (>= i len)
(char=? c (string-ref str i)))
(and (< i len) i)))))
(define (fold kons knil lis)
(let lp ((lis lis) (ans knil))
(if (pair? lis)
(lp (cdr lis) (kons (car lis) ans))
ans)))
(define (rindex str c . maybe-start)
(let* ((len (string-length str))
(start (min (:optional maybe-start len)
len)))
(do ((i (- start 1) (- i 1)))
((or (< i 0)
(char=? c (string-ref str i)))
(and (>= i 0) i)))))
;;; (f (f (f zero x1) x2) x3)
;;; [Richard's does (f x3 (f x2 (f x1 zero)))
(define (reduce f zero l)
(letrec ((lp (lambda (val rest)
(if (pair? rest) (lp (f val (car rest)) (cdr rest))
val))))
(lp zero l)))
(define (fold-right kons knil lis)
(let recur ((lis lis))
(if (pair? lis)
(let ((head (car lis))) ; Won't need LIS after RECUR call.
(kons head (recur (cdr lis))))
knil)))
(define (filter pred list)
(letrec ((filter (lambda (list)
@ -62,8 +50,6 @@
(lp (cdr list))))))))
(lp list)))
(define any first)
;;; Returns the first true value produced by PRED, not the list element
;;; that satisfied PRED.
@ -74,26 +60,27 @@
(lp (cdr list)))))))
(lp list)))
(define any? first?)
(define any first?)
(define (every? pred list)
(letrec ((lp (lambda (list)
(define (every pred list)
(or (not (pair? list))
(and (pred (car list))
(lp (cdr list)))))))
(lp list)))
(let lp ((head (car list)) (tail (cdr list)))
(if (pair? tail)
(and (pred head) (lp (car tail) (cdr tail)))
(pred head))))) ; Tail-call the last PRED call.
(define (mapv f v)
(let* ((len (vector-length v))
(ans (make-vector len)))
(do ((i 0 (+ i 1)))
((= i len) ans)
((>= i len) ans)
(vector-set! ans i (f (vector-ref v i))))))
(define (mapv! f v)
(let ((len (vector-length v)))
(do ((i 0 (+ i 1)))
((= i len) v)
((>= i len) v)
(vector-set! v i (f (vector-ref v i))))))
(define (vector-every? pred v)
@ -115,9 +102,42 @@
((< i 0) v)
(vector-set! v i (init i)))))
(define (vector-append . vecs)
(let* ((vlen (fold (lambda (v len) (+ (vector-length v) len)) 0 vecs))
(ans (make-vector vlen)))
(let lp1 ((vecs vecs) (to 0))
(if (pair? vecs)
(let* ((vec (car vecs))
(len (vector-length vec)))
(let lp2 ((from 0) (to to))
(cond ((< from len)
(vector-set! ans to (vector-ref vec from))
(lp2 (+ from 1) (+ to 1)))
(else (lp1 (cdr vecs) to)))))))
ans))
(define (vfold kons knil v)
(let ((len (vector-length v)))
(do ((i 0 (+ i 1))
(ans knil (kons (vector-ref v i) ans)))
((>= i len) ans))))
(define (vfold-right kons knil v)
(do ((i (- (vector-length v) 1) (- i 1))
(ans knil (kons (vector-ref v i) ans)))
((< i 0) ans)))
;;; We loophole the call to ERROR -- the point is that perhaps the
;;; user will interact with a breakpoint, and proceed with a new
;;; value, which we will then pass to a new invocation of CHECK-ARG
;;; for approval.
(define (check-arg pred val caller)
(if (pred val) val
(check-arg pred (error "Bad argument" val pred caller) caller)))
(check-arg pred
(loophole :value (error "Bad argument" val pred caller))
caller)))
(define (conjoin f g)
(lambda args (and (apply f args) (apply g args))))
@ -170,31 +190,3 @@
(let ((f (round x)))
(if (inexact? f) (inexact->exact f) f)))
;;; Copy string SOURCE into TARGET[start,...]
(define (string-replace! target start source)
(let ((len (string-length source)))
(do ((i (+ start len -1) (- i 1))
(j (- len 1) (- j 1)))
((< j 0) target)
(string-set! target i (string-ref source j)))))
;;; Copy SOURCE[source-start, source-end) into TARGET[start,)
(define (substring-replace! target start source source-start source-end)
(do ((i (+ start (- source-end source-start) -1) (- i 1))
(j (- source-end 1) (- j 1)))
((< j source-start) target)
(string-set! target i (string-ref source j))))
;;; Compute (... (f (f (f zero c0) c1) c2) ...)
(define (string-reduce f zero s)
(let ((len (string-length s)))
(let lp ((v zero) (i 0))
(if (= i len)
v
(lp (f v (string-ref s i)) (+ i 1))))))