diff --git a/scsh/awk.scm b/scsh/awk.scm index e512b55..e95dcc4 100644 --- a/scsh/awk.scm +++ b/scsh/awk.scm @@ -1,10 +1,27 @@ ;;; An awk loop, after the design of David Albertz and Olin Shivers. ;;; Copyright (c) 1994 by Olin Shivers. +;;; This uses the new RX SRE syntax. Defines a Clinger-Rees expander for +;;; the old, pre-SRE syntax AWK, and one for the new SRE-syntax AWK. + +;;; Imports: ;;; - Requires RECEIVE from RECEIVING package. ;;; - Would require DESTRUCTURE from DESTRUCTURING package, but it appears ;;; to be broken, so we hack it w/cars and cdrs. -;;; - Requires STRING-MATCH from SCSH package. +;;; - Requires STRING-MATCH and STRING-MATCH? from RE-EXPORTS package. +;;; - Requires regexp manipulation stuff from SRE-SYNTAX-TOOLS +;;; - Requires ERROR from ERROR-PACKAGE. +;;; - Requires ANY and FILTER frm SCSH-UTILITIES. +;;; +;;; Needs error-package receiving sre-syntax-tools scsh-utilities +;;; +;;; Exports: +;;; (expand-awk exp r c) Clinger-Rees macro expander, new syntax +;;; (expand-awk/obsolete exp r c) Clinger-Rees macro expander, old syntax +;;; +;;; next-range next-:range These four functions are used in the +;;; next-range: next-:range: code output by the expander. + ;;; Examples: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -15,13 +32,14 @@ ;;; ;;; ;;; Count the number of non-comment lines of code in my Scheme source. ;;; (awk (read-line) (line) ((nlines 0)) -;;; ("^[ \t]*;" nlines) ; A comment line. -;;; (else (+ nlines 1))) ; Not a comment line. +;;; ((: bos (* white) ";") nlines) ; A comment line. +;;; (else (+ nlines 1))) ; Not a comment line. ;;; -;;; ;;; Read numbers, counting the evens and odds. +;;; ;;; Read numbers, counting the evens and odds, +;;; ;;; and printing out sign information. ;;; (awk (read) (val) ((evens 0) (odds 0)) -;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about -;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too. +;;; ((zero? val) (display "zero ") (values evens odds)) +;;; ((> val 0) (display "pos ") (values evens odds)) ;;; (else (display "neg ") (values evens odds)) ;;; ;;; ((even? val) (values (+ evens 1) odds)) @@ -33,6 +51,17 @@ ;;; . ;;; . ;;; ) +;;; +;;; ::= (ELSE body ...) +;;; | (:RANGE test1 test2 body ...) ; RANGE :RANGE RANGE: :RANGE: +;;; | (AFTER body ...) +;;; | (test => proc) +;;; | (test ==> vars body ...) +;;; | (test body ...) +;;; +;;; test ::= integer | sre | (WHEN exp) | exp +;;; (sre/exp ambiguities resolved in favor of SRE) + ;;; This macro is written using Clinger/Rees explicit-renaming low-level ;;; macros. So it is pretty ugly. It takes a little care to generate @@ -141,17 +170,23 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define (expand-awk exp r c) +;;; If STRING-REGEXPS? is true, we use the old, obsolete syntax, where +;;; a test form that is a string, such as "shivers|bdc", is treated as +;;; a regular expression in the Posix string syntax. Otherwise, we use the +;;; new SRE syntax, where strings are treated as SRE constants. + +(define (expand-awk exp r c) (really-expand-awk exp r c #f)) +(define (expand-awk/obsolete exp r c) (really-expand-awk exp r c #t)) + +(define (really-expand-awk exp r c string-regexps?) (let* ((%lambda (r 'lambda)) ; Bind a mess of keywords. (%let (r 'let)) - (%receive (r 'receive)) - (%values (r 'values)) (%if (r 'if)) (%eof-object? (r 'eof-object?)) (%after (r 'after)) (%else (r 'else)) (%+ (r '+)) - (%make-regexp (r 'make-regexp)) + (%rx (r 'rx)) (gensym (let ((i 0)) (lambda (s) @@ -169,7 +204,9 @@ ;; Rip the form apart. (reader-exp (cadr exp)) - (rec/field-vars (caddr exp)) + ;; Replace #F's with gensym'd variables in the record/field vars. + (rec/field-vars (map (lambda (v) (or v (r (gensym "anon-rfval")))) + (caddr exp))) (rec-var (car rec/field-vars)) ; The var bound to the record. (rest (cdddr exp))) ; Stuff after the rec&field-vars. @@ -178,40 +215,48 @@ (values #f (car rest) (cdr rest)) ; form. (values (car rest) (cadr rest) (cddr rest))) - ;; Some analysis: what have we got? - ;; Range clauses, else clauses, line num tests,... - (let* ((recnum-tests? ; Do any of the clauses test the record - (any? (lambda (clause) ; count? (I.e., any integer tests?) - (let ((test (car clause))) - (or (integer? test) - (and (range? clause) - (or (integer? (cadr clause)) - (integer? (caddr clause))))))) - clauses)) + ;; If we are doing the old, obsolete Posix-string syntax, map + ;; the clause tests over to the new syntax. + (let* ((clauses (if string-regexps? + (map (lambda (clause) + (hack-clause-for-posix-string-syntax clause r c)) + clauses) + clauses)) + + ;; Some analysis: what have we got? + ;; Range clauses, else clauses, line num tests,... + (recnum-tests? ; Do any of the clauses test the record + (any (lambda (clause) ; count? (I.e., any integer tests?) + (let ((test (car clause))) + (or (integer? test) + (and (range? clause) + (or (integer? (cadr clause)) + (integer? (caddr clause))))))) + clauses)) ;; If any ELSE clauses, bind this to the var in which we ;; will keep the else state, otherwise #f. - (else-var (and (any? (lambda (clause) - (c (car clause) %else)) - clauses) - (r 'else))) + (else-var (and (any (lambda (clause) + (c (car clause) %else)) + clauses) + (r 'else-state))) - ;; We compile all of the regexp patterns into regexp + ;; We compile all of the *static* regexp patterns into regexp ;; data structures outside the AWK loop. So we need to ;; make a list of all the regexps that are used as tests. (patterns (apply append (map (lambda (clause) (let ((test (car clause))) - (cond ((string? test) (list test)) + (cond ((sre-form? test r c) (list test)) ((range? clause) (let ((t1 (cadr clause)) (t2 (caddr clause))) - (append (if (string? t1) + (append (if (sre-form? t1 r c) (list t1) '()) - (if (string? t2) - (list t2) - '())))) + (if (sre-form? t2 r c) + (list t2) + '())))) (else '())))) clauses))) @@ -223,16 +268,28 @@ (if (member pat ans) ans (cons pat ans))) '()))) - ;; An alist matching regexp patterns with the vars to which - ;; we will bind their compiled regexp data structure. - (pats/vars (map (lambda (p) (cons p (r (gensym "re.")))) - patterns)) + (pats-static? (map (lambda (sre) + (static-regexp? (parse-sre sre r c))) + patterns)) - ;; A LET-list binding the regexp vars to their compiled regexps. - (regexp-inits (map (lambda (p/v) - `(,(cdr p/v) (,%make-regexp ,(car p/v)))) - pats/vars)) + ;; An alist matching each pattern with the exp that refers + ;; to it -- a var if it's static, a Scheme (RX ...) exp otw. + (pats/refs (map (lambda (pat static?) + (cons pat + (if static? + (r (gensym "re.")) + `(,%rx ,pat)))) + patterns pats-static?)) + ;; A LET-list binding the regexp vars to their + ;; compiled static regexps. + (regexp-inits (apply append + (map (lambda (p/r static?) + (if static? + `((,(cdr p/r) (,%rx ,(car p/r)))) + '())) + pats/refs + pats-static?))) ;; Make a list of state vars for the range clauses. ;; For each range clause, we need a boolean var to track @@ -279,7 +336,7 @@ (loop-body (awk-loop-body lp-var rec-var else-var rec-counter range-vars svars - clauses pats/vars r c)) + clauses pats/refs r c)) ;; Variables that have to be updated per-iteration, as a LET list. ;; Note that we are careful not to increment the record counter @@ -302,12 +359,32 @@ `(,%if (,%eof-object? ,rec-var) ,after-exp ,loop-body)))))))) +;;; This maps a clause in the old, obsolete syntax over to a clause +;;; in the new, SRE syntax. +(define (hack-clause-for-posix-string-syntax clause r c) + (let ((hack-simple-test (lambda (test) + (cond ((string? test) + `(,(r 'posix-string) ,test)) + ((integer? test) test) + (else `(,(r 'when) ,test))))) + (test (car clause))) + (cond ((range-keyword? test r c) + `(,test ,(hack-simple-test (cadr clause)) + ,(hack-simple-test (caddr clause)) + . ,(cdddr clause))) + + ((or (c test (r 'else)) + (c test (r 'after))) + clause) + + (else `(,(hack-simple-test test) . ,(cdr clause)))))) + ;;; Expand into the body of the awk loop -- the code that tests & executes ;;; each clause, and then jumps to the top of the loop. (define (awk-loop-body lp-var rec-var else-var rec-counter - range-vars svars clauses pats/vars r c) + range-vars svars clauses pats/refs r c) (let ((clause-vars (if else-var (cons else-var svars) svars)) (loop-vars (append (if rec-counter (list rec-counter) '()) range-vars @@ -325,7 +402,7 @@ (let ((tail (expand (cdr clauses) (cdr range-vars)))) (expand-range-clause clause tail (car range-vars) rec-var else-var rec-counter svars - pats/vars + pats/refs r c))) ((c test %after) ; An AFTER clause. Skip it. @@ -339,7 +416,7 @@ (let ((tail (expand (cdr clauses) range-vars))) (expand-simple-clause clause tail rec-var else-var rec-counter svars - pats/vars r c))))) + pats/refs r c))))) ;; No clauses -- just jump to top of loop. `(,lp-var . ,loop-vars))))) @@ -347,49 +424,92 @@ ;;; Make a Scheme expression out of a test form. ;;; Integer i => (= i ) -;;; String s => (regexp-exec s ) +;;; SRE s => (regexp-search ) +;;; (when e) => e ;;; Expression e => e +;;; +;;; If FOR-VALUE? is true, then we do regexp searches with REGEXP-SEARCH, +;;; otherwise, we use the cheaper REGEXP-SEARCH?. -(define (->simple-clause-test test-form rec-var rec-counter pats/vars r) +(define (->simple-clause-test test-form for-value? rec-var rec-counter pats/refs r c) (cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form)) - ((string? test-form) - (let ((re-var (cond ((assoc test-form pats/vars) => cdr) - (else (error "Impossible AWK error -- unknown regexp" - test-form pats/vars))))) - `(,(r 'regexp-exec) ,re-var ,rec-var))) + + ((sre-form? test-form r c) + `(,(r (if for-value? 'regexp-search 'regexp-search?)) + ,(cdr (assoc test-form pats/refs)) + ,rec-var)) + + ((and (pair? test-form) + (c (r 'when) (car test-form))) + (if (= 2 (length test-form)) (cadr test-form) + (error "Illegal WHEN test in AWK" test-form))) + (else test-form))) (define (expand-simple-clause clause tail rec-var else-var rec-counter svars - pats/vars r c) + pats/refs r c) (let* ((%let (r 'let)) - (%= (r '=)) - (%string-match (r 'string-match)) (%arrow (r '=>)) + (%long-arrow (r '==>)) (%if (r 'if)) + (%mss (r 'match:substring)) (test (car clause)) - (test (->simple-clause-test test rec-var rec-counter pats/vars r)) + (mktest (lambda (for-value?) + (->simple-clause-test test for-value? rec-var + rec-counter pats/refs r c))) ;; Is clause of the form (test => proc) (arrow? (and (= 3 (length clause)) (c (cadr clause) %arrow))) + ;; How about (test ==> (var ...) body ...)? + (long-arrow? (and (< 3 (length clause)) + (c (cadr clause) %long-arrow))) + (null-clause-list (null-clause-action else-var svars r)) ;; The core form conditionally executes the body. ;; It returns the new else var and the new state vars, if any. - (core (if arrow? - (let* ((tv (r 'tval)) ; APP is the actual - (app `(,(caddr clause) ,tv))) ; body: (proc tv). - `(,%let ((,tv ,test)) - (,%if ,tv - ,(clause-action (list app) else-var svars r c) - . ,null-clause-list))) + (core (cond (arrow? + (let* ((tv (r 'tval)) ; APP is the actual + (app `(,(caddr clause) ,tv)) ; body: (proc tv). + (test (mktest #t))) + `(,%let ((,tv ,test)) + (,%if ,tv + ,(clause-action (list app) else-var svars r c) + . ,null-clause-list)))) - `(,%if ,test ,(clause-action (cdr clause) else-var svars r c) - . ,null-clause-list))) + (long-arrow? + (let* ((tv (r 'tval)) + (test (mktest #t)) + (bindings ; List of LET bindings for submatches. + (let lp ((i 0) + (vars (caddr clause)) + (bindings '())) + (if (pair? vars) + (let ((var (car vars))) + (lp (+ i 1) (cdr vars) + (if var + `((,var (,%mss ,tv ,i)) . ,bindings) + bindings))) ; #F = "don't-care" + bindings)))) + + `(,%let ((,tv ,test)) + (,%if ,tv + (,%let ,bindings ; Bind submatches. + . ,(deblock (clause-action (cdddr clause) + else-var svars + r c) + r c)) + . ,null-clause-list)))) + + (else + `(,%if ,(mktest #f) ,(clause-action (cdr clause) + else-var svars r c) + . ,null-clause-list)))) (loop-vars (if else-var (cons else-var svars) svars))) @@ -402,7 +522,7 @@ (define (expand-range-clause clause tail range-var rec-var else-var rec-counter svars - pats/vars r c) + pats/refs r c) (let* ((start-test (cadr clause)) (stop-test (caddr clause)) (body (cdddr clause)) @@ -419,10 +539,10 @@ (else (error "Unrecognised range keyword!" clause))))) ;; Convert the start and stop test forms to code. - (start-test (->simple-clause-test start-test rec-var - rec-counter pats/vars r)) - (stop-test (->simple-clause-test stop-test rec-var - rec-counter pats/vars r)) + (start-test (->simple-clause-test start-test #f rec-var + rec-counter pats/refs r c)) + (stop-test (->simple-clause-test stop-test #f rec-var + rec-counter pats/refs r c)) (start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them. (stop-thunk `(,%lambda () ,stop-test)) @@ -444,7 +564,6 @@ (tail-exps (deblock tail r c)) (%if (r 'if)) - (%receive (r 'receive)) (%let (r 'let)) ;; We are hard-wiring the else var to #t after this, so the core diff --git a/scsh/ccp.scm b/scsh/ccp.scm new file mode 100644 index 0000000..16dcece --- /dev/null +++ b/scsh/ccp.scm @@ -0,0 +1,576 @@ +;;; Char->char partial maps -*- Scheme -*- +;;; Copyright (C) 1998 by Olin Shivers. + +;;; CCPs are an efficient data structure for doing simple string transforms, +;;; similar to the kinds of things you would do with the tr(1) program. +;;; +;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit +;;; character types would need a more sophisticated data structure, tuned +;;; for sparseness. I would suggest something like this: +;;; (define-record ccp +;;; domain ; The domain char-set +;;; map ; Sorted vector of (char . string) pairs +;;; ; specifying the map. +;;; id?) ; If true, mappings not specified by MAP are +;;; ; identity mapping. If false, MAP must +;;; ; specify a mapping for every char in DOMAIN. +;;; +;;; A (char . string) elements in MAP specifies a mapping for the contiguous +;;; sequence of L chars beginning with CHAR (in the sequence of the underlying +;;; char type representation), where L is the length of STRING. These MAP elements +;;; are sorted by CHAR, so that binary search can be used to get from an input +;;; character C to the right MAP element quickly. +;;; +;;; This representation should be reasonably compact for standard mappings on, +;;; say, a Unicode CCP. An implementation might wish to have a cache field +;;; in the record for storing the full 8kb bitset when performing ccp-map +;;; operations. Or, an implementation might want to store the Latin-1 subset +;;; of the map in a dense format, and keep the remainder in a sparse format. + +(define num-chars (char-set-size char-set:full)) ; AKA 256. + +(define-record ccp + domain ; The domain char-set + dshared? ; Is the domain value shared or linear? + map ; 256-elt string + mshared?) ; Is the map string shared or linear? + + +;;; Accessors and setters that manage the linear bookkeeping +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-domain ccp) + (set-ccp:dshared? ccp #t) + (ccp:domain ccp)) + +;;; CCP is a linear ccp. PROC is a domain->domain function; it must be +;;; linear in its parameter and result. +;;; +;;; Updates the domain of the CCP with PROC, returns the resulting +;;; CCP; reuses the old one to construct the new one. + +(define (restrict-linear-ccp-domain ccp proc) + (let ((new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp))))) + (set-ccp:domain ccp new-d) + ccp)) + +;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function. +;;; It is passed a linear domain and cmap string. It may side-effect +;;; the cmap string, and returns the resulting updated domain. +;;; We return the resulting CCP, reusing the parameter to construct it. + +(define (linear-update-ccp ccp proc) + (let* ((cmap (if (ccp:mshared? ccp) + (begin (set-ccp:mshared? ccp #f) + (string-copy (ccp:map ccp))) + (ccp:map ccp))) + + (new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp)) + cmap))) + (set-ccp:domain ccp new-d) + ccp)) + + + +;;; Return CCP's map field, and mark it as shared. CCP functions that +;;; restrict a ccp's domain share map strings, so they use this guy. +(define (ccp:map/shared ccp) + (set-ccp:mshared? ccp #t) + (ccp:map ccp)) + +(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f + (string-copy (ccp:map ccp)) #f)) + +;;; N-ary equality relation for partial maps + +(define (ccp= ccp1 . rest) + (let ((domain (ccp:domain ccp1)) + (cmap (ccp:map ccp1))) + (every (lambda (ccp2) + (and (char-set= domain (ccp:domain ccp2)) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap i) + (string-ref cmap2 i)))) + domain)))) + rest))) + + +;;; N-ary subset relation for partial maps + +(define (ccp<= ccp1 . rest) + (let lp ((domain1 (ccp:domain ccp1)) + (cmap1 (ccp:map ccp1)) + (rest rest)) + (or (not (pair? rest)) + (let* ((ccp2 (car rest)) + (domain2 (ccp:domain ccp2)) + (cmap2 (ccp:map ccp2)) + (rest (cdr rest))) + (and (char-set<= domain1 domain2) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap1 i) + (string-ref cmap2 i)))) + domain1)) + (lp domain2 cmap2 rest)))))) + + +;;; CCP iterators +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-fold kons knil ccp) + (let ((cmap (ccp:map ccp))) + (char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v)) + knil + (ccp:domain ccp)))) + +(define (ccp-for-each proc ccp) + (let ((cmap (ccp:map ccp))) + (char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c)))) + (ccp:domain ccp)))) + +(define (ccp->alist ccp) + (ccp-fold (lambda (from to alist) (cons (cons from to) alist)) + '() + ccp)) + + +;;; CCP-RESTRICT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Restrict a ccp's domain. + +(define (ccp-restrict ccp cset) + (make-ccp (char-set-intersection cset (ccp:domain ccp)) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-restrict! ccp cset) + (restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset)))) + + +;;; CCP-ADJOIN ccp from-char1 to-char1 ... +;;; CCP-DELETE ccp char1 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Add & delete mappings to/from a ccp. + +(define (ccp-delete ccp . chars) + (make-ccp (apply char-set-delete (ccp:domain ccp) chars) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-delete! ccp . chars) + (restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars)))) + + +(define (ccp-adjoin ccp . chars) + (let ((cmap (string-copy (ccp:map ccp)))) + (make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars) + #f + cmap + #f))) + +(define (ccp-adjoin! ccp . chars) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars)))) + +(define (install-ccp-adjoin! cmap domain chars) + (let lp ((chars chars) (d domain)) + (if (pair? chars) + (let ((from (car chars)) + (to (cadr chars)) + (chars (cddr chars))) + (string-set! cmap (char->ascii from) to) + (lp chars (char-set-adjoin! d from))) + d))) + + +;;; CCP-EXTEND ccp1 ... +;;; CCP-EXTEND! ccp1 ccp2 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Extend ccp1 with ccp2, etc. + +(define (ccp-extend . ccps) + (if (pair? ccps) + (let ((ccp0 (car ccps)) + (ccps (cdr ccps))) + (if (pair? ccps) + (let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap. + ;; The FOLD installs each ccp in CCPS into CMAP and produces + ;; the new domain. + (make-ccp (fold (lambda (ccp d) + (install-ccp-extension! cmap d ccp)) + (char-set-copy (ccp:domain ccp0)) + ccps) + #f cmap #f)) + + ccp0)) ; Only 1 parameter + + ccp:0)) ; 0 parameters + +(define (ccp-extend! ccp0 . ccps) + (linear-update-ccp ccp0 + (lambda (domain cmap) + (fold (lambda (ccp d) (install-ccp-extension! cmap d ccp)) + domain + ccps)))) + + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-extension! cmap domain ccp) + (let ((cmap1 (ccp:map ccp)) + (domain1 (ccp:domain ccp))) + (char-set-for-each (lambda (c) + (let ((i (char->ascii c))) + (string-set! cmap i (string-ref cmap1 i)))) + domain1) + (char-set-union! domain domain1))) + + +;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; For each character C-IN in the original domain, we push it +;;; through the pipeline of CCPs. If we ever land outside the +;;; domain of a ccp, we punt C-IN. If we push it all the way +;;; through, we add C-IN to our result domain, and add the mapping +;;; into the cmap we are assembling. +;;; +;;; Looping this way avoids building up intermediate temporary +;;; CCPs. If CCP's were small bitsets, we might be better off +;;; slicing the double-nested loops the other way around. + +(define (ccp-compose . ccps) + (cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1 + ((not (pair? (cdr ccps))) (car ccps)) ; 1 arg + (else + (let* ((v (list->vector ccps)) + (vlen-2 (- (vector-length v) 2)) + (cmap (make-string num-chars)) + (d1 (ccp:domain (vector-ref v (+ vlen-2 1)))) + (d (char-set-fold (lambda (c-in d) + (let lp ((c c-in) (i vlen-2)) + (if (>= i 0) + (let ((ccp (vector-ref v i))) + (if (char-set-contains? (ccp:domain ccp) c) + (lp (string-ref (ccp:map ccp) + (char->ascii c)) + (- i 1)) + + ;; Lose: remove c-in from d. + (char-set-delete! d c-in))) + + ;; Win: C-IN -> C + (begin (string-set! cmap + (char->ascii c-in) + c) + d)))) + (char-set-copy d1) + d1))) + (make-ccp d #f cmap #f))))) + + + +;;; ALIST->CPP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (alist->ccp cc-alist . maybe-base-ccp) + (let ((base (:optional maybe-base-ccp ccp:0))) + (if (pair? cc-alist) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-alist! cmap + (char-set-copy (ccp:domain base)) + cc-alist) + #f cmap #f)) + base))) + +(define (alist->ccp! alist base) + (linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist)))) + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-alist! cmap domain alist) + (fold (lambda (from/to d) (let ((from (car from/to)) + (to (cdr from/to))) + (string-set! cmap (char->ascii from) to) + (char-set-adjoin! domain from))) + domain + alist)) + + +;;; PROC->CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (proc->ccp proc [domain base-ccp]) + +(define (proc->ccp proc . args) + (let-optionals args ((proc-domain char-set:full) + (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base)) + proc proc-domain) + #f cmap #f)))) + +(define (proc->ccp! proc proc-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain)))) + +(define (install-ccp-proc! cmap domain proc proc-domain) + (char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c))) + proc-domain) + (char-set-union! domain proc-domain)) + + +;;; CONSTANT-CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (constant-ccp char [domain base-ccp]) +;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR. +;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0. + +(define (constant-ccp char . args) + (let-optionals args ((char-domain char-set:full) (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base)) + char char-domain) + #f cmap #f)))) + +(define (constant-ccp! char char-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-constant-ccp! cmap d char char-domain)))) + +;;; Install the constant mapping into CMAP0 by side-effect, +;;; linear-update & return DOMAIN0 with the constant-mapping's domain. +(define (install-constant-ccp! cmap0 domain0 char char-domain) + (char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char)) + char-domain) + (char-set-union! domain0 char-domain)) + + +;;; CCP/MAPPINGS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp +;;; Each FROM element is either a string or a (lo-char . hi-char) range. +;;; Each TO element is either a string or a lo-char. Strings are replicated +;;; to match the length of the corresponding FROM element. +;;; CCP/MAPPINGS's base CCP is CCP:0 +;;; +;;; Tedious code. + +;;; Internal utility. +;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect. +;;; Return the new domain. + +(define (install-ccp-mapping-pair! cmap domain from to) + ;; Tedium -- four possibilities here: + ;; str->str, str->lo-char, + ;; range->str, range->lo-char. + (if (string? from) + (if (string? to) + ;; "abc" -> "ABC" + (let ((len1 (string-length from)) + (len2 (string-length to))) + (let lp2 ((i (- len1 1)) + (j (modulo (- len2 1) len1)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (string-ref to i)) + (lp2 (- i 1) + (- (if (> j 0) j len2) 1) + (char-set-adjoin! d c))) + d))) + + ;; "abc" -> #\A + (let lp2 ((i (- (string-length from) 1)) + (j (char->ascii to)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (ascii->char j)) + (lp2 (- i 1) + (- j 1) + (char-set-adjoin! d c))) + d))) + + (let ((from-start (char->ascii (car from))) + (from-end (char->ascii (cdr from)))) + (if (string? to) + (let ((len2-1 (- (string-length to) 1))) + ;; (#\a . #\c) -> "ABC" + (let lp2 ((i from-start) (j 0) (d domain)) + (if (<= i from-end) + (let ((c (string-ref to j))) + (string-set! cmap i c) + (lp2 (+ i 1) + (if (= j len2-1) 0 (+ j 1)) + (char-set-adjoin! d c))) + d))) + + ;; (#\a . #\c) -> #\A + (do ((i from-start (+ i 1)) + (j (char->ascii to) (+ j 1)) + (d domain (begin (string-set! cmap i (ascii->char j)) + (char-set-adjoin d (ascii->char i))))) + ((> i from-end) d)))))) + +;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN. +(define (install-mapping-pairs cmap domain args) + (let lp ((domain domain) (args args)) + (if (pair? args) + (lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args)) + (cddr args)) + domain))) + +(define (ccp/mappings . args) + (let ((cmap (make-string num-chars))) + (make-ccp (install-mapping-pairs (make-string num-chars) + (char-set-copy char-set:empty) + args) + #f cmap #f))) + +(define (extend-ccp/mappings base . args) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args) + #f cmap #f))) + +(define (extend-ccp/mappings! base . args) + (linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args)))) + + +;;; CONSTRUCT-CCP! ccp elt ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The kitchen-sink constructor; static typing be damned. +;;; ELTS are interpreted as follows: +;;; (lo-char . hi-char) to-string|lo-char ; ccp/range +;;; from-string to-string|lo-char ; ccp/range +;;; ccp ; ccp-extend +;;; alist ; alist->ccp +;;; domain char ; ccp-constant +;;; domain proc ; proc->ccp + +(define (construct-ccp! ccp . elts) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts)))) + +(define (construct-ccp base . elts) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts) + #f cmap #f))) + +;;; Install the mappings into CMAP by side-effect, +;;; linear-update & return DOMAIN with the final domain. + +(define (install-ccp-construct! cmap domain elts) + (let lp ((d domain) (elts elts)) + ;(format #t "d=~s elts=~s\n" d elts) + (if (not (pair? elts)) d + (let ((elt (car elts)) + (elts (cdr elts))) + (cond ((pair? elt) + (cond ((pair? (car elt)) ; ELT is an alist. + (lp (install-ccp-alist! cmap d elt) elts)) + ((char? (car elt)) ; ELT is (lo-char . hi-char) range. + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + (else (error "Illegal elt to construct-ccp" elt)))) + + ((string? elt) + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + + ((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts)) + + ((char-set? elt) + (let ((elt2 (car elts)) + (elts (cdr elts))) + (lp (cond ((char? elt2) + (install-constant-ccp! cmap d elt2 elt)) + ((procedure? elt2) + (install-ccp-proc! cmap d elt2 elt)) + (else (error "Illegal elt-pair to construct-ccp" + elt elt2))) + elts))) + + (else (error "Illegal elt to construct-ccp" elt))))))) + + +;;; CCP unfold + +(define (ccp-unfold p f g seed) + (let lp ((seed seed) (ccp (ccp-copy ccp:0))) + (if (p seed) ccp + (lp (g seed) + (receive (from to) (f seed) + (lp (g seed) (ccp-adjoin! ccp from to))))))) + + + +;;; Using CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; TR ccp string [start end] -> string +;;; CCP-MAP ccp string [start end] -> string +;;; CCP-MAP! ccp string [start end] -> undefined +;;; CCP-APP ccp char -> char or false + +;;; If a char in S is not in CCP's domain, it is dropped from the result. +;;; You can use this to map and delete chars from a string. + +(define (tr ccp s . maybe-start+end) + (let-optionals maybe-start+end ((start 0) (end (string-length s))) + ;; Count up the chars in S that are in the domain, + ;; and allocate the answer string ANS: + (let* ((len (- end start)) + (domain (ccp:domain ccp)) + (ans-len (string-fold (lambda (c numchars) + (if (char-set-contains? domain c) + (+ numchars 1) + numchars)) + 0 s start end)) + (ans (make-string ans-len))) + + ;; Apply the map, installing the resulting chars into ANS: + (string-fold (lambda (c i) (cond ((ccp-app ccp c) => + (lambda (c) + (string-set! ans i c) + (+ i 1))) + (else i))) ; Not in domain -- drop it. + 0 s start end) + ans))) + +(define (ccp-map ccp s . maybe-start+end) + (apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-map! ccp s . maybe-start+end) + (apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-app ccp char) + (and (char-set-contains? (ccp:domain ccp) char) + (string-ref (ccp:map ccp) (char->ascii char)))) + + +;;; Primitive CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define id-cmap + (let ((m (make-string num-chars))) + (do ((i (- num-chars 1) (- i 1))) + ((< i 0)) + (string-set! m i (ascii->char i))) + m)) + +(define ccp:0 (make-ccp char-set:empty #t id-cmap #t)) +(define ccp:1 (make-ccp char-set:full #t id-cmap #t)) + +(define ccp:upcase (proc->ccp char-upcase char-set:full)) +(define ccp:downcase (proc->ccp char-downcase char-set:full)) diff --git a/scsh/char-set.scm b/scsh/char-set.scm index d58a724..72aedbe 100644 --- a/scsh/char-set.scm +++ b/scsh/char-set.scm @@ -19,12 +19,11 @@ ;;; copy of the record fields. ;;; New dfns: -;;; (char-set= cs1 cs2) -;;; (char-set<= cs1 cs2) -;;; (reduce-char-set kons knil cs) -;;; (set-char-set! cs char in?) +;;; (char-set= cs1 cs2 ...) +;;; (char-set<= cs1 cs2 ...) +;;; (char-set-fold kons knil cs) ;;; (char-set-for-each f cs) -;;; (copy-char-set cs) +;;; (char-set-copy cs) ;;; (char-set-size cs) ;;; char-set:printing (char-printing? c) ;;; char-set:blank (char-blank? c) @@ -33,13 +32,21 @@ ;;; char-set:ascii (char-ascii? c) ;;; char-set:empty ;;; char-set:full +;;; char-set-every? pred cs +;;; char-set-any pred cs +;;; char-set-adjoin cset char -> cset +;;; char-set-adjoin! cset char -> cset +;;; char-set-delete cset char -> cset +;;; char-set-delete! cset char -> cset (define char:newline (ascii->char 13)) -(define char:tab (ascii->char 9)) -(define char:vtab (ascii->char 11)) -(define char:page (ascii->char 12)) -(define char:return (ascii->char 10)) -(define char:space (ascii->char 32)) +(define char:tab (ascii->char 9)) +(define char:vtab (ascii->char 11)) +(define char:page (ascii->char 12)) +(define char:return (ascii->char 10)) +(define char:space (ascii->char 32)) + +(define (string-copy s) (substring s 0 (string-length s))) (define (string-fill-range! str lower upper ch) (do ((index lower (+ index 1))) @@ -61,24 +68,26 @@ char-set? (s char-set:s)) -(define (copy-char-set cs) (make-char-set (string-copy (char-set:s cs)))) +(define (char-set-copy cs) (make-char-set (string-copy (char-set:s cs)))) -(define (char-set= cs1 cs2) - (let ((s1 (char-set:s cs1)) - (s2 (char-set:s cs2))) - (let lp ((i 255)) - (or (< i 0) - (and (char=? (string-ref s1 i) (string-ref s2 i)) - (lp (- i 1))))))) +;;; The = and <= code is ugly because it's n-ary. -(define (char-set<= cs1 cs2) - (let ((s1 (char-set:s cs1)) - (s2 (char-set:s cs2))) - (let lp ((i 255)) - (or (< i 0) - (and (<= (char->ascii (string-ref s1 i)) - (char->ascii (string-ref s2 i))) - (lp (- i 1))))))) +(define (char-set= cs1 . rest) + (let ((s1 (char-set:s cs1))) + (every (lambda (cs) (string=? s1 (char-set:s cs))) + rest))) + +(define (char-set<= cs1 . rest) + (let lp ((s1 (char-set:s cs1)) + (rest rest)) + (or (not (pair? rest)) + (let ((s2 (char-set:s (car rest))) + (rest (cdr rest))) + (let lp2 ((i 255)) + (if (< i 0) (lp s2 rest) + (and (<= (char->ascii (string-ref s1 i)) + (char->ascii (string-ref s2 i))) + (lp2 (- i 1))))))))) (define (char-set-size cs) @@ -88,20 +97,34 @@ (lp (- i 1) (if (= 0 (char->ascii (string-ref s i))) size (+ size 1))))))) -(define (set-char-set! cs char in?) - (string-set! (char-set:s cs) - (char->ascii char) - (ascii->char (if in? 1 0)))) +(define (set-char-set cs in? . chars) + (let ((s (string-copy (char-set:s cs))) + (val (if in? (ascii->char 1) (ascii->char 0)))) + (for-each (lambda (c) (string-set! s (char->ascii c) val)) + chars) + (make-char-set s))) -(define (char-set-for-each f cs) +(define (set-char-set! cs in? . chars) + (let ((s (char-set:s cs)) + (val (if in? (ascii->char 1) (ascii->char 0)))) + (for-each (lambda (c) (string-set! s (char->ascii c) val)) + chars)) + cs) + +(define (char-set-adjoin cs . chars) (apply set-char-set cs #t chars)) +(define (char-set-adjoin! cs . chars) (apply set-char-set! cs #t chars)) +(define (char-set-delete cs . chars) (apply set-char-set cs #f chars)) +(define (char-set-delete! cs . chars) (apply set-char-set! cs #f chars)) + +(define (char-set-for-each proc cs) (let ((s (char-set:s cs))) (let lp ((i 255)) (cond ((>= i 0) (if (not (= 0 (char->ascii (string-ref s i)))) - (f (ascii->char i))) + (proc (ascii->char i))) (lp (- i 1))))))) -(define (reduce-char-set kons knil cs) +(define (char-set-fold kons knil cs) (let ((s (char-set:s cs))) (let lp ((i 255) (ans knil)) (if (< i 0) ans @@ -110,6 +133,28 @@ ans (kons (ascii->char i) ans))))))) +(define reduce-char-set (deprecated-proc char-set-fold 'char-set-fold + "Use char-set-fold instead.")) + +(define (char-set-every? pred cs) + (let ((s (char-set:s cs))) + (let lp ((i 255)) + (or (< i 0) + (if (= 0 (char->ascii (string-ref s i))) + (lp (- i 1)) + (and (pred (ascii->char i)) + (lp (- i 1)))))))) + +(define (char-set-any pred cs) + (let ((s (char-set:s cs))) + (let lp ((i 255)) + (and (>= i 0) + (if (= 0 (char->ascii (string-ref s i))) + (lp (- i 1)) + (or (pred (ascii->char i)) + (lp (- i 1)))))))) + + (define (char-set . chars) (chars->char-set chars)) @@ -181,58 +226,88 @@ (define (char-set-member? . args) (error "CHAR-SET-MEMBER? is no longer provided. Use CHAR-SET-CONTAINS? instead.")) + +;;; Set algebra +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (define (char-set-invert cs) (predicate->char-set (lambda (char) (not (char-set-contains? cs char))))) -;;; The union, intersection, and difference code is ugly, -;;; because the ops are n-ary. +(define (char-set-union . csets) + (if (pair? csets) + (apply char-set-union! (char-set-copy (car csets)) (cdr csets)) + char-set:empty)) + +(define (char-set-intersection . csets) + (if (pair? csets) + (apply char-set-intersection! (char-set-copy (car csets)) (cdr csets)) + char-set:full)) + +(define (char-set-difference cs1 . csets) + (if (pair? csets) + (apply char-set-difference! (char-set-copy cs1) csets) + cs1)) + + +;;; Linear set-algebraic ops +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These guys are allowed, but not required, to side-effect their first +;;; argument when computing their result. In other words, you must use them +;;; as if they were completely functional, just like their non-! counterparts, +;;; and you must additionally ensure that their first arguments are "dead" +;;; at the point of call. In return, we promise a more efficient result, plus +;;; allowing you to always assume char-sets are unchangeable values. ;;; Apply P to each index and it's char in S: (P I C). ;;; Used by the intersection & difference. -(define (string-iter s p) +(define (string-iter p s) (let lp ((i (- (string-length s) 1))) (cond ((>= i 0) (p i (string-ref s i)) (lp (- i 1)))))) -(define (char-set-union . csets) - (if (pair? csets) - (let ((cset (copy-char-set (car csets)))) - (for-each (lambda (cs) - (char-set-for-each (lambda (c) (set-char-set! cset c #t)) - cs)) - (cdr csets)) - cset) - char-set:empty)) +(define (char-set-invert! cset) + (let ((s (char-set:s cset))) + (string-iter (lambda (i c) + (string-set! s i (ascii->char (- 1 (char->ascii c))))) + s)) + cset) + +(define (char-set-union! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (char-set-for-each (lambda (c) + (string-set! s (char->ascii c) + (ascii->char 1))) + cset)) + csets)) + cset1) + +(define (char-set-intersection! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (string-iter (lambda (i c) + (if (zero? (char->ascii c)) + (string-set! s i (ascii->char 0)))) + (char-set:s cset))) + csets)) + cset1) + +(define (char-set-difference! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (char-set-for-each (lambda (c) + (string-set! s (char->ascii c) + (ascii->char 0))) + cset)) + csets)) + cset1) -(define (char-set-intersection . csets) - (if (pair? csets) - (let* ((cset (copy-char-set (car csets))) - (s (char-set:s cset))) - (for-each (lambda (cs) - (string-iter (char-set:s cs) - (lambda (i c) - (if (= 0 (char->ascii c)) - (string-set! s i (ascii->char 0)))))) - (cdr csets)) - cset) - char-set:full)) -(define (char-set-difference cs1 . csets) - (if (pair? csets) - (let* ((cset (copy-char-set cs1)) - (s (char-set:s cset))) - (for-each (lambda (cs) - (string-iter (char-set:s cs) - (lambda (i c) - (if (= 1 (char->ascii c)) - (string-set! s i (ascii->char 0)))))) - csets) - cset) - cs1)) ;;;; System Character Sets +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define char-set:lower-case (ascii-range->char-set #x61 #x7B)) (define char-set:upper-case (ascii-range->char-set #x41 #x5B)) diff --git a/scsh/filemtch.scm b/scsh/filemtch.scm index 5cb8a72..d338502 100644 --- a/scsh/filemtch.scm +++ b/scsh/filemtch.scm @@ -11,17 +11,28 @@ ;;; root Search starts from here. Usefully "." (cwd) ;;; dots? => if true, dot files will be matched. ;;; if false, dot files will not be matched. -;;; pattern-list := a list of regular expressions or predicates -;;; Each member of the list corresponds -;;; to one or more levels in a directory. -;;; (A member with embedded "/" characters -;;; corresponds to multiple levels.) -;;; Example: ("foo" "bar" "\\.c$") +;;; pattern-list := a list of +;;; - strings +;;; These are split at /'s and then +;;; treated as Posix regexp strings. +;;; - regexps (typically made with RX macro) +;;; - predicates +;;; Each member of the list corresponds to one +;;; or more levels in a directory. (A string +;;; with embedded "/" characters corresponds +;;; to multiple levels.) +;;; Example: +;;; (file-match "." #f "foo" "bar" "\\.c$") ;;; means match files that end in ".c" ;;; if they reside in a directory with ;;; a name that contains "bar", which ;;; itself must reside in a directory ;;; with a name that contains "foo". +;;; Here are two more equivalent specs +;;; for the example above: +;;; (file-match "." #f "foo/bar/\\.c$") +;;; (file-match "." #f (rx "foo") (rx "bar") +;;; (rx ".c" eos)) ;;; If a member in the list is a predicate, ;;; the predicate must be a procedure of ;;; one argument. This procedure is applied @@ -40,16 +51,19 @@ ;;; when FILE-DIRECTORY? is applied to the bogus symlink. (define (file-match root dot-files? . patterns) - (let ((patterns (apply append (map split-pat patterns)))) + (let ((patterns (apply append + (map (lambda (p) (if (string? p) + (map posix-string->regexp (split-pat p)) + p)) + patterns)))) (let recur ((root root) (patterns patterns)) (if (pair? patterns) (let* ((pattern (car patterns)) (patterns (cdr patterns)) (dir (file-name-as-directory root)) - (matcher (cond ((string? pattern) - (let ((re (make-regexp pattern))) - (lambda (f) (regexp-exec re f)))) + (matcher (cond ((regexp? pattern) + (lambda (f) (regexp-search? re f))) ;; This arm makes a file-matcher using ;; predicate PATTERN. If PATTERN signals @@ -87,7 +101,8 @@ (if (procedure? pat) (list pat) (let lp ((i (string-length pat)) (ans '())) - (cond ((rindex pat #\/ i) => - (lambda (j) (lp (cons (substring pat (+ j 1) i) ans) j))) + (cond ((string-index-right pat #\/ i) => + (lambda (j) (lp j (cons (substring pat (+ j 1) i) ans)))) (else (cons (substring pat 0 i) ans)))))) + diff --git a/scsh/fname.scm b/scsh/fname.scm index 2451838..77d4f3b 100644 --- a/scsh/fname.scm +++ b/scsh/fname.scm @@ -68,7 +68,7 @@ ;;; Returns FNAME's directory component in *directory form.* (define (file-name-directory fname) - (cond ((rindex fname #\/) => + (cond ((string-index-right fname #\/) => (lambda (rslash) (if (last-non-slash fname) (substring fname 0 (+ 1 rslash)) @@ -77,7 +77,7 @@ (define (file-name-nondirectory fname) - (cond ((rindex fname #\/) => + (cond ((string-index-right fname #\/) => (lambda (rslash) (if (last-non-slash fname) (substring fname (+ 1 rslash) (string-length fname)) @@ -90,7 +90,7 @@ (len (string-length fname))) (let split ((start 0)) (cond ((>= start len) '()) - ((index fname #\/ start) => + ((string-index fname #\/ start) => (lambda (slash) (cons (substring fname start slash) (split (+ slash 1))))) @@ -128,7 +128,7 @@ ;;; /usr/shivers/.login are not considered extensions. (define (file-name-extension-index fname) - (let ((dot (rindex fname #\.))) + (let ((dot (string-index-right fname #\.))) (if (and dot (> dot 0) (not (char=? #\/ (string-ref fname (- dot 1))))) @@ -154,7 +154,7 @@ (let* ((user (substring fname 1 end)) (ui (name->user-info user))) (user-info:home-dir ui)))))) - (cond ((index fname #\/ 1) => + (cond ((string-index fname #\/ 1) => (lambda (slash) (string-append (tilde->homedir slash) "/" (substring fname (+ slash 1) len)))) @@ -256,7 +256,7 @@ (let ((len (string-length s))) (cond ((zero? len) (apply string-append (reverse! ans))) - ((index s #\$) => + ((string-index s #\$) => (lambda (i) (let ((ans (cons (substring s 0 i) ans)) (s (substring s (+ i 1) len)) @@ -264,13 +264,13 @@ (if (zero? len) (lp ans "") (let ((next-char (string-ref s 0))) (cond ((char=? #\{ next-char) - (cond ((index s #\}) => + (cond ((string-index s #\}) => (lambda (i) (lp (cons (getenv (substring s 1 i)) ans) (substring s (+ i 1) len)))) (else (error "Unbalanced ${ delimiter in string" s)))) (else - (let ((i (or (index s #\/) len))) + (let ((i (or (string-index s #\/) len))) (lp (cons (getenv (substring s 0 i)) ans) (substring s i len)))))))))) (else (lp (cons s ans) "")))))) diff --git a/scsh/fr.scm b/scsh/fr.scm index 077005a..103f81a 100644 --- a/scsh/fr.scm +++ b/scsh/fr.scm @@ -54,43 +54,12 @@ ;;; This has the effect you want with field parsing. For example, if you split ;;; a string with the empty pattern, you will explode the string into its ;;; individual characters: -;;; ((suffix-splitter "") "foo") -> #("" "f" "o" "o") +;;; ((suffix-splitter (rx "")) "foo") -> #("" "f" "o" "o") ;;; However, even though this boundary case is handled correctly, we don't ;;; recommend using it. Say what you mean -- just use a field splitter: -;;; ((field-splitter ".") "foo") -> #("f" "o" "o") +;;; ((field-splitter (rx any)) "foo") -> #("f" "o" "o") - -;;; (join-strings string-list [delimiter grammar]) => string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Paste strings together using the delimiter string. -;;; -;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" -;;; -;;; DELIMITER defaults to a single space " " -;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix. - -;;; (join-strings strings [delim grammar]) - -(define (join-strings strings . args) - (if (pair? strings) - (let-optionals args ((delim " ") (grammar 'infix)) - (check-arg string? delim join-strings) - (let ((strings (reverse strings))) - (let lp ((strings (cdr strings)) - (ans (case grammar - ((infix) (list (car strings))) - ((suffix) (list (car strings) delim)) - (else (error "Illegal grammar" grammar))))) - (if (pair? strings) - (lp (cdr strings) - (cons (car strings) (cons delim ans))) - - ; All done - (apply string-append ans))))) - - "")) ; Special-cased for infix grammar. - ;;; FIELD PARSERS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This section defines routines to split a string into fields. @@ -100,15 +69,11 @@ (define (->delim-matcher x) (if (procedure? x) x ; matcher proc - (let ((re (cond ((regexp? x) x) ; regexp pattern - ((string? x) (make-regexp x)) ; regexp string - (else (error "Illegal pattern/parser" x))))) - - ;; The matcher proc. - (lambda (s i) - (cond ((regexp-exec re s i) => - (lambda (m) (values (match:start m 0) (match:end m 0)))) - (else (values #f #f))))))) + ;; The matcher proc. + (lambda (s i) + (cond ((regexp-search x s i) => + (lambda (m) (values (match:start m 0) (match:end m 0)))) + (else (values #f #f)))))) ;;; (infix-splitter [re num-fields handle-delim]) -> parser ;;; (suffix-splitter [re num-fields handle-delim]) -> parser @@ -154,7 +119,8 @@ match-delim cons-field num-fields nfields-exact?)))))))) -(define default-field-matcher (->delim-matcher "[^ \t\n]+")) +;;; Default field spec is runs of non-whitespace chars. +(define default-field-matcher (->delim-matcher (rx (+ (~ white))))) ;;; (field-splitter [field-spec num-fields]) @@ -313,8 +279,8 @@ ;;; Now, build the exported procedures: {infix,suffix,sloppy-suffix}-splitter. -(define default-suffix-matcher (->delim-matcher "[ \t\n]+|$")) -(define default-infix-matcher (->delim-matcher "[ \t\n]+")) +(define default-suffix-matcher (->delim-matcher (rx (| (+ white) eos)))) +(define default-infix-matcher (->delim-matcher (rx (+ white)))) (define infix-splitter (make-field-parser-generator default-infix-matcher infix-field-loop)) @@ -408,21 +374,17 @@ ;;; Repeatedly do (APPLY PROC M STATE) to generate new state values, ;;; where M is a regexp match structure made from matching against STRING. -;(define (regexp-reduce string start regexp proc . state) -; (let ((end (string-length string)) -; (regexp (if (string? regexp) -; (make-regexp regexp) -; regexp))) -; +;(define (regexp-fold string start regexp proc . state) +; (let ((end (string-length string))) ; (let lp ((i start) (state state) (last-null? #f)) ; (let ((j (if last-null? (+ i 1) i))) -; (cond ((and (<= j end) (regexp-exec regexp string j)) => +; (cond ((and (<= j end) (regexp-search regexp string j)) => ; (lambda (m) ; (receive state (apply proc m state) ; (lp (match:end m) state (= (match:start m) (match:end m)))))) ; (else (apply values state))))))) ; ;(define (all-regexp-matches regexp string) -; (reverse (regexp-reduce string 0 regexp -; (lambda (m ans) (cons (match:substring m 0) ans)) -; '()))) +; (reverse (regexp-fold string 0 regexp +; (lambda (m ans) (cons (match:substring m 0) ans)) +; '()))) diff --git a/scsh/glob.scm b/scsh/glob.scm index 83de27c..271b59c 100644 --- a/scsh/glob.scm +++ b/scsh/glob.scm @@ -76,8 +76,8 @@ (else (let* ((dots? (char=? #\. (string-ref pat 0))) ; Match dot files? (candidates (maybe-directory-files fname dots?)) - (re (make-regexp (glob->regexp pat)))) - (values (filter (lambda (f) (regexp-exec re f)) candidates) + (re (glob->regexp pat))) + (values (filter (lambda (f) (regexp-search? re f)) candidates) #t))))) ; These guys exist for sure. ;;; The initial special-case above isn't really for the fast-path; it's @@ -87,43 +87,96 @@ ;;; Translate a brace-free glob pattern to a regular expression. -(define (glob->regexp pat) +(define glob->regexp + (let ((dot-star (re-repeat 0 #f re-any))) ; ".*" or (* any) + (lambda (pat) + (let ((pat-len (string-length pat)) + + (str-cons (lambda (chars res) ; Reverse CHARS and cons the + (if (pair? chars) ; result string-re onto RES. + (cons (re-string (list->string (reverse chars))) + res) + res)))) + + ;; We accumulate chars into CHARS, and coalesce into a single string + ;; with STR-CONS when we run across a non-char. + (let lp ((chars '()) + (res (list re-bos)) + (i 0)) + (if (= i pat-len) + (re-seq (reverse (str-cons chars res))) + + (let ((c (string-ref pat i)) + (i (+ i 1))) + (case c + ((#\\) (if (< i pat-len) + (lp (cons (string-ref pat i) chars) + res (+ i 1)) + (error "Ill-formed glob pattern -- ends in backslash" pat))) + + ((#\*) (lp '() + (cons dot-star (str-cons chars res)) + i)) + ((#\?) (lp '() + (cons re-any (str-cons chars res)) + i)) + + ((#\[) (receive (cset i) (parse-glob-bracket pat i) + (lp '() + (cons (re-char-set cset) + (str-cons chars res)) + i))) + + (else (lp (cons c chars) res i)))))))))) + + +;;; A glob bracket expression is [...] or [^...]. +;;; The body is a sequence of and - ranges. +;;; A is any character except right-bracket, carat, hypen or backslash, +;;; or a backslash followed by any character at all. + +(define (parse-glob-bracket pat i) (let ((pat-len (string-length pat))) - (let lp ((result '(#\^)) - (i 0) - (state 'normal)) - (if (= i pat-len) + (receive (negate? i) (if (and (< i pat-len) (char=? #\^ (string-ref pat i))) + (values #t (+ i 1)) + (values #f i)) - (if (eq? state 'normal) - (list->string (reverse (cons #\$ result))) - (error "Illegal glob pattern" pat)) + (let lp ((elts '()) (i i)) + (if (>= i pat-len) + (error "Ill-formed glob pattern -- no terminating close-bracket" pat) + (let ((c (string-ref pat i)) + (i (+ i 1))) + (case c + ((#\]) + (let ((cset (fold (lambda (elt cset) + (char-set-union + cset + (if (char? elt) + (char-set elt) + (ascii-range->char-set (char->ascii (car elt)) + (+ 1 (char->ascii (cdr elt))))))) + char-set:empty + elts))) + (values (re-char-set (if negate? + (char-set-invert cset) + cset)) + i))) - (let ((c (string-ref pat i)) - (i (+ i 1))) - (case state - ((char-set) - (lp (cons c result) - i - (if (char=? c #\]) 'normal 'char-set))) + ((#\\) + (if (>= i pat-len) + (error "Ill-formed glob pattern -- ends in backslash" pat) + (lp (cons (string-ref pat i) elts) (+ i 1)))) - ((escape) - (lp (case c - ((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[) - (cons c (cons #\\ result))) - (else (cons c result))) - i - 'normal)) + ((#\-) + (cond ((>= i pat-len) + (error "Ill-formed glob pattern -- unterminated range." pat)) + ((or (null? elts) (not (char? (car elts)))) + (error "Ill-formed glob pattern -- range has no beginning." pat)) + (else (lp (cons (cons (car elts) (string-ref pat i)) elts) + (+ i 1))))) - ;; Normal - (else (case c - ((#\\) (lp result i 'escape)) - ((#\*) (lp (cons #\* (cons #\. result)) i 'normal)) - ((#\?) (lp (cons #\. result) i 'normal)) - ((#\[) (lp (cons c result) i 'char-set)) - ((#\$ #\^ #\. #\+ #\| #\( #\)) - (lp (cons c (cons #\\ result)) i 'normal)) - (else (lp (cons c result) i 'normal)))))))))) + (else (lp (cons c elts) i))))))))) ;;; Is the glob pattern free of *'s, ?'s and [...]'s? diff --git a/scsh/procobj.scm b/scsh/procobj.scm index 64008c1..ff1aab9 100644 --- a/scsh/procobj.scm +++ b/scsh/procobj.scm @@ -296,12 +296,12 @@ (define reaped-procs '()) ; Reaped, but not yet waited. (define (filter-weak-ptr-list pred lis) - (foldr (lambda (wptr result) (let ((val (weak-pointer-ref wptr))) - (if (and val (pred val)) - (cons wptr result) - result))) - '() - list)) + (fold-right (lambda (wptr result) (let ((val (weak-pointer-ref wptr))) + (if (and val (pred val)) + (cons wptr result) + result))) + '() + list)) ;;; Add a newly-reaped proc to the list. (define (add-reaped-proc! pid status) diff --git a/scsh/rdelim.scm b/scsh/rdelim.scm index 1888db1..3583801 100644 --- a/scsh/rdelim.scm +++ b/scsh/rdelim.scm @@ -264,7 +264,7 @@ ;;; (read-paragraph [port handle-delim]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define blank-line-regexp (make-regexp "^[ \t]*\n$")) +(define blank-line-regexp (rx bos (* white) #\newline eos)) (define (read-paragraph . args) (let-optionals args ((port (current-input-port)) @@ -275,14 +275,14 @@ (cond ((eof-object? line) (if (eq? handle-delim 'split) (values line line) line)) - ((regexp-exec blank-line-regexp line) (lp)) + ((regexp-search? blank-line-regexp line) (lp)) ;; Then, read in non-blank lines. (else (let lp ((lines (list line))) (let ((line (read-line port 'concat))) (if (and (string? line) - (not (regexp-exec blank-line-regexp line))) + (not (regexp-search? blank-line-regexp line))) (lp (cons line lines)) diff --git a/scsh/re.c b/scsh/re.c deleted file mode 100644 index 6c26881..0000000 --- a/scsh/re.c +++ /dev/null @@ -1,69 +0,0 @@ -/* This is an Scheme48/C interface file, -** automatically generated by cig. -*/ - -#include -#include /* For malloc. */ -#include "libcig.h" - -/* Make sure foreign-function stubs interface to the C funs correctly: */ -#include "re1.h" - -scheme_value df_re_byte_len(long nargs, scheme_value *args) -{ - extern char *re_byte_len(const char *, int *); - scheme_value ret1; - char *r1; - int r2; - - cig_check_nargs(2, nargs, "re_byte_len"); - r1 = re_byte_len(cig_string_body(args[1]), &r2); - ret1 = VECTOR_REF(*args,0); - {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} - VECTOR_REF(*args,1) = ENTER_FIXNUM(r2); - return ret1; - } - -scheme_value df_re_compile(long nargs, scheme_value *args) -{ - extern char *re_compile(const char *, scheme_value ); - scheme_value ret1; - char *r1; - - cig_check_nargs(3, nargs, "re_compile"); - r1 = re_compile(cig_string_body(args[2]), args[1]); - ret1 = VECTOR_REF(*args,0); - {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} - return ret1; - } - -scheme_value df_re_exec(long nargs, scheme_value *args) -{ - extern char *re_exec(scheme_value , const char *, int , scheme_value , scheme_value , int *); - scheme_value ret1; - char *r1; - int r2; - - cig_check_nargs(6, nargs, "re_exec"); - r1 = re_exec(args[5], cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2); - ret1 = VECTOR_REF(*args,0); - {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} - VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2); - return ret1; - } - -scheme_value df_re_match(long nargs, scheme_value *args) -{ - extern char *re_match(const char *, const char *, int , scheme_value , scheme_value , int *); - scheme_value ret1; - char *r1; - int r2; - - cig_check_nargs(6, nargs, "re_match"); - r1 = re_match(cig_string_body(args[5]), cig_string_body(args[4]), EXTRACT_FIXNUM(args[3]), args[2], args[1], &r2); - ret1 = VECTOR_REF(*args,0); - {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} - VECTOR_REF(*args,1) = ENTER_BOOLEAN(r2); - return ret1; - } - diff --git a/scsh/re.scm b/scsh/re.scm deleted file mode 100644 index 9bbc802..0000000 --- a/scsh/re.scm +++ /dev/null @@ -1,283 +0,0 @@ -;;; Regular expression matching for scsh -;;; Copyright (c) 1994 by Olin Shivers. - -(foreign-source - "/* Make sure foreign-function stubs interface to the C funs correctly: */" - "#include \"re1.h\"" - "" "" - ) - -;;; Match data for regexp matches. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-record regexp-match - string ; The string against which we matched. - start ; 10 elt vec - end) ; 10 elt vec - -(define (match:start match . maybe-index) - (vector-ref (regexp-match:start match) - (:optional maybe-index 0))) - -(define (match:end match . maybe-index) - (vector-ref (regexp-match:end match) - (:optional maybe-index 0))) - -(define (match:substring match . maybe-index) - (let* ((i (:optional maybe-index 0)) - (start (vector-ref (regexp-match:start match) i))) - (and start (substring (regexp-match:string match) - start - (vector-ref (regexp-match:end match) i))))) - -;;; Compiling regexps -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-record %regexp - string ; The string form of the regexp. - bytes ; The compiled representation, stuffed into a Scheme string. - ((disclose self) (list "Regexp" (%regexp:string self)))) - -(define regexp? %regexp?) - - -(define (make-regexp pattern) - (receive (err len) (%regexp-compiled-length pattern) - (if err (error err make-regexp pattern) - (let ((buf (make-string len))) - (%regexp-compile pattern buf) - (make-%regexp pattern buf))))) - -(define-foreign %regexp-compiled-length (re_byte_len (string pattern)) - static-string ; Error msg or #f - integer) ; number of bytes needed to compile REGEXP. - -(define-foreign %regexp-compile (re_compile (string pattern) - (string-desc bytes)) - static-string) ; Error msg or #f - - -(define (->regexp x) - (cond ((string? x) (make-regexp x)) - ((regexp? x) x) - (else (error "Not a regexp or string." x)))) - - -;;; Executing compiled regexps -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (regexp-exec regexp str . maybe-start) - (let ((start (:optional maybe-start 0)) - (start-vec (make-vector 10)) - (end-vec (make-vector 10))) - (receive (err match?) - (%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec) - (if err (error err regexp-exec regexp str start) - (and match? - (make-regexp-match str start-vec end-vec)))))) - -(define-foreign %regexp-exec (re_exec (string-desc compiled-regexp) - (string s) - (integer start) - (vector-desc start-vec) - (vector-desc end-vec)) - static-string ; Error msg or #f - bool) ; Matched? - - -;;; Compile&match regexps in one go -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; I could do this with the separate compile and execute procedures, -;;; but I go straight to C just for fun. - -(define (string-match pattern string . maybe-start) - (let ((start (:optional maybe-start 0)) - (start-vec (make-vector 10)) - (end-vec (make-vector 10))) - (receive (err match?) (if (regexp? pattern) - (%regexp-exec (%regexp:bytes pattern) - string start start-vec end-vec) - (%string-match pattern string start - start-vec end-vec)) - (if err (error err string-match pattern string start) - (and match? (make-regexp-match string start-vec end-vec)))))) - -(define-foreign %string-match (re_match (string pattern) - (string s) - (integer start) - (vector-desc start-vec) - (vector-desc end-vec)) - static-string ; Error string or #f if all is ok. - bool) ; match? - - - -;;; Substitutions -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (regexp-substitute port match . items) - (let* ((str (regexp-match:string match)) - (sv (regexp-match:start match)) - (ev (regexp-match:end match)) - (range (lambda (item) ; Return start & end of - (cond ((integer? item) ; ITEM's range in STR. - (values (vector-ref sv item) - (vector-ref ev item))) - ((eq? 'pre item) (values 0 (vector-ref sv 0))) - ((eq? 'post item) (values (vector-ref ev 0) - (string-length str))) - (else (error "Illegal substitution item." - item - regexp-substitute)))))) - (if port - - ;; Output port case. - (for-each (lambda (item) - (if (string? item) (write-string item port) - (receive (si ei) (range item) - (write-string str port si ei)))) - items) - - ;; Here's the string case. Make two passes -- one to - ;; compute the length of the target string, one to fill it in. - (let* ((len (reduce (lambda (i item) - (+ i (if (string? item) (string-length item) - (receive (si ei) (range item) (- ei si))))) - 0 items)) - (ans (make-string len))) - - (reduce (lambda (index item) - (cond ((string? item) - (string-replace! ans index item) - (+ index (string-length item))) - (else (receive (si ei) (range item) - (substring-replace! ans index str si ei) - (+ index (- ei si)))))) - 0 items) - ans)))) - - - -(define (regexp-substitute/global port re str . items) - (let ((re (->regexp re)) - (str-len (string-length str)) - (range (lambda (start sv ev item) ; Return start & end of - (cond ((integer? item) ; ITEM's range in STR. - (values (vector-ref sv item) - (vector-ref ev item))) - ((eq? 'pre item) (values start (vector-ref sv 0))) - (else (error "Illegal substitution item." - item - regexp-substitute/global))))) - (num-posts (reduce (lambda (count item) - (+ count (if (eq? item 'post) 1 0))) - 0 items))) - - (if (and port (< num-posts 2)) - - ;; Output port case, with zero or one POST items. - (let recur ((start 0)) - (if (<= start str-len) - (let ((match (regexp-exec re str start))) - (if match - (let* ((sv (regexp-match:start match)) - (ev (regexp-match:end match)) - (s (vector-ref sv 0)) - (e (vector-ref ev 0)) - (empty? (= s e))) - (for-each (lambda (item) - (cond ((string? item) (write-string item port)) - - ((procedure? item) (write-string (item match) port)) - - ((eq? 'post0 item) - (if (and empty? (< s str-len)) - (write-char (string-ref str s) port))) - - ((eq? 'post item) - (recur (if empty? (+ 1 e) e))) - - (else (receive (si ei) - (range start sv ev item) - (write-string str port si ei))))) - items)) - - (write-string str port start))))) ; No match. - - ;; Either we're making a string, or >1 POST. - (let* ((pieces (let recur ((start 0)) - (if (> start str-len) '() - (let ((match (regexp-exec re str start)) - (cached-post #f)) - (if match - (let* ((sv (regexp-match:start match)) - (ev (regexp-match:end match)) - (s (vector-ref sv 0)) - (e (vector-ref ev 0)) - (empty? (= s e))) - (reduce (lambda (pieces item) - (cond ((string? item) - (cons item pieces)) - - ((procedure? item) - (cons (item match) pieces)) - - ((eq? 'post0 item) - (if (and empty? (< s str-len)) - (cons (string (string-ref str s)) - pieces) - pieces)) - - ((eq? 'post item) - (if (not cached-post) - (set! cached-post - (recur (if empty? (+ e 1) e)))) - (append cached-post pieces)) - - (else (receive (si ei) - (range start sv ev item) - (cons (substring str si ei) - pieces))))) - '() items)) - - ;; No match. Return str[start,end]. - (list (if (zero? start) str - (substring str start (string-length str))))))))) - - (pieces (reverse pieces))) - (if port (for-each (lambda (p) (write-string p port)) pieces) - (apply string-append pieces)))))) - - - -;;; Miscellaneous -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; Convert a string into a regex pattern that matches that string exactly -- -;;; in other words, quote the special chars with backslashes. - -(define (regexp-quote string) - (let lp ((i (- (string-length string) 1)) - (result '())) - (if (< i 0) (list->string result) - (lp (- i 1) - (let* ((c (string-ref string i)) - (result (cons c result))) - (if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+)) - (cons #\\ result) - result)))))) - - -;;; Count the number of possible sub-matches in a regexp -;;; (i.e., the number of left parens). - -(define (regexp-num-submatches s) - (let* ((len (string-length s)) - (len-1 (- len 1))) - (let lp ((i 0) (nsm 0)) - (if (= i len) nsm - (case (string-ref s i) - ((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm)) - ((#\() (lp (+ i 1) (+ nsm 1))) - (else (lp (+ i 1) nsm))))))) diff --git a/scsh/re1.c b/scsh/re1.c deleted file mode 100644 index 0ac7ce0..0000000 --- a/scsh/re1.c +++ /dev/null @@ -1,194 +0,0 @@ -/* Scheme48 interface to Henry Spencer's regular expression package. -** Copyright (c) 1993, 1994 by Olin Shivers. -*/ - -#include -#include "regexp.h" -#include "cstuff.h" - -/* Make sure our exports match up w/the implementation: */ -#include "re1.h" - -/* Not multi-threaded reentrant. */ -static char *regexp_error; - -/* Stash error msg in global. */ -void regerror(char *msg) {regexp_error = msg;} - -/* -** Return NULL normally, error string on error. -** Stash number of bytes needed for compiled regexp into `*len' -*/ - -char *re_byte_len(const char *re, int *len) -{ - int l; - - regexp_error = 0; - *len = regcomp_len(re); - return regexp_error; - } - -/* -** Return NULL normally, error string on error. -** Compile regexp into string described by `cr'. -*/ - -char *re_compile(const char *re, scheme_value cr) -{ - int len = STRING_LENGTH(cr); - regexp *r = (regexp *) &STRING_REF(cr, 0); - - regexp_error = 0; - regcomp_comp(re, r, len); - return regexp_error; - } - -/* Return NULL normally, error string on error. -** Stash match info in start_vec and end_vec. -** Returns boolean match/no-match in hit. -*/ - -char *re_exec(scheme_value cr, const char *string, int start, - scheme_value start_vec, scheme_value end_vec, int *hit) -{ - regexp *r = (regexp *) &STRING_REF(cr, 0); - - *hit = 0; - - if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */ - return "Illegal start vector"; /* never trigger. */ - if( VECTOR_LENGTH(end_vec) != NSUBEXP ) - return "Illegal end vector"; - - regexp_error = 0; - - if( regexec(r, string+start) ) { - int i; - for(i=0; istartp[i]; - const char *e = r->endp[i]; - VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE; - VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE; - r->startp[i] = 0; /* Why did Sommerfeld */ - r->endp[i] = 0; /* put these here? */ - } - *hit = 1; - } - - return regexp_error; - } - - -char *re_subst(scheme_value cr, const char *match, - const char *src, int start, - scheme_value start_vec, scheme_value end_vec, - scheme_value outbuf, int *len) -{ - int i; - regexp *r = (regexp *) &STRING_REF(cr, 0); - - if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */ - return "Illegal start vector"; /* never trigger. */ - if( VECTOR_LENGTH(end_vec) != NSUBEXP ) - return "Illegal end vector"; - - for (i=0; istartp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0; - r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0; - } - - regexp_error = 0; - regnsub(r, src, &STRING_REF(outbuf, 0), STRING_LENGTH(outbuf)); - *len = strlen(&STRING_REF(outbuf, 0)); - return regexp_error; - } - -char *re_subst_len(scheme_value cr, const char *match, - const char *src, int start, - scheme_value start_vec, scheme_value end_vec, - int *len) -{ - int i; - regexp *r = (regexp *) &STRING_REF(cr, 0); - - if( VECTOR_LENGTH(start_vec) != NSUBEXP ) /* These tests should */ - return "Illegal start vector"; /* never trigger. */ - if( VECTOR_LENGTH(end_vec) != NSUBEXP ) - return "Illegal end vector"; - - for (i=0; istartp[i] = FIXNUMP(se) ? (match + EXTRACT_FIXNUM(se)) : 0; - r->endp[i] = FIXNUMP(ee) ? (match + EXTRACT_FIXNUM(ee)) : 0; - } - - regexp_error = 0; - *len = regsublen(r, src); - return regexp_error; - } - - -/* Return NULL normally, error string on error. -** Stash match info in start_vec and end_vec. -** Returns boolean match/no-match in hit. -*/ - -char *re_match(const char *re, const char *string, int start, - scheme_value start_vec, scheme_value end_vec, int *hit) -{ - regexp *prog; - - regexp_error = 0; - *hit = 0; - prog = regcomp(re); - if( !prog ) return regexp_error; - - if( VECTOR_LENGTH(start_vec) != NSUBEXP ) { /* These two tests */ - Free(prog); - return "Illegal start vector"; - } - - if( VECTOR_LENGTH(end_vec) != NSUBEXP ) { /* should never trigger. */ - Free(prog); - return "Illegal end vector"; - } - - if( regexec(prog, string+start) ) { - int i; - for(i=0; istartp[i]; - const char *e = prog->endp[i]; - VECTOR_REF(start_vec,i) = s ? ENTER_FIXNUM(s - string) : SCHFALSE; - VECTOR_REF(end_vec,i) = e ? ENTER_FIXNUM(e - string) : SCHFALSE; - } - *hit = 1; - } - - Free(prog); - return regexp_error; - } - - -char *filter_stringvec(const char *re, char const **stringvec, int *nummatch) -{ - regexp *prog; - regexp_error = 0; - - if( prog=regcomp(re) ) { - char const **p = stringvec; - char const **q = p; - - while(*p) { - if( regexec(prog, *p) ) *q++ = *p; - p++; - } - Free(prog); - *nummatch = q-stringvec; - } - - return regexp_error; - } diff --git a/scsh/re1.h b/scsh/re1.h deleted file mode 100644 index 5249ba6..0000000 --- a/scsh/re1.h +++ /dev/null @@ -1,11 +0,0 @@ -/* Exports from re1.c */ - -char *re_byte_len(const char *re, int *len); -char *re_compile(const char *re, scheme_value target); - -char *re_exec(scheme_value cr, const char *string, int start, - scheme_value start_vec, scheme_value end_vec, int *hit); - -char *re_match(const char *re, const char *string, int start, - scheme_value start_vec, scheme_value end_vec, - int *hit); diff --git a/scsh/rx/re-low.c b/scsh/rx/re-low.c new file mode 100644 index 0000000..1c58762 --- /dev/null +++ b/scsh/rx/re-low.c @@ -0,0 +1,61 @@ +/* This is an Scheme48/C interface file, +** automatically generated by cig. +*/ + +#include +#include /* For malloc. */ +#include "libcig.h" + +/* Make sure foreign-function stubs interface to the C funs correctly: */ +#include +#include "../regexp/regex.h" +#include "re1.h" + +scheme_value df_compile_re(long nargs, scheme_value *args) +{ + extern int compile_re(scheme_value , int , regex_t* *); + scheme_value ret1; + int r1; + regex_t* r2; + + cig_check_nargs(3, nargs, "compile_re"); + r1 = compile_re(args[2], EXTRACT_BOOLEAN(args[1]), &r2); + ret1 = ENTER_FIXNUM(r1); + AlienVal(VECTOR_REF(*args,0)) = (long) r2; + return ret1; + } + +scheme_value df_re_search(long nargs, scheme_value *args) +{ + extern scheme_value re_search(const regex_t *, scheme_value , int , scheme_value , int , scheme_value , scheme_value ); + scheme_value ret1; + scheme_value r1; + + cig_check_nargs(7, nargs, "re_search"); + r1 = re_search((const regex_t *)AlienVal(args[6]), args[5], EXTRACT_FIXNUM(args[4]), args[3], EXTRACT_FIXNUM(args[2]), args[1], args[0]); + ret1 = r1; + return ret1; + } + +scheme_value df_re_errint2str(long nargs, scheme_value *args) +{ + extern const char *re_errint2str(int , const regex_t *); + scheme_value ret1; + const char *r1; + + cig_check_nargs(3, nargs, "re_errint2str"); + r1 = re_errint2str(EXTRACT_FIXNUM(args[2]), (const regex_t *)AlienVal(args[1])); + ret1 = VECTOR_REF(*args,0); + {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} + return ret1; + } + +scheme_value df_free_re(long nargs, scheme_value *args) +{ + extern void free_re(regex_t* ); + + cig_check_nargs(1, nargs, "free_re"); + free_re((regex_t* )AlienVal(args[0])); + return SCHFALSE; + } + diff --git a/scsh/rx/re-low.scm b/scsh/rx/re-low.scm index a1b2bb8..1cebb22 100644 --- a/scsh/rx/re-low.scm +++ b/scsh/rx/re-low.scm @@ -4,8 +4,8 @@ (foreign-source "/* Make sure foreign-function stubs interface to the C funs correctly: */" "#include " - "#include \"regex.h\"" - "#include \"scsh/re1.h\"" + "#include \"../regexp/regex.h\"" + "#include \"re1.h\"" "" "" ) @@ -53,7 +53,7 @@ (define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec)) (define (max-live-posix-submatch tvec) - (vfoldl (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) + (vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) (define (compile-posix-re->c-struct re-string sm?) (receive (errcode c-struct) (%compile-re re-string sm?) @@ -143,10 +143,10 @@ (define (clean-up-cres) (set! *master-cre-list* - (foldl (lambda (elt lis) - (if (weak-pointer-ref (car elt)) ; Still alive - (cons elt lis) - (begin (%free-re (cdr elt)) - lis))) - '() - *master-cre-list*))) + (fold (lambda (elt lis) + (if (weak-pointer-ref (car elt)) ; Still alive + (cons elt lis) + (begin (%free-re (cdr elt)) + lis))) + '() + *master-cre-list*))) diff --git a/scsh/rx/re1.c b/scsh/rx/re1.c new file mode 100644 index 0000000..d5d020d --- /dev/null +++ b/scsh/rx/re1.c @@ -0,0 +1,173 @@ +/* Scheme48 interface to Henry Spencer's Posix regular expression package. +** Copyright (c) 1993, 1994, 1998 by Olin Shivers. +*/ + +/* Todo: +** not_eol not_bol support on searchers +** error code -> err msg +** regex freeing +** regexp-string -> regex_t caching +** make filter_stringvec return an error code. +*/ + +#include +#include +#include "../regexp/regex.h" +#include "../cstuff.h" + +/* Make sure our exports match up w/the implementation: */ +#include "re1.h" + +/* +** Compile regexp into a malloc'd struct. +** The flag sm_p is true if we want to compile for submatches. +** On success, store pointer to struct into cr and return 0. +** On failure, free the struct, store NULL into cr, +** and return a non-zero error code. +*/ + +int compile_re(scheme_value re_str, int sm_p, regex_t **cr) +{ + char *s = &STRING_REF(re_str, 0); + int len = STRING_LENGTH(re_str); + int err; + regex_t *re = Alloc(regex_t); + + if( !re ) return -1; + + re->re_endp = s + len; + err = regcomp(re, s, REG_EXTENDED | REG_PEND + | (sm_p ? 0 : REG_NOSUB)); + if( err ) {Free(re); *cr=0;} + else *cr=re; + + return err; + } + +/* Do a regex search of RE through string STR, beginning at STR[START]. +** - STR is passed as a Scheme value as it is allowed to contain nul bytes. +** +** - trans_vec contains the translation from the user's "virtual" submatches to +** the actual submatches the engine will report: +** - trans_vec[i] = #F means user submatch #i is a dead submatch. +** - trans_vec[i] = j means user submatch #i corresponds to paren #j in re. +** +** Indexing fence-posts are a little complicated due to the fact that you +** get an extra match elt back from the matcher -- match 0 is not a +** paren-based *sub*match, but rather the match info for the whole thing. +** +** So, here is how it works: +** length(start_vec) = length(end_vec) = length(trans_vec) + 1 +** because trans_vec doesn't have a translation for submatch 0, which +** is SRE submatch #0 => Posix submatch #0. For SRE submatch #i (1, 2, ...), +** we want the submatch associated with Posix paren # trans_vec[i-1]. +** +** - MAX_PSM is the maximum paren in which we have submatch interest -- the +** max element in TRANS_VEC. Any parens after paren #MAX_PSM are just for +** grouping, not for marking submatches. We only have to allocate MAX_PSM+1 +** elements in the submatch vector we pass into the search engine. If +** MAX_PSM = -1, then we don't even want the whole-match match bounds, which +** is really good -- the search engine can really fly in this case. +** +** If we match, map re's submatches over to the exported start_vec and +** end_vec match vectors using trans_vec. +** +** Return 0 on success; #f if no match; non-zero integer error code otherwise. +*/ + +scheme_value re_search(const regex_t *re, scheme_value str, int start, + scheme_value trans_vec, int max_psm, + scheme_value start_vec, scheme_value end_vec) +{ + char *s = &STRING_REF(str,0); /* Passed as a scheme_value because */ + int len = STRING_LENGTH(str); /* it might contain nul bytes. */ + + int vlen = VECTOR_LENGTH(start_vec); + int retval; + + regmatch_t static_pmatch[10], *pm; + + /* If max_psm+1 > 10, we can't use static_pmatch. */ + if( max_psm < 10 ) pm = static_pmatch; + else { + pm = Malloc(regmatch_t, max_psm+1);/* Add 1 for the whole-match info. */ + if( !pm ) return ENTER_FIXNUM(-1); + } + + pm[0].rm_so = start; + pm[0].rm_eo = len; + + retval = regexec(re, s, max_psm+1, pm, REG_STARTEND); /* Do it. */ + + /* We matched and have match-bound info, so translate it over. */ + if( !retval && max_psm >= 0 ) { + int i; + + VECTOR_REF(start_vec,0) = ENTER_FIXNUM(pm[0].rm_so); /* whole-match */ + VECTOR_REF(end_vec,0) = ENTER_FIXNUM(pm[0].rm_eo); + + for( i=vlen-1; --i >= 0; ) { /* submatches */ + scheme_value j_scm = VECTOR_REF(trans_vec,i); + if( j_scm != SCHFALSE ) { + int j = EXTRACT_FIXNUM(j_scm); + int k = pm[j].rm_so, + l = pm[j].rm_eo; + VECTOR_REF(start_vec,i+1) = (k != -1) ? ENTER_FIXNUM(k) : SCHFALSE; + VECTOR_REF(end_vec, i+1) = (l != -1) ? ENTER_FIXNUM(l) : SCHFALSE; + } + } + } + + if( max_psm >= 10 ) Free(pm); + + if( retval==REG_NOMATCH ) return SCHFALSE; + if( ! retval ) return SCHTRUE; + return ENTER_FIXNUM(retval); + } + + + +/* Filter a vector of strings by regexp RE_STR. +** Stringvec is a NULL-terminated vector of strings; +** filter it in-place, copying the survivors back to compact them. +** Put the number of survivors in nummatch. +*/ + +int filter_stringvec(scheme_value re_str, char const **stringvec) +{ + int re_len = STRING_LENGTH(re_str);/* Passed as a scheme_value because */ + char *re_chars = &STRING_REF(re_str,0);/* it might contain nul bytes. */ + regex_t re; + + char const **p, **q; + + /* REG_NOSUB -- We just want to know if it matches or not. */ + re.re_endp = re_chars + re_len; + if( regcomp(&re, re_chars, REG_EXTENDED | REG_PEND | REG_NOSUB) ) { + return 0; + } + + for(p=q=stringvec; *p; p++) { + char const *s = *p; + if( ! regexec(&re, s, 0, 0, 0) ) *q++ = s; + } + + regfree(&re); + return q-stringvec; + } + + +const char *re_errint2str(int errcode, const regex_t *re) +{ + int size = regerror(errcode, re, 0, 0); + char *s = Malloc(char,size); + if(s) regerror(errcode, re, s, size); + return s; + } + + +void free_re(regex_t *re) +{ + regfree(re); + Free(re); + } diff --git a/scsh/rx/re1.h b/scsh/rx/re1.h new file mode 100644 index 0000000..141dd75 --- /dev/null +++ b/scsh/rx/re1.h @@ -0,0 +1,15 @@ +/* Exports from re1.c */ + +int compile_re(scheme_value sre, int sm_p, regex_t **cr); + +scheme_value re_search(const regex_t *re, scheme_value str, int start, + scheme_value trans_vec, int max_psm, + scheme_value start_vec, scheme_value end_vec); + +/* Filter a vector of strings by a regexp. */ +int filter_stringvec(scheme_value re_str, char const **stringvec); + +/* Error code -> error msg */ +const char *re_errint2str(int errcode, const regex_t *re); + +void free_re(regex_t *re); /* Free the malloc'd regexp. */ diff --git a/scsh/scsh-interfaces.scm b/scsh/scsh-interfaces.scm index bd34928..2bb1a7a 100644 --- a/scsh/scsh-interfaces.scm +++ b/scsh/scsh-interfaces.scm @@ -208,7 +208,7 @@ port->sexp-list port->string-list port->list - reduce-port + port-fold reduce-port port->fdes read-string read-string! @@ -283,7 +283,7 @@ directory-files glob glob-quote - file-match +; file-match create-temp-file temp-file-iterate @@ -418,6 +418,7 @@ exec-path-list)) +;;; Kill me? (define-interface scsh-regexp-interface (export string-match regexp-match? @@ -427,14 +428,14 @@ make-regexp ->regexp regexp? - regexp-exec + regexp-search regexp-substitute regexp-substitute/global regexp-quote)) (define-interface scsh-string-interface - (export substitute-env-vars index rindex)) + (export substitute-env-vars string-index string-index-right)) (define-interface scsh-file-names-interface (export file-name-as-directory @@ -567,13 +568,15 @@ (define-interface scsh-utilities-interface - (export del delete index rindex reduce filter first any first? nth - any? every? mapv mapv! vector-every? copy-vector initialize-vector + (export del delete filter first first? nth + fold fold-right + any every + mapv mapv! vector-every? copy-vector initialize-vector vector-append + vfold vfold-right check-arg conjoin disjoin negate compose reverse! call/cc deprecated-proc deposit-bit-field - real->exact-integer - string-replace! substring-replace!)) + real->exact-integer)) ;;; semi-standard network magic numbers ;;; should be available on all platforms @@ -683,14 +686,15 @@ char-ascii? char-set? - copy-char-set + char-set-copy char-set= char-set<= char-set-size - set-char-set! + char-set-adjoin char-set-delete + char-set-adjoin! char-set-delete! char-set-for-each - reduce-char-set + char-set-fold reduce-char-set char-set chars->char-set @@ -702,11 +706,19 @@ char-set-members char-set-contains? + char-set-every? + char-set-any + char-set-invert char-set-union char-set-intersection char-set-difference + char-set-invert! + char-set-union! + char-set-intersection! + char-set-difference! + char-set:lower-case char-set:upper-case char-set:alphabetic @@ -758,7 +770,8 @@ skip-char-set)) (define-interface awk-interface - (export (awk :syntax))) + (export (awk :syntax) + (awk/posix-string :syntax))) (define-interface scsh-dbm-interface (export dbm-open diff --git a/scsh/scsh-package.scm b/scsh/scsh-package.scm index 184074d..63766fe 100644 --- a/scsh/scsh-package.scm +++ b/scsh/scsh-package.scm @@ -27,12 +27,16 @@ (define-structure error-package (export error warn) - (open signals)) + (open signals) + (optimize auto-integrate) + ) (define-structure scsh-utilities scsh-utilities-interface - (open bitwise error-package let-opt scheme) - (files utilities)) + (open bitwise error-package loopholes let-opt scheme) + (files utilities) + (optimize auto-integrate) + ) ;;; This guy goes into the FOR-SYNTAX part of scsh's syntax exports. @@ -44,27 +48,36 @@ scsh-utilities ; check-arg scheme ) - (files syntax-helpers)) + (files syntax-helpers) + (optimize auto-integrate) + ) ;;; The bufpol/{block, line, none} values (define-structure buffered-io-flags buffered-io-flags-interface (open defenum-package scheme) - (files (machine bufpol))) + (files (machine bufpol)) + (optimize auto-integrate) + ) (define-structure char-set-package char-set-interface (open error-package ascii define-record-types ; JAR's record macro. + scsh-utilities ; For DEPRECATED-PROC scheme) - (files char-set)) + (files char-set) + (optimize auto-integrate) + ) (define-structures ((tty-flags tty-flags-interface) (scsh-internal-tty-flags scsh-internal-tty-flags-interface)) (open scheme ascii bitwise) - (files (machine tty-consts))) + (files (machine tty-consts)) + (optimize auto-integrate) + ) (define-structure scsh-version scsh-version-interface @@ -79,7 +92,9 @@ $current-input-port $current-output-port $error-output-port) - (open ports)) + (open ports) + (optimize auto-integrate) + ) ;;; The scsh-level-0 package is for implementation convenience. ;;; The scsh startup and top-level modules need access to scsh @@ -124,7 +139,8 @@ (scsh-level-0-internals (export set-command-line-args! init-scsh-hindbrain init-scsh-vars)) - (scsh-regexp-package scsh-regexp-interface)) +; (scsh-regexp-package scsh-regexp-interface) + ) (for-syntax (open scsh-syntax-helpers scheme)) (open externals structure-refs @@ -160,6 +176,13 @@ interrupts ; signal handler code + re-level-0 + rx-syntax + + string-lib + + loopholes ; For my bogus CALL-TERMINALLY implementation. + scheme ) @@ -188,7 +211,7 @@ filesys fileinfo glob - filemtch +; filemtch time ; New in release 0.2. (machine time_dep) network ; New in release 0.3. @@ -198,19 +221,25 @@ pty ; New in release 0.4. sighandlers ; New in release 0.5. scsh - re +; re rdelim - )) + ) +; (optimize auto-integrate) + ) (define-structure defrec-package (export (define-record :syntax)) (open records scheme) (for-syntax (open scheme error-package receiving)) - (files defrec)) + (files defrec) + (optimize auto-integrate) + ) (define-structure defenum-package (export (define-enum-constant :syntax) (define-enum-constants :syntax)) (open scheme) - (files enumconst)) + (files enumconst) + (optimize auto-integrate) + ) ;;; This code opens so many modules of gruesome, low-level S48 internals ;;; that these two modules are segregated into separate packages, each @@ -229,7 +258,9 @@ filenames ; translate scheme-level-2-internal ; usual-resumer scheme) - (files startup)) + (files startup) + (optimize auto-integrate) + ) (define-structure scsh-top-package (export parse-switches-and-execute repl) (open command-processor @@ -249,54 +280,78 @@ ; with-current-output-port exit scsh-level-0-internals ; set-command-line-args! init-scsh-vars scheme) - (files top meta-arg)) + (files top meta-arg) + (optimize auto-integrate) + ) (define-structure field-reader-package scsh-field-reader-interface (open receiving ; receive char-set-package - scsh-utilities + scsh-utilities ; nth error-package ; error scsh-level-0 ; delimited readers - scsh-regexp-package +; scsh-regexp-package + re-exports + string-lib ; join-strings let-opt ; optional-arg parsing & defaulting scheme ) - (files fr)) + (files fr) + (optimize auto-integrate) + ) (define-structures - ((awk-expander-package (export expand-awk)) + ((awk-expander-package (export expand-awk expand-awk/obsolete)) (awk-support-package (export next-range next-:range next-range: next-:range:))) (open receiving ; receive scsh-utilities error-package ; error - scsh-regexp-package +; scsh-regexp-package +; re-exports + sre-syntax-tools scheme ) - (files awk)) + (files awk) + (optimize auto-integrate) + ) (define-structure awk-package awk-interface - (open awk-support-package scsh-regexp-package receiving scheme) + (open awk-support-package ; These packages provide all the stuff + re-exports ; that appears in the code produced by + receiving ; an awk expansion. + scheme) (for-syntax (open awk-expander-package scheme)) - (begin (define-syntax awk expand-awk))) + (begin (define-syntax awk expand-awk) + (define-syntax awk/posix-string expand-awk/obsolete))) +;;; Exports an AWK macro that is just AWK/POSIX-STRING. +(define-structure obsolete-awk-package (export (awk :syntax)) + (open awk-package) + (begin (define-syntax awk + (syntax-rules () ((awk body ...) (awk/posix-string body ....)))))) (define-structure scsh (compound-interface (interface-of scsh-level-0) (interface-of scsh-startup-package) - scsh-regexp-interface +; scsh-regexp-interface + re-exports-interface + re-old-funs-interface scsh-field-reader-interface ; new in 0.3 ; scsh-dbm-interface (export repl) - awk-interface) + awk-interface + ) (open structure-refs scsh-level-0 scsh-level-0-internals - scsh-regexp-package + re-exports + re-old-funs +; scsh-regexp-package scsh-startup-package ; dbm awk-package @@ -304,7 +359,9 @@ scheme) (access scsh-top-package) - (begin (define repl (structure-ref scsh-top-package repl)))) + (begin (define repl (structure-ref scsh-top-package repl))) + (optimize auto-integrate) + ) (define-structure scsh-here-string-hax (export) (open reading @@ -312,11 +369,6 @@ scsh ; Just need the delimited readers. features ; make-immutable! scheme) - (files here)) - -(define-structure test-package (export test-proc) - (open scsh-regexp-package scheme) - (begin (define (test-proc p) - (regexp-substitute p - (string-match "(foo)(.*)(bar)" "Hello foo Olin bar quux") - 'post 3 1 2 'pre)))) + (files here) + (optimize auto-integrate) + ) diff --git a/scsh/scsh.scm b/scsh/scsh.scm index e2bcebb..c992fc0 100644 --- a/scsh/scsh.scm +++ b/scsh/scsh.scm @@ -7,7 +7,8 @@ ;;; and dynamic env to be gc'd away, since this procedure never returns. (define (call-terminally thunk) - (with-continuation #f (lambda () (thunk) (exit 0)))) + (with-continuation (loophole :escape #f) ; Bogus + (lambda () (thunk) (exit 0)))) ;; Alternatively: (with-continuation #f thunk) ;;; More portably, but less usefully: @@ -125,7 +126,7 @@ ;; Main loop. (let split ((i 0)) - (cond ((index clist #\: i) => + (cond ((string-index clist #\: i) => (lambda (colon) (cons (substring clist i colon) (split (+ colon 1))))) @@ -207,10 +208,10 @@ (define (with-env* alist-delta thunk) (let* ((old-env #f) - (new-env (reduce (lambda (alist key/val) - (alist-update (car key/val) (cdr key/val) alist)) - (env->alist) - alist-delta))) + (new-env (fold (lambda (key/val alist) + (alist-update (car key/val) (cdr key/val) alist)) + (env->alist) + alist-delta))) (dynamic-wind (lambda () (set! old-env (env->alist)) @@ -435,12 +436,12 @@ ;;; (port->list reader port) ;;; Repeatedly applies READER to PORT, accumulating results into a list. ;;; On EOF, returns the list of items thus collected. -;;; (reduce-port port reader op . seeds) +;;; (port-fold port reader op . seeds) ;;; Repeatedly read things from PORT with READER. Each time you read ;;; some value V, compute a new set of seeds with (apply OP V SEEDS). ;;; (More than 1 seed means OP must return multiple values). -;;; On eof, return the seeds. -;;; PORT->LIST is just (REDUCE-PORT PORT READ CONS '()) +;;; On eof, return the seeds: (apply value SEEDS). +;;; PORT->LIST is just (PORT-FOLD PORT READ CONS '()) (define (run/port+proc* thunk) (receive (r w) (pipe) @@ -505,13 +506,16 @@ (define (port->string-list port) (port->list read-line port)) -(define (reduce-port port reader op . seeds) - (letrec ((reduce (lambda seeds +(define (port-fold port reader op . seeds) + (letrec ((fold (lambda seeds (let ((x (reader port))) (if (eof-object? x) (apply values seeds) (call-with-values (lambda () (apply op x seeds)) - reduce)))))) - (apply reduce seeds))) + fold)))))) + (apply fold seeds))) + +(define reduce-port + (deprecated-proc port-fold 'reduce-port "Use port-fold instead.")) ;;; Not defined: ;;; (field-reader field-delims record-delims) @@ -671,7 +675,7 @@ (define (exec-path/env prog env . arglist) (flush-all-ports) (let ((prog (stringify prog))) - (if (index prog #\/) + (if (string-index prog #\/) ;; Contains a slash -- no path search. (%exec prog (cons prog arglist) env) diff --git a/scsh/stringlib.scm b/scsh/stringlib.scm new file mode 100644 index 0000000..53270dd --- /dev/null +++ b/scsh/stringlib.scm @@ -0,0 +1,1289 @@ +;;; String-hacking functions -*- Scheme -*- + +;;; Some of this code had (extremely distant) origins in MIT Scheme's string +;;; lib, and was substantially reworked by Olin Shivers (shivers@ai.mit.edu) +;;; 9/98. As such, it is +;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. +;;; The copyright terms are essentially open-software terms; +;;; the precise terms are at the end of this file. +;;; +;;; The KMP string-search code was massively rehacked from Stephen Bevan's +;;; code, written for scmlib, and is thus covered by the GPL. If that's a +;;; problem, write one from scratch (there are citations to standard textbooks +;;; in the comments), or rip it out and use the ten-line doubly-nested loop +;;; that's commented out just above this code. +;;; +;;; I wish I could mark definitions in this code to be inlined. +;;; Certain functions could benefit from compiler support. +;;; +;;; My policy on checking start/end substring specs is not uniform. +;;; I avoided doing arg checks when the function directly calls another +;;; lower-level function that will check the start/end specs as well. +;;; This has the advantage of not doing redundant checks, but the disadvantage +;;; is that errors are not reported early, at the highest possible call. +;;; There's not much high-level error checking of the other args, anyway. +;;; -Olin + +;;; Support for START/END substring specs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This macro parses optional start/end arguments from arg lists, defaulting +;;; them to 0/(string-length s), and checks them for correctness. + +(define-syntax let-start+end + (syntax-rules () + ((let-start+end (start end) proc s-exp args-exp body ...) + (receive (start end) (parse-final-start+end proc s-exp args-exp) + body ...)))) + + +;;; Returns three values: start end rest + +(define (parse-start+end proc s args) + (let ((slen (string-length s))) + (if (pair? args) + + (let ((start (car args)) + (args (cdr args))) + (if (or (not (integer? start)) (< start 0)) + (error "Illegal substring START spec" proc start s) + (receive (end args) + (if (pair? args) + (let ((end (car args)) + (args (cdr args))) + (if (or (not (integer? end)) (< slen end)) + (error "Illegal substring END spec" proc end s) + (values end args))) + (values slen args)) + (if (<= start end) (values start end args) + (error "Illegal substring START/END spec" + proc start end s))))) + + (values 0 (string-length s) '())))) + +(define (parse-final-start+end proc s args) + (receive (start end rest) (parse-start+end proc s args) + (if (pair? rest) (error "Extra arguments to procedure" proc rest) + (values start end)))) + +(define (check-substring-spec proc s start end) + (if (or (< start 0) + (< (string-length s) end) + (< end start)) + (error "Illegal substring START/END spec." proc s start end))) + + + +;;; substring S START [END] +;;; string-copy S [START END] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Redefine SUBSTRING so that the END parameter is optional. +;;; SUBSTRINGX is the underlying R5RS SUBSTRING function. All +;;; the code in this file uses the simple SUBSTRINGX, so you can +;;; easily port this code. + +(define substringx (structure-ref scheme substring)) ; Simple R5RS SUBSTRING + +(define (substring s start . maybe-end) ; Our SUBSTRING + (substringx s start (:optional maybe-end (string-length s)))) + +(define (string-copy s . maybe-start+end) + (let-start+end (start end) string-copy s maybe-start+end + (substringx s start end))) + + + +;;; Basic iterators and other higher-order abstractions +;;; (string-map proc s [start end]) +;;; (string-map! proc s [start end]) +;;; (string-fold kons knil s [start end]) +;;; (string-fold-right kons knil s [start end]) +;;; (string-unfold p f g seed) +;;; (string-for-each proc s [start end]) +;;; (string-iter proc s [start end]) +;;; (string-every? pred s [start end]) +;;; (string-any pred s [start end]) +;;; (string-tabulate proc len) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; No guarantees about order in MAP, FOR-EACH, EVERY, ANY procs. +;;; +;;; You want compiler support for high-level transforms on fold and unfold ops. +;;; You'd at least like a lot of inlining for clients of these procedures. +;;; Hold your breath. + +(define (string-map proc s . maybe-start+end) + (let-start+end (start end) string-map s maybe-start+end + (let* ((len (- end start)) + (ans (make-string len))) + (do ((i (- end 1) (- i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (proc (string-ref s i)))) + ans))) + +(define (string-map! proc s . maybe-start+end) + (let-start+end (start end) string-map! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i (proc (string-ref s i)))))) + +(define (string-fold kons knil s . maybe-start+end) + (let-start+end (start end) string-fold s maybe-start+end + (let lp ((v knil) (i start)) + (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) + v)))) + +(define (string-fold-right kons knil s . maybe-start+end) + (let-start+end (start end) string-fold-right s maybe-start+end + (let lp ((v knil) (i (- end 1))) + (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) + v)))) + +;;; (string-unfold p f g seed) +;;; This is the fundamental constructor for strings. +;;; - G is used to generate a series of "seed" values from the initial seed: +;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... +;;; - P tells us when to stop -- when it returns true when applied to one +;;; of these seed values. +;;; - F maps each seed value to the corresponding character +;;; in the result string. +;;; +;;; In other words, the following (simple, inefficient) definition holds: +;;; (string-unfold p f g seed) = +;;; (if (p seed) "" +;;; (string-append (string (f seed)) +;;; (string-unfold p f g (g seed)))) +;;; +;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to +;;; reverse a string, copy a string, convert a list to a string, read +;;; a port into a string, and so forth. Examples: +;;; (port->string port) = +;;; (string-unfold (compose eof-object? peek-char) +;;; read-char identity port) +;;; +;;; (list->string lis) = (string-unfold null? car cdr lis) +;;; +;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) + +;;; A problem with the following simple formulation is that it pushes one +;;; stack frame for every char in the result string -- an issue if you are +;;; using it to read a 100kchar string. So we don't use it -- but I include +;;; it to give a clear, straightforward description of what the function +;;; does. + +;(define (string-unfold p f g seed) +; (let recur ((seed seed) (i 0)) +; (if (p seed) (make-string i) +; (let* ((c (f seed)) +; (s (recur (g seed) (+ i 1)))) +; (string-set! s i c) +; s)))) + +;;; This formulation chunks up the constructed string into 1024-char chunks, +;;; popping the stack frames. So it'll reduce stack growth by a factor of +;;; 1024. Marc Feeley alerted me to this issue and its general solution. + +(define (string-unfold p f g seed) + (apply string-append + (let recur ((seed seed)) + (receive (s seed done?) + (let recur2 ((seed seed) (i 0)) + (cond ((p seed) (values (make-string i) seed #t)) + ((>= i 1024) (values (make-string i) seed #f)) + (else (let ((c (f seed))) + (receive (s seed done?) + (recur2 (g seed) (+ i 1)) + (string-set! s i c) + (values s seed done?)))))) + + (if done? (list s) + (cons s (recur seed))))))) + + +;;; This is the same as STRING-UNFOLD, but defined for multiple +;;; seed parameters. If you pass N seeds, then +;;; - P maps N parameters to a boolean. +;;; - F maps N parameters to a character. +;;; - G maps N parameters to N return values. +;;; This definition does a lot of consing; it would need a fair amount +;;; of compiler support to be efficient. + +; Not released +;(define (string-unfoldn p f g . seeds) +; (apply string-append +; (let recur ((seeds seeds)) +; (receive (s seeds done?) +; (let recur2 ((seeds seeds) (i 0)) +; (cond ((apply p seeds) (values (make-string i) seeds #t)) +; ((>= i 1024) (values (make-string i) seeds #f)) +; (else (let ((c (apply f seeds))) +; (receive seeds (apply g seeds) +; (receive (s seeds done?) +; (recur2 seeds (+ i 1)) +; (string-set! s i c) +; (values s seeds done?))))))) +; +; (if done? (list s) +; (cons s (recur seeds))))))) + +(define (string-for-each proc s . maybe-start+end) + (let-start+end (start end) string-for-each s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (proc (string-ref s i))))) + +(define (string-iter proc s . maybe-start+end) + (let-start+end (start end) string-iter s maybe-start+end + (do ((i start (+ i 1))) + ((>= i end)) + (proc (string-ref s i))))) + +(define (string-every? pred s . maybe-start+end) + (let-start+end (start end) string-every? s maybe-start+end + (let lp ((i (- end 1))) + (or (< i start) + (and (pred (string-ref s i)) + (lp (- i 1))))))) + +(define (string-any pred s . maybe-start+end) + (let-start+end (start end) string-any s maybe-start+end + (let lp ((i (- end 1))) + (and (>= i start) + (or (pred (string-ref s i)) + (lp (- i 1))))))) + + +(define (string-tabulate proc len) + (let ((s (make-string len))) + (do ((i (- len 1) (- i 1))) + ((< i 0)) + (string-set! s i (proc i))) + s)) + + + +;;; string-prefix-count[-ci] s1 s2 +;;; string-suffix-count[-ci] s1 s2 +;;; substring-prefix-count[-ci] s1 start1 end1 s2 start2 end2 +;;; substring-suffix-count[-ci] s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Find the length of the common prefix/suffix. +;;; It is not required that the two substrings passed be of equal length. +;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. + +(define (substring-prefix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count s1 start1 end1) + (check-substring-spec substring-prefix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count s1 start1 end1) + (check-substring-spec substring-suffix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + +(define (substring-prefix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count-ci s1 start1 end1) + (check-substring-spec substring-prefix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count-ci s1 start1 end1) + (check-substring-spec substring-suffix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + + +(define (string-prefix-count s1 s2) + (substring-prefix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count s1 s2) + (substring-suffix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-count-ci s1 s2) + (substring-prefix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count-ci s1 s2) + (substring-suffix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + + + +;;; string-prefix? s1 s2 +;;; string-suffix? s1 s2 +;;; string-prefix-ci? s1 s2 +;;; string-suffix-ci? s1 s2 +;;; +;;; substring-prefix? s1 start1 end1 s2 start2 end2 +;;; substring-suffix? s1 start1 end1 s2 start2 end2 +;;; substring-prefix-ci? s1 start1 end1 s2 start2 end2 +;;; substring-suffix-ci? s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These are all simple derivatives of the previous counting funs. + +(define (string-prefix? s1 s2) + (substring-prefix? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-ci? s1 s2) + (substring-prefix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-ci? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (substring-prefix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= (substring-prefix-count s1 start1 end1 + s2 start2 end2) + len1)))) + +(define (substring-suffix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count s1 start1 end1 + s2 start2 end2))))) + +(define (substring-prefix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-prefix-count-ci s1 start1 end1 + s2 start2 end2))))) + +(define (substring-suffix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count-ci s1 start1 end1 + s2 start2 end2))))) + + +;;; string-compare s1 s2 lt-proc eq-proc gt-proc +;;; string-compare-ci s1 s2 eq-proc lt-proc gt-proc +;;; substring-compare s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;; substring-compare-ci s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Primitive string-comparison functions. +;;; Continuation order is different from MIT Scheme. +;;; Continuations are applied to s1's mismatch index; +;;; in the case of equality, this is END1. + +(define (substring-compare s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) + proc> + (if (char)) + (+ match start1)))))) + +(define (substring-compare-ci s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count-ci s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) proc> + (if (char-ci)) + (+ start1 match)))))) + +(define (string-compare s1 s2 proc< proc= proc>) + (substring-compare s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + +(define (string-compare-ci s1 s2 proc< proc= proc>) + (substring-compare-ci s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + + +;;; string= string<> string-ci= string-ci<> +;;; string< string> string-ci< string-ci> +;;; string<= string>= string-ci<= string-ci>= +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Simple definitions in terms of the previous comparison funs. +;;; Inequality predicates return #f or mismatch index. +;;; I sure hope these defns get integrated. + +(define (string= s1 s2) + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f))) + +(define (string< s1 s2) + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f))) + +(define (string> s1 s2) + (string-compare s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i))) + +(define (string<= s1 s2) + (string-compare s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f))) + +(define (string>= s1 s2) + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i))) + +(define (string<> s1 s2) + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i))) + + +(define (string-ci= s1 s2) + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f))) + +(define (string-ci< s1 s2) + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f))) + +(define (string-ci> s1 s2) + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i))) + +(define (string-ci<= s1 s2) + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f))) + +(define (string-ci>= s1 s2) + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i))) + +(define (string-ci<> s1 s2) + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i))) + + +(define (substring= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring<> s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring< s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring> s1 start1 end1 s2 start2 end2) + (substring< s2 start2 end2 s1 start1 end1)) + +(define (substring<= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring>= s1 start1 end1 s2 start2 end2) + (substring<= s2 start2 end2 s1 start1 end1)) + +(define (substring-ci= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci<> s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring-ci< s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring-ci> s1 start1 end1 s2 start2 end2) + (substring-ci< s2 start2 end2 s1 start1 end1)) + +(define (substring-ci<= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci>= s1 start1 end1 s2 start2 end2) + (substring-ci<= s2 start2 end2 s1 start1 end1)) + + + +;;; Case hacking +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-upper-case? +;;; string-lower-case? +;;; +;;; string-upcase s [start end] +;;; string-upcase! s [start end] +;;; string-downcase s [start end] +;;; string-downcase! s [start end] +;;; +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. +;;; +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +;;; These two use a different definition of an "upper-/lower-case string" +;;; than MIT Scheme uses: + +(define (string-upper-case? s . maybe-start+end) + (not (apply string-any char-lower-case? s maybe-start+end))) + +(define (string-lower-case? s . maybe-start+end) + (not (apply string-any char-upper-case? s maybe-start+end))) + + +(define (string-upcase s . maybe-start+end) + (apply string-map char-upcase s maybe-start+end)) + +(define (string-upcase! s . maybe-start+end) + (apply string-map! char-upcase s maybe-start+end)) + +(define (string-downcase s . maybe-start+end) + (apply string-map char-downcase s maybe-start+end)) + +(define (string-downcase! s . maybe-start+end) + (apply string-map! char-downcase s maybe-start+end)) + + +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. + +(define (really-capitalize-string! s start end) + (cond ((string-index s char-set:alphanumeric start end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (string-downcase! s i))))) + +(define (capitalize-string! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-string! s start end))) + +(define (capitalize-string s . maybe-start+end) + (let-start+end (start end) capitalize-string s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-string! ans 0 (- end start)) + ans))) + +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +(define (really-capitalize-words! s start end) + (let lp ((i start)) + (cond ((string-index s char-set:alphanumeric i end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (let ((i1 (+ i 1))) + (cond ((string-skip s char-set:alphanumeric i1 end) => + (lambda (j) + (string-downcase! s i1 j) + (lp (+ j 1)))) + (else (string-downcase! s i1 end))))))))) + +(define (capitalize-words! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-words! s start end))) + +(define (capitalize-words s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-words! ans 0 (- end start)) + ans))) + + + +;;; Cutting & pasting strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-take string nchars +;;; string-drop string nchars +;;; +;;; string-padl string k [char start end] +;;; string-padr string k [char start end] +;;; +;;; string-trim string [char/char-set/pred start end] +;;; string-triml string [char/char-set/pred start end] +;;; string-trimr string [char/char-set/pred start end] +;;; +;;; These trimmers invert the char-set meaning from MIT Scheme -- you +;;; say what you want to trim. + +(define (string-take s n) + (if (> n 0) + (substringx s 0 n) + (let ((len (string-length s))) + (substringx s (+ len n) len)))) + +(define (string-drop s n) + (let ((len (string-length s))) + (if (> n 0) + (substringx s n len) + (substringx s 0 (+ len n))))) + +(define (string-triml s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i end))) + (else "")))) + +(define (string-trimr s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip-right s criteria end start) => + (lambda (i) (substringx s 0 (+ 1 i)))) + (else "")))) + +(define (string-trim s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i (+ 1 (string-skip-right s criteria end))))) + (else "")))) + + +(define (string-padr s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-padr s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s start (+ start n))) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans 0 s start end) + ans)))))) + +(define (string-padl s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-padl s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s (- end n) end)) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans (- n len) s start end) + ans)))))) + + + +;;; Filtering strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-delete char/char-set/pred string [start end] +;;; string-filter char/char-set/pred string [start end] +;;; +;;; If the filter criteria is a char or char-set, we scan the string twice +;;; with string-fold -- once to determine the length of the result string, +;;; and once to do the filtered copy. +;;; If the filter criteria is a predicate, we don't do this double-scan +;;; strategy, because the predicate might have side-effects or be very +;;; expensive to compute. So we preallocate a temp buffer pessimistically, +;;; and only do one scan over S. This is likely to be faster and more +;;; space-efficient that consing a list. + +(define (string-delete criteria s . maybe-start+end) + (let-start+end (start end) string-delete s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) i + (begin (string-set! temp i c) + (+ i 1)))) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (+ i 1))) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (begin (string-set! ans i c) + (+ i 1)))) + 0 s start end) + ans)))) + +(define (string-filter criteria s . maybe-start+end) + (let-start+end (start end) string-filter s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) + (begin (string-set! temp i c) + (+ i 1)) + i)) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + (+ i 1) + i)) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + (begin (string-set! ans i c) + (+ i 1)) + i)) + 0 s start end) + ans)))) + + + +;;; String search +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-index string char/char-set/pred [start end] +;;; string-index-right string char/char-set/pred [end start] +;;; string-skip string char/char-set/pred [start end] +;;; string-skip-right string char/char-set/pred [end start] +;;; Note the odd start/end ordering of index-right and skip-right params. +;;; There's a lot of replicated code here for efficiency. +;;; For example, the char/char-set/pred discrimination has +;;; been lifted above the inner loop of each proc. + +(define (string-index str criteria . maybe-start+end) + (let-start+end (start end) string-index str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((procedure? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) i + (lp (+ i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index criteria))))) + +(define (string-index-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) i + (lp (- i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index-right criteria))))) + +(define (string-skip str criteria . maybe-start+end) + (let-start+end (start end) string-skip str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) (lp (+ i 1)) + i)))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-skip criteria))))) + +(define (string-skip-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) (lp (- i 1)) + i)))) + (else (error "CRITERIA param is neither char-set or char." string-skip-right criteria))))) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-fill! string char [start end] +;;; +;;; string-copy! to tstart from [fstart fend] +;;; Guaranteed to work, even if s1 eq s2. + +(define (string-fill! s char . maybe-start+end) + (let-start+end (start end) string-fill! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i char)))) + +(define (string-copy! to tstart from . maybe-fstart+fend) + (let-start+end (fstart fend) string-copy! from maybe-fstart+fend + (let ((tend (+ tstart (- fend fstart)))) + (check-substring-spec string-copy! to tstart tend) + (if (> fstart tstart) + (do ((i fstart (+ i 1)) + (j tstart (+ j 1))) + ((>= i fend)) + (string-set! to j (string-ref from i))) + + (do ((i (- fend 1) (- i 1)) + (j (- tend 1) (- j 1))) + ((< i fstart)) + (string-set! to j (string-ref from i))))))) + + + +;;; Returns starting-position or #f if not true. +;;; This implementation is slow & simple. See below for KMP. +;;; Boyer-Moore would be nice. +;(define (substring? substring string . maybe-start+end) +; (let-start+end (start end) string substring? maybe-start+end +; (if (string-null? substring) start +; (let* ((len (string-length substring)) +; (i-bound (- end len)) +; (char1 (string-ref substring start))) +; (let lp ((i 0)) +; (cond ((string-index string char1 i i-bound) => +; (lambda (i) +; (if (substring= substring 0 len string i (+ i len)) +; i +; (lp (+ i 1))))) +; (else #f))))))) + + +;;; Searching for an occurence of a substring +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This uses the KMP algorithm +;;; "Fast Pattern Matching in Strings" +;;; SIAM J. Computing 6(2):323-350 1977 +;;; D. E. Knuth, J. H. Morris and V. R. Pratt +;;; also described in +;;; "Pattern Matching in Strings" +;;; Alfred V. Aho +;;; Formal Language Theory - Perspectives and Open Problems +;;; Ronald V. Brook (editor) +;;; This algorithm is O(m + n) where m and n are the +;;; lengths of the pattern and string respectively +;;; Original version of this code by bevan; I have substantially rehacked it. + +(define (substring? pattern source . maybe-start+end) + (let-start+end (start end) substring? source maybe-start+end + (really-substring? char=? pattern source start end))) + +(define (substring-ci? pattern source . maybe-start+end) + (let-start+end (start end) substring-ci? source maybe-start+end + (really-substring? char-ci=? pattern source start end))) + +;;; Compute the Knuth-Morris-Pratt restart vector RV for string PATTERN. If +;;; we have matched chars 0..i-1 of PATTERN against a search string S, and +;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to +;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to +;;; S[k+1] and PATTERN[0]. +;;; +;;; In other words, if you have matched the first i chars of PATTERN, but +;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest +;;; prefix of PATTERN is that you have matched. +;;; +;;; C= is the character comparator -- usefully CHAR= or CHAR-CI=. +;;; +;;; I've split this out as a separate function in case other constant-string +;;; searchers might want to use it. + +(define (make-kmp-restart-vector pattern c=) + (let* ((plen (string-length pattern)) + (rv (make-vector plen))) + (if (> plen 0) + (let ((plen-1 (- plen 1))) + (vector-set! rv 0 -1) + (let lp ((i 0) (j -1)) + (if (< i plen-1) + (if (or (= j -1) + (c= (string-ref pattern i) + (string-ref pattern j))) + (let ((i (+ 1 i)) + (j (+ 1 j))) + (vector-set! rv i j) + (lp i j)) + (lp i (vector-ref rv j))))))) + rv)) + +(define (really-substring? c= pattern source start end) + (let ((plen (string-length pattern)) + (rv (make-kmp-restart-vector pattern c=))) + + ;; The search loop. SJ & PJ are redundant state. + (let lp ((si start) (pi 0) + (sj (- end start)) ; (- end si) -- how many chars left. + (pj plen)) ; (- plen pi) -- how many chars left. + + (if (= pi plen) (- si plen) ; Win. + + (and (<= pj sj) ; Lose. + + (if (c= (string-ref source si) ; Search. + (string-ref pattern pi)) + (lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance. + + (let ((pi (vector-ref rv pi))) ; Retreat. + (if (= pi -1) + (lp (+ si 1) 0 (- sj 1) plen) ; Punt. + (lp si pi sj (- plen pi)))))))))) + + + +;;; Misc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (string-reverse s [start end]) +;;; (string-reverse! s [start end]) +;;; (string-null? s) + +(define (string-null? s) (zero? (string-length s))) + +(define (string-reverse s . maybe-start+end) + (let-start+end (start end) string-reverse s maybe-start+end + (let ((ans (make-string (- end start)))) + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((< i j)) + (string-set! ans i (string-ref s j)) + (string-set! ans j (string-ref s i))) + ans))) + +(define (string-reverse! s . maybe-start+end) + (let-start+end (start end) string-reverse! s maybe-start+end + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((<= i j)) + (let ((ci (string-ref s i))) + (string-set! s i (string-ref s j)) + (string-set! s j ci))))) + + +; This is a perfectly good definition of REVERSE-LIST->STRING, +; but S48 has it as a machine op. +;(define (reverse-list->string clist) +; (let* ((len (length clist)) +; (s (make-string len))) +; (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) +; ((not (pair? clist))) +; (string-set! s i (car clist))) +; s)) + +(define reverse-list->string (structure-ref silly reverse-list->string)) + +;(define (string->list s . maybe-start+end) +; (let-start+end (start end) string->list s maybe-start+end +; (do ((i (- end 1) (- i 1)) +; (ans '() (cons (string-ref s i) ans))) +; ((< i start) ans)))) + +(define (string->list s . maybe-start+end) + (apply string-fold-right s cons '() maybe-start+end)) + + + +;;; string-concat string-list -> string +;;; string-concat/shared string-list -> string +;;; string-append/shared s ... -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; STRING-APPEND/SHARED has license to return a string that shares storage +;;; with any of its arguments. In particular, if there is only one non-empty +;;; string amongst its parameters, it is permitted to return that string as +;;; its result. STRING-APPEND, by contrast, always allocates new storage. +;;; +;;; STRING-CONCAT & STRING-CONCAT/SHARED are passed a list of strings, +;;; which they concatenate into a result string. STRING-CONCAT always +;;; allocates a fresh string; STRING-CONCAT/SHARED may (or may not) return +;;; a result that shares storage with any of its arguments. In particular, +;;; if it is applied to a singleton list, it is permitted to return the +;;; car of that list as its value. +;;; +;;; This is portable code, but could be much more efficient w/compiler +;;; support. Especially the n-ary guys. + +;;; We delete the empty strings from the parameter list before handing +;;; off to string-concat/shared. + +(define (string-append/shared . strings) + (string-concat/shared (fold-right (lambda (s lis) + (if (string-null? s) lis (cons s lis))) + '() + strings))) + +(define (string-concat/shared strings) + (cond ((not (pair? strings)) "") ; () => "". + ((not (pair? (cdr strings))) (car strings)) ; (s) => s. + (else (apply string-append strings)))) ; Allocate & concat. + +(define (string-concat strings) (apply string-append strings)) + + + +;;; xsubstring s from [to start end] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; S is a string; START and END are optional arguments that demarcate +;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole +;;; string). Replicate this substring up and down index space, in both the +;; positive and negative directions. For example, if S = "abcdefg", START=3, +;;; and END=6, then we have the conceptual bidirectionally-infinite string +;;; ... d e f d e f d e f d e f d e f d e f d e f ... +;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... +;;; XSUBSTRING returns the substring of this string beginning at index FROM, +;;; and ending at TO (which defaults to FROM+(END-START)). +;;; +;;; You can use XSUBSTRING in many ways: +;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" +;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" +;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" +;;; +;;; Note that +;;; - The FROM/TO indices give a half-open range -- the characters from +;;; index FROM up to, but not including index TO. +;;; - The FROM/TO indices are not in terms of the index space for string S. +;;; They are in terms of the replicated index space of the substring +;;; defined by S, START, and END. +;;; +;;; It is an error if START=END -- although this is allowed by special +;;; dispensation when FROM=TO. + +(define (xsubstring s from . maybe-to+start+end) + (receive (to start end) + (if (pair? maybe-to+start+end) + (let-start+end (start end) xsubstring s (cdr maybe-to+start+end) + (values (car maybe-to+start+end) start end)) + (let ((slen (string-length s))) + (values (+ from slen) 0 slen))) + (let ((slen (- end start)) + (anslen (- to from))) + (cond ((< anslen 0) + (error "Illegal FROM/TO spec passed to xsubstring -- FROM > TO." + s from to start end)) + + ((zero? anslen) "") + ((zero? slen) (error "Empty (sub)string passed to xsubstring" + s from to start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (make-string anslen (string-ref s start))) + + ;; Selected text falls entirely within one span. + ((= (floor (/ from slen)) (floor (/ to slen))) + (substringx s (+ start (modulo from slen)) + (+ start (modulo to slen)))) + + ;; Selected text requires multiple spans. + (else (let ((ans (make-string anslen))) + (multispan-repcopy! ans 0 s from to start end) + ans)))))) + + +;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Exactly the same as xsubstring, but the extracted text is written +;;; into the string TARGET starting at index TSTART. +;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy +;;; a string on top of itself. + +(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) + (receive (sto start end) + (if (pair? maybe-sto+start+end) + (let-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) + (values (car maybe-sto+start+end) start end)) + (let ((slen (string-length s))) + (values (+ sfrom slen) 0 slen))) + + (let* ((tocopy (- sto sfrom)) + (tend (+ tstart tocopy)) + (slen (- end start))) + (check-substring-spec string-xcopy! target tstart tend) + (cond ((< tocopy 0) + (error "Illegal FROM/TO spec passed to string-xcopy! -- FROM > TO." + target tstart s sfrom sto start end)) + ((zero? tocopy)) + ((zero? slen) (error "Empty (sub)string passed to string-xcopy!" + target tstart s sfrom sto start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (string-fill! target (string-ref s start) tstart tend)) + + ;; Selected text falls entirely within one span. + ((= (floor (/ sfrom slen)) (floor (/ sto slen))) + (string-copy! target tstart s + (+ start (modulo sfrom slen)) + (+ start (modulo sto slen)))) + + ;; Multi-span copy. + (else (multispan-repcopy! target tstart s sfrom sto start end)))))) + +;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! +;;; Internal -- not exported, no careful arg checking. +(define (multispan-repcopy! target tstart s sfrom sto start end) + (let* ((slen (- end start)) + (i0 (+ start (modulo sfrom slen))) + (total-chars (- sto sfrom))) + + ;; Copy the partial span @ the beginning + (string-copy! target tstart s i0 end) + + (let* ((ncopied (- end i0)) ; We've copied this many. + (nleft (- total-chars ncopied)) ; # chars left to copy. + (nspans (quotient nleft slen))) ; # whole spans to copy + + ;; Copy the whole spans in the middle. + (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. + (nspans nspans (- nspans 1))) ; # spans to copy + ((zero? nspans) + ;; Copy the partial-span @ the end & we're done. + (string-copy! target i s start (+ start (- total-chars (- i tstart))))) + + (string-copy! target i s start end))))) ; Copy a whole span. + + + +;;; (join-strings string-list [delimiter grammar]) => string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Paste strings together using the delimiter string. +;;; +;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" +;;; +;;; DELIMITER defaults to a single space " " +;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix. + +;;; (join-strings strings [delim grammar]) + +(define (join-strings strings . args) + (if (pair? strings) + (let-optionals args ((delim " ") (grammar 'infix)) + (let ((strings (reverse strings))) + (let lp ((strings (cdr strings)) + (ans (case grammar + ((infix) (list (car strings))) + ((suffix) (list (car strings) delim)) + (else (error "Illegal join-strings grammar" grammar))))) + (if (pair? strings) + (lp (cdr strings) + (cons (car strings) (cons delim ans))) + + ; All done + (apply string-append ans))))) + + "")) ; Special-cased for infix grammar. + + + +;;; MIT Scheme copyright terms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This material was developed by the Scheme project at the Massachusetts +;;; Institute of Technology, Department of Electrical Engineering and +;;; Computer Science. Permission to copy and modify this software, to +;;; redistribute either the original software or a modified version, and +;;; to use this software for any purpose is granted, subject to the +;;; following restrictions and understandings. +;;; +;;; 1. Any copy made of this software must include this copyright notice +;;; in full. +;;; +;;; 2. Users of this software agree to make their best efforts (a) to +;;; return to the MIT Scheme project any improvements or extensions that +;;; they make, so that these may be included in future releases; and (b) +;;; to inform MIT of noteworthy uses of this software. +;;; +;;; 3. All materials developed as a consequence of the use of this +;;; software shall duly acknowledge such use, in accordance with the usual +;;; standards of acknowledging credit in academic research. +;;; +;;; 4. MIT has made no warrantee or representation that the operation of +;;; this software will be error-free, and MIT is under no obligation to +;;; provide any services, by way of maintenance, update, or otherwise. +;;; +;;; 5. In conjunction with products arising from the use of this material, +;;; there shall be no use of the name of the Massachusetts Institute of +;;; Technology nor of any adaptation thereof in any advertising, +;;; promotional, or sales literature without prior written consent from +;;; MIT in each case. diff --git a/scsh/stringpack.scm b/scsh/stringpack.scm new file mode 100644 index 0000000..1c65845 --- /dev/null +++ b/scsh/stringpack.scm @@ -0,0 +1,380 @@ +(define-interface string-lib-interface + (export + ;; string-map proc s [start end] -> s + (string-map (proc ((proc (:char) :char) + :string + &opt :exact-integer :exact-integer) + :string)) + + ;; string-map! proc s [start end] -> unspecific + (string-map! (proc ((proc (:char) :values) + :string + &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-fold kons knil s [start end] -> value + ;; string-fold-right kons knil s [start end] -> value + ((string-fold string-fold-right) + (proc ((proc (:char :value) :value) + :value :string + &opt :exact-integer :exact-integer) + :value)) + + ;; string-unfold p f g seed -> string + (string-unfold (proc ((proc (:value) :boolean) + (proc (:value) :char) + (proc (:value) :value) + :value) + :string)) + +; Enough is enough. +; ;; string-unfoldn p f g seed ... -> string +; (string-unfoldn (proc ((procedure :values :boolean) +; (procedure :values :char) +; (procedure :values :values) +; &rest :value) +; :string)) + + ;; string-tabulate proc len -> string + (string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer) + :string)) + + ;; string-for-each proc s [start end] -> unspecific + ;; string-iter proc s [start end] -> unspecific + ((string-for-each string-iter) + (proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-every? pred s [start end] + ;; string-any pred s [start end] + (string-every? + (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) + :boolean)) + (string-any + (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) + :value)) + + ;; string-compare string1 string2 lt-proc eq-proc gt-proc + ;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc + ((string-compare string-compare-ci) + (proc (:string :string (proc (:exact-integer) :values) + (proc (:exact-integer) :values) + (proc (:exact-integer) :values)) + :values)) + + ;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt + ;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt + ((substring-compare substring-compare-ci) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer + (proc (:exact-integer) :values) + (proc (:exact-integer) :values) + (proc (:exact-integer) :values)) + :values)) + + ;; string< string1 string2 + ((string= string< string> string<= string>= string<> + string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>) + (proc (&rest :string) :value)) + + ;; substring< string1 start1 end1 string2 start2 end2 + ((substring= substring<> substring-ci= substring-ci<> + substring< substring> substring-ci< substring-ci> + substring<= substring>= substring-ci<= substring-ci>=) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :value)) + + ;; string-upper-case? string [start end] + ;; string-lower-case? string [start end] + ((string-upper-case? string-lower-case?) + (proc (:string &opt :exact-integer :exact-integer) :boolean)) + + ;; capitalize-string string [start end] + ;; capitalize-words string [start end] + ;; string-downcase string [start end] + ;; string-upcase string [start end] + ;; capitalize-string! string [start end] + ;; capitalize-words! string [start end] + ;; string-downcase! string [start end] + ;; string-upcase! string [start end] + ((capitalize-string capitalize-words string-downcase string-upcase) + (proc (:string &opt :exact-integer :exact-integer) :string)) + ((capitalize-string! capitalize-words! string-downcase! string-upcase!) + (proc (:string &opt :exact-integer :exact-integer) :unspecific)) + + ;; string-take string nchars + ;; string-drop string nchars + ((string-take string-drop) (proc (:string :exact-integer) :string)) + + ;; string-padl string k [char start end] + ;; string-padr string k [char start end] + ((string-padl string-padr) + (proc (:string :exact-integer &opt :char :exact-integer :exact-integer) + :string)) + + ;; string-trim string [char/char-set/pred start end] + ;; string-triml string [char/char-set/pred start end] + ;; string-trimr string [char/char-set/pred start end] + ((string-trim string-triml string-trimr) + (proc (:string &opt :value :exact-integer :exact-integer) + :string)) + + ;; string-filter char/char-set/pred string [start end] + ;; string-delete char/char-set/pred string [start end] + ((string-filter string-delete) + (proc (:value :string &opt :exact-integer :exact-integer) :string)) + + ;; string-index string char/char-set/pred [start end] + ;; string-index-right string char/char-set/pred [end start] + ;; string-skip string char/char-set/pred [start end] + ;; string-skip-right string char/char-set/pred [end start] + ((string-index string-index-right string-skip string-skip-right) + (proc (:string :value &opt :exact-integer :exact-integer) + :value)) + + ;; string-prefix-count string1 string2 + ;; string-suffix-count string1 string2 + ;; string-prefix-count-ci string1 string2 + ;; string-suffix-count-ci string1 string2 + ((string-prefix-count string-prefix-count-ci + string-suffix-count string-suffix-count-ci) + (proc (:string :string) :exact-integer)) + + ;; substring-prefix-count string1 start1 end1 string2 start2 end2 + ;; substring-suffix-count string1 start1 end1 string2 start2 end2 + ;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2 + ;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2 + ((substring-prefix-count substring-prefix-count-ci + substring-suffix-count substring-suffix-count-ci) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :exact-integer)) + + + ;; string-prefix? string1 string2 + ;; string-suffix? string1 string2 + ;; string-prefix-ci? string1 string2 + ;; string-suffix-ci? string1 string2 + ((string-prefix? string-prefix-ci? + string-suffix? string-suffix-ci?) + (proc (:string :string) :boolean)) + + ;; substring-prefix? string1 start1 end1 string2 start2 end2 + ;; substring-suffix? string1 start1 end1 string2 start2 end2 + ;; substring-prefix-ci? string1 start1 end1 string2 start2 end2 + ;; substring-suffix-ci? string1 start1 end1 string2 start2 end2 + ((substring-prefix? substring-prefix-ci? + substring-suffix? substring-suffix-ci?) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :boolean)) + + ;; substring? pattern string [start end] + ;; substring-ci? pattern string [start end] + ((substring? substring-ci?) + (proc (:string :string &opt :exact-integer :exact-integer) + :value)) + + ;; string-fill! string char [start end] + (string-fill! (proc (:string :char &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-copy! to tstart from [fstart fend] + (string-copy! (proc (:string :exact-integer :string + &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-copy s [start end] -> string + ;; substring s start [end] -> string + (string-copy (proc (:string &opt :exact-integer :exact-integer) :string)) + (substring (proc (:string :exact-integer &opt :exact-integer) :string)) + + ;; string-reverse s [start end] + ;; string-reverse! s [start end] + (string-reverse (proc (:string &opt :exact-integer :exact-integer) :string)) + (string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific)) + + ;; reverse-list->string char-list + ;; string->list s [start end] + ;; string-concat string-list + ;; string-concat/shared string-list + ;; string-append/shared s ... + (reverse-list->string (proc (:value) :string)) + (string->list (proc (:string &opt :exact-integer :exact-integer) :value)) + ((string-concat string-concat/shared) (proc (:value) :string)) + (string-append/shared (proc (&rest :string) :string)) + + ;; xsubstring s from [to start end] + ;; string-xcopy! target tstart s from [to start end] + (xsubstring (proc (:string :exact-integer &opt + :exact-integer :exact-integer :exact-integer) + :string)) + (string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt + :exact-integer :exact-integer :exact-integer) + :unspecific)) + + ;; string-null? s + (string-null? (proc (:string) :boolean)) + + (join-strings (proc (:value &opt :string :symbol) :string)) + + ;; Here are the R4RS procs + (string? (proc (:value) :boolean)) + (make-string (proc (:exact-integer &opt :char) :string)) + (string (proc (&rest :char) :string)) + (string-length (proc (:string) :exact-integer)) + (string-ref (proc (:string :exact-integer) :char)) + (string-set! (proc (:string :exact-integer :char) :unspecific)) + + ; Not provided by string-lib. + ;((string=? string-ci=? string? string-ci>? string<=? string-ci<=? + ; string>=? string-ci>=?) (proc (:string :string) :boolean)) + + ;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!, + ;; and STRING->LIST. The string-lib types are different -- extended. + ;(substring (proc (:string :exact-integer :exact-integer) :string)) + ;(string-copy (proc (:string) :string)) + ;(string-fill! (proc (:string :char) :unspecific)) + ;(string->list (proc (:string) :value)) + + (string-append (proc (&rest :string) :string)) + (list->string (proc (:value) :string)) + )) + + +(define-interface string-lib-internals-interface + (export + (parse-final-start+end (proc ((procedure :values :values) :string :value) + (some-values :exact-integer :exact-integer))) + (parse-start+end (proc ((procedure :values :values) :string :value) + (some-values :exact-integer :exact-integer :value))) + (check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer) + :unspecific)) + (make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean)) + :vector)))) + + +(define-structures ((string-lib string-lib-interface) + (string-lib-internals string-lib-internals-interface)) + (open char-set-package + receiving + error-package + let-opt + structure-refs + scsh-utilities ; FOLD-RIGHT + scheme) + (access scheme ; Original SUBSTRING + silly) ; Primitive reverse-list->string + (files stringlib) + (optimize auto-integrate)) + + + +;;; CPP Lib +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Character->Character Partial functions + +;;; Many of these types are pretty weak, but there is no way to +;;; specify that a parameter must be a particular record type. +;;; Every little bit helps, though. + +(define-interface ccp-lib-interface + (export + ;; ccp? x -> boolean + (ccp? (proc (:value) :boolean)) + + ;; ccp-domain ccp -> char-set + (ccp-domain (proc (:value) :value)) ; Not very informative. + + ;; ccp-copy ccp -> ccp + (ccp-copy (proc (:value) :value)) + + ;; ccp= ccp1 ccp2 ... + ;; ccp<= ccp1 ccp2 ... + ((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative. + + ;; ccp-fold kons knil ccp -> value + (ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value)) + + ;; ccp-for-each proc ccp + (ccp-for-each (proc ((proc (:char :char) :values)) :unspecific)) + + ;; ccp->alist ccp -> alist + (ccp->alist (proc (:value) :value)) + + ;; ccp-restrict ccp cset -> ccp + ;; ccp-restrict! ccp cset -> ccp + ((ccp-restrict ccp-restrict!) (proc (:value :value) :value)) + + ;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp + ;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp + ;; ccp-delete ccp from-char1 ... -> ccp + ;; ccp-delete! ccp from-char1 ... -> ccp + ((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value)) + ((ccp-delete ccp-delete!) (proc (:value &rest :char) :value)) + + ;; ccp-extend ccp1 ... -> ccp + ;; ccp-extend! ccp1 ... -> ccp + ((ccp-extend ccp-extend!) (proc (&rest :value) :value)) + + ;; ccp-compose ccp1 ... -> ccp + (ccp-compose (proc (&rest :value) :value)) + + ;; alist->ccp char/char-alist [ccp] -> ccp + ;; alist->ccp! char/char-alist [ccp] -> ccp + ((alist->ccp alist->ccp!) (proc (:value &opt :value) :value)) + + ;; proc->ccp proc [domain ccp] -> ccp + ;; proc->ccp! proc [domain ccp] -> ccp + ((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value) + :value)) + + ;; constant-ccp char [domain ccp] -> ccp + ;; constant-ccp! char domain ccp -> ccp + ((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value)) + + ;; ccp/mappings from1 to1 ... -> ccp + ;; extend-ccp/mappings ccp from1 to1 ... -> ccp + ;; extend-ccp/mappings! ccp from1 to1 ... -> ccp + (ccp/mappings (proc (&rest :value) :value)) + ((extend-ccp/mappings extend-ccp/mappings!) + (proc (:value &rest :value) :value)) + + ;; construct-ccp ccp elt ... -> ccp + ;; construct-ccp! ccp elt ... -> ccp + ((construct-ccp construct-ccp!) (proc (:value &rest :value) :value)) + + ;; ccp-unfold p f g seed -> ccp + (ccp-unfold (proc ((proc (:value) :boolean) + (procedure :value (some-values :char :char)) + (proc (:value) :value) + :value) + :value)) + + ;; tr ccp string [start end] -> string + ;; ccp-map ccp string [start end] -> string + ;; ccp-map! ccp string [start end] + ;; ccp-app ccp char -> char or false + ((tr ccp-map) + (proc (:value :string &opt :exact-integer :exact-integer) :string)) + (ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific)) + (ccp-app (proc (:value :char) :value)) + + ;; Primitive CCP's. + ccp:0 ccp:1 ccp:upcase ccp:downcase + )) + +(define-structure ccp-lib ccp-lib-interface + (open char-set-package + ascii + defrec-package + string-lib + let-opt + receiving + scsh-utilities ; every + error-package + scheme) + (files ccp) + (optimize auto-integrate)) diff --git a/scsh/syscalls.c b/scsh/syscalls.c index 5ebc7b5..d20c536 100644 --- a/scsh/syscalls.c +++ b/scsh/syscalls.c @@ -855,21 +855,6 @@ scheme_value df_scm_sort_filevec(long nargs, scheme_value *args) return SCHFALSE; } -scheme_value df_filter_stringvec(long nargs, scheme_value *args) -{ - extern char *filter_stringvec(const char *, char const ** , int *); - scheme_value ret1; - char *r1; - int r2; - - cig_check_nargs(3, nargs, "filter_stringvec"); - r1 = filter_stringvec(cig_string_body(args[2]), (char const ** )AlienVal(args[1]), &r2); - ret1 = VECTOR_REF(*args,0); - {AlienVal(CAR(ret1)) = (long) r1; CDR(ret1) = strlen_or_false(r1);} - VECTOR_REF(*args,1) = ENTER_FIXNUM(r2); - return ret1; - } - scheme_value df_scm_envvec(long nargs, scheme_value *args) { extern char** scm_envvec(int *); diff --git a/scsh/syscalls.scm b/scsh/syscalls.scm index b7c44dd..b19825c 100644 --- a/scsh/syscalls.scm +++ b/scsh/syscalls.scm @@ -850,23 +850,26 @@ ;;; I do this one in C, I'm not sure why: ;;; It is used by MATCH-FILES. -(define-foreign %filter-C-strings! - (filter_stringvec (string pattern) ((C "char const ** ~a") cvec)) - static-string ; error message -- #f if no error. - integer) ; number of files that pass the filter. +;;; 99/7: No one is using this function, so I'm commenting it out. +;;; Later, we could tune up the globber or regexp file-matcher to use +;;; it (but should shift it into the rx directory). But I should also go +;;; to a file-at-a-time cursor model for directory fetching. -Olin +;(define-foreign %filter-C-strings! +; (filter_stringvec (string-desc pattern) ((C "char const ** ~a") cvec)) +; integer) ; number of files that pass the filter. -(define (match-files regexp . maybe-dir) - (let ((dir (:optional maybe-dir "."))) - (check-arg string? dir match-files) - (receive (err cvec numfiles) - (%open-dir (ensure-file-name-is-nondirectory dir)) - (if err (errno-error err match-files regexp dir)) - (receive (err numfiles) (%filter-C-strings! regexp cvec) - (if err (error err match-files)) - (%sort-file-vector cvec numfiles) - (let ((files (C-string-vec->Scheme&free cvec numfiles))) - (vector->list files)))))) +;(define (match-files regexp . maybe-dir) +; (let ((dir (:optional maybe-dir "."))) +; (check-arg string? dir match-files) +; (receive (err cvec numfiles) +; (%open-dir (ensure-file-name-is-nondirectory dir)) +; (if err (errno-error err match-files regexp dir)) +; (receive (numfiles) (%filter-C-strings! regexp cvec) +; ;(if err (error err match-files)) +; (%sort-file-vector cvec numfiles) +; (let ((files (C-string-vec->Scheme&free cvec numfiles))) +; (vector->list files)))))) ;;; Environment manipulation @@ -875,7 +878,7 @@ ;;; (var . val) / "var=val" rep conversion: (define (split-env-string var=val) - (let ((i (index var=val #\=))) + (let ((i (string-index var=val #\=))) (if i (values (substring var=val 0 i) (substring var=val (+ i 1) (string-length var=val))) (error "No \"=\" in environment string" var=val)))) diff --git a/scsh/utilities.scm b/scsh/utilities.scm index ea65614..b8fb965 100644 --- a/scsh/utilities.scm +++ b/scsh/utilities.scm @@ -16,31 +16,19 @@ (define (delete pred lis) (filter (lambda (x) (not (pred x))) lis)) -(define (index str c . maybe-start) - (let ((start (max 0 (:optional maybe-start 0))) - (len (string-length str))) - (do ((i start (+ 1 i))) - ((or (>= i len) - (char=? c (string-ref str i))) - (and (< i len) i))))) +(define (fold kons knil lis) + (let lp ((lis lis) (ans knil)) + (if (pair? lis) + (lp (cdr lis) (kons (car lis) ans)) + ans))) -(define (rindex str c . maybe-start) - (let* ((len (string-length str)) - (start (min (:optional maybe-start len) - len))) - (do ((i (- start 1) (- i 1))) - ((or (< i 0) - (char=? c (string-ref str i))) - (and (>= i 0) i))))) +(define (fold-right kons knil lis) + (let recur ((lis lis)) + (if (pair? lis) + (let ((head (car lis))) ; Won't need LIS after RECUR call. + (kons head (recur (cdr lis)))) + knil))) -;;; (f (f (f zero x1) x2) x3) -;;; [Richard's does (f x3 (f x2 (f x1 zero))) -(define (reduce f zero l) - (letrec ((lp (lambda (val rest) - (if (pair? rest) (lp (f val (car rest)) (cdr rest)) - val)))) - (lp zero l))) - (define (filter pred list) (letrec ((filter (lambda (list) (if (pair? list) @@ -62,8 +50,6 @@ (lp (cdr list)))))))) (lp list))) -(define any first) - ;;; Returns the first true value produced by PRED, not the list element ;;; that satisfied PRED. @@ -74,26 +60,27 @@ (lp (cdr list))))))) (lp list))) -(define any? first?) +(define any first?) + +(define (every pred list) + (or (not (pair? list)) + (let lp ((head (car list)) (tail (cdr list))) + (if (pair? tail) + (and (pred head) (lp (car tail) (cdr tail))) + (pred head))))) ; Tail-call the last PRED call. -(define (every? pred list) - (letrec ((lp (lambda (list) - (or (not (pair? list)) - (and (pred (car list)) - (lp (cdr list))))))) - (lp list))) (define (mapv f v) (let* ((len (vector-length v)) (ans (make-vector len))) (do ((i 0 (+ i 1))) - ((= i len) ans) + ((>= i len) ans) (vector-set! ans i (f (vector-ref v i)))))) (define (mapv! f v) (let ((len (vector-length v))) (do ((i 0 (+ i 1))) - ((= i len) v) + ((>= i len) v) (vector-set! v i (f (vector-ref v i)))))) (define (vector-every? pred v) @@ -115,9 +102,42 @@ ((< i 0) v) (vector-set! v i (init i))))) +(define (vector-append . vecs) + (let* ((vlen (fold (lambda (v len) (+ (vector-length v) len)) 0 vecs)) + (ans (make-vector vlen))) + (let lp1 ((vecs vecs) (to 0)) + (if (pair? vecs) + (let* ((vec (car vecs)) + (len (vector-length vec))) + (let lp2 ((from 0) (to to)) + (cond ((< from len) + (vector-set! ans to (vector-ref vec from)) + (lp2 (+ from 1) (+ to 1))) + (else (lp1 (cdr vecs) to))))))) + ans)) + + +(define (vfold kons knil v) + (let ((len (vector-length v))) + (do ((i 0 (+ i 1)) + (ans knil (kons (vector-ref v i) ans))) + ((>= i len) ans)))) + +(define (vfold-right kons knil v) + (do ((i (- (vector-length v) 1) (- i 1)) + (ans knil (kons (vector-ref v i) ans))) + ((< i 0) ans))) + + +;;; We loophole the call to ERROR -- the point is that perhaps the +;;; user will interact with a breakpoint, and proceed with a new +;;; value, which we will then pass to a new invocation of CHECK-ARG +;;; for approval. (define (check-arg pred val caller) (if (pred val) val - (check-arg pred (error "Bad argument" val pred caller) caller))) + (check-arg pred + (loophole :value (error "Bad argument" val pred caller)) + caller))) (define (conjoin f g) (lambda args (and (apply f args) (apply g args)))) @@ -170,31 +190,3 @@ (let ((f (round x))) (if (inexact? f) (inexact->exact f) f))) - -;;; Copy string SOURCE into TARGET[start,...] - -(define (string-replace! target start source) - (let ((len (string-length source))) - (do ((i (+ start len -1) (- i 1)) - (j (- len 1) (- j 1))) - ((< j 0) target) - (string-set! target i (string-ref source j))))) - - -;;; Copy SOURCE[source-start, source-end) into TARGET[start,) - -(define (substring-replace! target start source source-start source-end) - (do ((i (+ start (- source-end source-start) -1) (- i 1)) - (j (- source-end 1) (- j 1))) - ((< j source-start) target) - (string-set! target i (string-ref source j)))) - - -;;; Compute (... (f (f (f zero c0) c1) c2) ...) - -(define (string-reduce f zero s) - (let ((len (string-length s))) - (let lp ((v zero) (i 0)) - (if (= i len) - v - (lp (f v (string-ref s i)) (+ i 1))))))