scsh-0.5/scsh/awk.scm

506 lines
18 KiB
Scheme

;;; An awk loop, after the design of David Albertz and Olin Shivers.
;;; Copyright (c) 1994 by Olin Shivers.
;;; - Requires RECEIVE from RECEIVING package.
;;; - Would require DESTRUCTURE from DESTRUCTURING package, but it appears
;;; to be broken, so we hack it w/cars and cdrs.
;;; - Requires STRING-MATCH from SCSH package.
;;; This should be hacked to convert regexp strings into regexp structures
;;; at the top of the form, and then just refer to the structs in the
;;; tests.
;;; Examples:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; ;;; Filter -- pass only lines containing my name.
;;; (awk (read-line) (line) ()
;;; ("Olin" (display line) (newline)))
;;;
;;; ;;; Count the number of non-comment lines of code in my Scheme source.
;;; (awk (read-line) (line) ((nlines 0))
;;; ("^[ \t]*;" nlines) ; A comment line.
;;; (else (+ nlines 1))) ; Not a comment line.
;;;
;;; ;;; Read numbers, counting the evens and odds.
;;; (awk (read) (val) ((evens 0) (odds 0))
;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about
;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too.
;;; (else (display "neg ") (values evens odds))
;;;
;;; ((even? val) (values (+ evens 1) odds))
;;; (else (values evens (+ odds 1))))
;;; Syntax:
;;; (awk <reader-exp> <rec&field-vars> [<rec-counter>] <state-var-inits>
;;; <clause1>
;;; .
;;; .
;;; <clausen>)
;;; This macro is written using Clinger/Rees explicit-renaming low-level
;;; macros. So it is pretty ugly. It takes a little care to generate
;;; cosmetically attractive code, for two reasons:
;;; - It makes it easier for humans to examine the expanded code.
;;; - It helps low-tech compilers compile the code well. Some of the
;;; optimisations the expander implements would be hard for even a
;;; sophisticated compiler to perform automatically. For example, it doesn't
;;; introduce a record-counter variable unless required to do so. It's a
;;; non-trivial analysis to spot and remove an unused loop variable (I show
;;; how to do so in my dissertation; I don't know of any production
;;; compilers that do it). Same remarks apply to the variable that tracks
;;; the state bit for ELSE clauses -- we don't introduce one unless the loop
;;; actually contains ELSE clauses. The lesson here is that loop macros
;;; by definition have information about the data-flow of their bodies that
;;; compilers have to work hard to spot by analysis of their expanded forms.
;;; The macro can exploit this knowledge at the high-level.
;;;
;;; Interesting research issue: Could one design a macro system that would
;;; allow the macro to communicate this knowledge to the compiler? Could
;;; the macro's assertions be verified by the compiler, as well?
;;;
;;; In any even, there's a down-side to this cosmetic clean-up:
;;; all of this optimisation definitely makes the macro a lot more hairy
;;; than it would otherwise be. The expanded code is easier to read; the
;;; macro itself is harder to read.
;;; Simple syntax-hacking utilities.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Return a form that produces multiple values.
;;; () => (values)
;;; (v) => v
;;; (v1 v2 ...) => (values v1 v2 ...)
(define (mult-values vals rename)
(if (or (not (pair? vals)) (pair? (cdr vals)))
`(,(rename 'values) . ,vals)
(car vals)))
;;; () => ()
;;; (v1) => (v1)
;;; (v1 v2) => ((VALUES v1 v2))
;;;
;;; Return an expression list, not an expression. (Either 1 or 0 expressions.)
;;; Use this one when we don't care what happens if we are returning 0 vals.
;;; It pairs up with MV-LET below, which ignores the number of values
;;; returned to it when expecting zero values.
(define (sloppy-mult-values vals rename)
(if (and (pair? vals) (pair? (cdr vals)))
`((,(rename 'values) . ,vals))
vals))
;; DEBLOCK maps an expression to a list of expressions, flattening BEGINS.
;; (deblock '(begin (begin 3 4) 5 6 (begin 7 8))) => (3 4 5 6 7 8)
(define (deblock exp rename compare)
(let ((%block (rename 'begin)))
(let deblock1 ((exp exp))
(if (and (pair? exp)
; (name? (car exp))
(compare %block (car exp)))
(apply append (map deblock1 (cdr exp)))
(list exp)))))
;; BLOCKIFY maps an expression list to a BEGIN form, flattening nested BEGINS.
;; (blockify '( (begin 3 4) 5 (begin 6) )) => (begin 3 4 5 6)
(define (blockify exps rename compare)
(let ((new-exps (apply append
(map (lambda (exp) (deblock exp rename compare))
exps))))
(cond ((null? new-exps)
(error "Empty BEGIN" exps))
((null? (cdr new-exps)) ; (begin exp) => exp
(car new-exps))
(else `(,(rename 'begin) . ,new-exps)))))
(define (mv-let r c vars exp body)
(if (pair? vars)
(if (pair? (cdr vars))
`(,(r 'receive) ,vars ,exp . ,(deblock body r c))
`(,(r 'let) ((,(car vars) ,exp)) . ,(deblock body r c)))
(blockify (list exp body) r c)))
;;; Is X one of the keywords {range, :range, range:, :range:}?
(define (range-keyword? x rename compare)
(or (compare x (rename 'range))
(compare x (rename ':range))
(compare x (rename 'range:))
(compare x (rename ':range:))))
;;; Apply PRED to every element of VALS. Collect & return all the non-#f
;;; values produced.
(define (all-trues pred vals)
(let lp ((vals vals) (ans '()))
(if (pair? vals)
(lp (cdr vals)
(cond ((pred (car vals)) => (lambda (elt) (cons elt ans)))
(else ans)))
(reverse ans))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (expand-awk exp r c)
(let* ((%lambda (r 'lambda)) ; Bind a mess of keywords.
(%let (r 'let))
(%receive (r 'receive))
(%values (r 'values))
(%if (r 'if))
(%eof-object? (r 'eof-object?))
(%after (r 'after))
(%else (r 'else))
(%+ (r '+))
(gensym (let ((i 0))
(lambda (s)
(set! i (+ i 1))
(string->symbol (string-append s (number->string i))))))
;; Is the clause a range-test clause?
(range? (lambda (clause) (range-keyword? (car clause) r c)))
;; Make some standard vars we'll need.
(lp-var (r 'lp))
(reader (r 'read-rec))
;; If I throw in an abort-loop or abort-iteration macro,
;; I'll also need to make two vars for the continuations.
;; Rip the form apart.
(reader-exp (cadr exp))
(rec/field-vars (caddr exp))
(rec-var (car rec/field-vars)) ; The var bound to the record.
(rest (cdddr exp))) ; Stuff after the rec&field-vars.
(receive (rec-counter state-inits clauses) ; Parse out the last
(if (list? (car rest)) ; three parts of the
(values #f (car rest) (cdr rest)) ; form.
(values (car rest) (cadr rest) (cddr rest)))
;; Some analysis: what have we got?
;; Range clauses, else clauses, line num tests,...
(let* ((recnum-tests? ; Do any of the clauses test the record
(any? (lambda (clause) ; count? (I.e., any integer tests?)
(let ((test (car clause)))
(or (integer? test)
(and (range? clause)
(or (integer? (cadr clause))
(integer? (caddr clause)))))))
clauses))
;; If any ELSE clauses, bind this to the var in which we
;; will keep the else state, otherwise #f.
(else-var (and (any? (lambda (clause)
(c (car clause) %else))
clauses)
(r 'elss)))
;; Make a list of state vars for the range clauses.
;; For each range clause, we need a boolean var to track
;; whether or not the range is activated.
(range-vars (all-trues (lambda (clause)
(and (range? clause)
(r (gensym "r."))))
clauses))
(svars (map car state-inits)) ; The user's state variables.
;; If the user didn't declare a record-counter var,
;; but he is testing line numbers (with integer test forms),
;; go ahead and generate a record-counter of our own.
(rec-counter (or rec-counter
(and recnum-tests?
(r (gensym "record-count.")))))
;; Generate the loop vars & their inits.
;; These are: the record counter, the range vars,
;; and the user's state vars.
;; All of these different sets are optional.
(loop-vars (append (if rec-counter (list rec-counter) '())
range-vars
svars))
(loop-var-init-values (append (if rec-counter '(0) '())
(map (lambda (x) #f) range-vars)
(map cadr state-inits)))
;; A LET list initialising all the loop vars.
(loop-var-init (map list loop-vars loop-var-init-values))
;; Build the clause that computes the loop's return value.
;; If the user gave an AFTER clause, use its body. Otherwise,
;; it's (values ,@svars).
(after-clause? (lambda (clause) (c (car clause) %after)))
(after-exp (let ((after-clauses (filter after-clause? clauses)))
(cond ((null? after-clauses)
(mult-values svars r))
((null? (cdr after-clauses))
(blockify (cdar after-clauses) r c))
(else (error "Multiple AFTER clauses in awk body."
after-clauses exp)))))
(loop-body (awk-loop-body lp-var rec-var else-var
rec-counter range-vars svars
clauses r c))
;; Variables that have to be updated per-iteration, as a LET list.
;; Note that we are careful not to increment the record counter
;; until after we've verified the new record isn't EOF.
(per-iteration-updates
(append (if else-var `((,else-var #t)) '()) ; Else state.
(if rec-counter ; Record count.
`((,rec-counter (,%+ ,rec-counter 1)))
'())))
(loop-body (if (pair? per-iteration-updates)
`(,%let ,per-iteration-updates
. ,(deblock loop-body r c))
loop-body)))
`(,%let ((,reader (,%lambda () ,reader-exp)))
(,%let ,lp-var ,loop-var-init
,(mv-let r c rec/field-vars `(,reader)
`(,%if (,%eof-object? ,rec-var) ,after-exp
,loop-body))))))))
;;; Expand into the body of the awk loop -- the code that tests & executes
;;; each clause, and then jumps to the top of the loop.
(define (awk-loop-body lp-var rec-var else-var rec-counter
range-vars svars clauses r c)
(let ((clause-vars (if else-var (cons else-var svars) svars))
(loop-vars (append (if rec-counter (list rec-counter) '())
range-vars
svars))
(range-clause? (lambda (clause) (range-keyword? (car clause) r c)))
(%after (r 'after))
(%else (r 'else)))
(let expand ((clauses clauses) (range-vars range-vars))
(if (pair? clauses)
(let* ((clause (car clauses))
(test (car clause)))
(cond ((range-keyword? test r c)
(let ((tail (expand (cdr clauses) (cdr range-vars))))
(expand-range-clause clause tail (car range-vars)
rec-var else-var rec-counter svars
r c)))
((c test %after) ; An AFTER clause. Skip it.
(expand (cdr clauses) range-vars))
((c test %else) ; An ELSE clause.
(let ((tail (expand (cdr clauses) range-vars)))
(expand-else-clause clause tail else-var svars r c)))
(else ; A simple clause.
(let ((tail (expand (cdr clauses) range-vars)))
(expand-simple-clause clause tail
rec-var else-var rec-counter svars
r c)))))
;; No clauses -- just jump to top of loop.
`(,lp-var . ,loop-vars)))))
;;; Make a Scheme expression out of a test form.
;;; Integer i => (= i <record-counter>)
;;; String s => (string-match s <record>)
;;; Expression e => e
(define (->simple-clause-test test-form rec-var rec-counter r)
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
((string? test-form) `(,(r 'string-match) ,test-form ,rec-var))
(else test-form)))
(define (expand-simple-clause clause tail
rec-var else-var rec-counter svars
r c)
(let* ((%let (r 'let))
(%= (r '=))
(%string-match (r 'string-match))
(%arrow (r '=>))
(%if (r 'if))
(test (car clause))
(test (->simple-clause-test test rec-var rec-counter r))
;; Is clause of the form (test => proc)
(arrow? (and (= 3 (length clause))
(c (cadr clause) %arrow)))
(null-clause-list (null-clause-action else-var svars r))
;; The core form conditionally executes the body.
;; It returns the new else var and the new state vars, if any.
(core (if arrow?
(let* ((tv (r 'tval)) ; APP is the actual
(app `(,(caddr clause) ,tv))) ; body: (proc tv).
`(,%let ((,tv ,test))
(,%if ,tv
,(clause-action (list app) else-var svars r c)
. ,null-clause-list)))
`(,%if ,test ,(clause-action (cdr clause) else-var svars r c)
. ,null-clause-list)))
(loop-vars (if else-var (cons else-var svars) svars)))
;; Do the core computation, update the iteration vars,
;; and then do the tail in the scope of the updated environment.
(core-then-tail loop-vars core tail r c)))
(define (core-then-tail loop-vars core tail r c)
(mv-let r c loop-vars core tail))
(define (expand-range-clause clause tail range-var
rec-var else-var rec-counter svars
r c)
(let* ((start-test (cadr clause))
(stop-test (caddr clause))
(body (cdddr clause))
(%receive (r 'receive))
(%if (r 'if))
(%lambda (r 'lambda))
(keyword (car clause)) ; range or :range or range: or :range:
(tester (r (cond ((c keyword (r 'range)) 'next-range)
((c keyword (r ':range)) 'next-:range)
((c keyword (r 'range:)) 'next-range:)
((c keyword (r ':range:)) 'next-:range:)
(else (error "Unrecognised range keyword!" clause)))))
;; Convert the start and stop test forms to code.
(start-test (->simple-clause-test start-test rec-var rec-counter r))
(stop-test (->simple-clause-test stop-test rec-var rec-counter r))
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
(stop-thunk `(,%lambda () ,stop-test))
(loop-vars (if else-var (cons else-var svars) svars))
(this-rec (r 'this-record?))
(core `(,%if ,this-rec
,(clause-action body else-var svars r c)
. ,(null-clause-action else-var svars r))))
`(,%receive (,this-rec ,range-var)
(,tester ,start-thunk ,stop-thunk ,range-var)
,(core-then-tail loop-vars core tail r c))))
(define (expand-else-clause clause tail else-var svars r c)
(let* ((body (cdr clause))
(tail-exps (deblock tail r c))
(%if (r 'if))
(%receive (r 'receive))
(%let (r 'let))
;; We are hard-wiring the else var to #t after this, so the core
;; expression doesn't need to return it -- just the new values
;; of the user's state vars.
(core `(,%if ,else-var
,(clause-action body #f svars r c)
. ,(sloppy-mult-values svars r))))
(mv-let r c svars core `(,%let ((,else-var #t)) . ,tail-exps))))
;;; BODY is a list of expressions from a loop clause. We want to evaluate it,
;;; under some conditions.
;;; - The body evaluates to multiple values, one for each state variable.
;;; However, if there are no state variables, we want to *ignore* the
;;; values produced by the body, and explicitly return 0 values,
;;; not blow up if the body should happen not to return exactly zero values.
;;; - If we are tracking an else-variable, then the body firing will turn
;;; it off by returning its new #f value.
(define (clause-action body else-var svars r c)
(let ((%values (r 'values))
(%receive (r 'receive)))
(blockify (if (pair? svars)
(if else-var
(if (cdr svars) ; We've got state vars and an else var.
`((,%receive ,svars ,(blockify body r c)
(,%values #f . ,svars)))
`((,%values #f ,(blockify body r c)))) ; Gratuitous.
body) ; State vars, but no else var.
;; No state vars -- ignore value computed by BODY forms.
`(,@body . ,(if else-var '(#f) `())))
r c)))
;;; The clause didn't execute. Return the svars unchanged, and also
;;; return the current else-value if we are tracking one. We return
;;; a 0 or 1 element expression list -- if no values are being expected
;;; this returns the empty list.
(define (null-clause-action else-var svars r)
(sloppy-mult-values (if else-var (cons else-var svars) svars)
r))
;;; These procs are for handling RANGE clauses.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; First return value tells whether this line is active;
;;; next value tells whether region is active after this line.
;;;
;;; (:range 0 4) = 0 1 2 3 This is the most useful one.
;;; (range: 0 4) = 1 2 3 4
;;; (range 0 4) = 1 2 3
;;; (:range: 0 4) = 0 1 2 3 4
;;; If these were inlined and the test thunks substituted, it would
;;; be acceptably efficient. But who writes Scheme compilers that good
;;; in the 90's?
(define (next-:range start-test stop-test state)
(let ((new-state (if state
(or (not (stop-test)) ; Stop,
(start-test)) ; but restart.
(and (start-test) ; Start,
(not (stop-test)))))) ; but stop, too.
(values new-state new-state)))
(define (next-range: start-test stop-test state)
(values state
(if state
(or (not (stop-test)) ; Stop,
(start-test)) ; but restart.
(and (start-test) ; Start,
(not (stop-test)))))) ; but stop, too.
(define (next-range start-test stop-test state)
(if state
(let ((not-stop (not (stop-test))))
(values not-stop
(or not-stop ; Stop,
(start-test)))) ; but restart.
(values #f
(and (start-test) ; Start,
(not (stop-test)))))) ; but stop, too.
(define (next-:range: start-test stop-test state)
(if state
(values #t
(or (not (stop-test)) ; Stop
(start-test))) ; but restart.
(let ((start? (start-test)))
(values start?
(and start? ; Start,
(not (stop-test))))))) ; but stop, too.