549 lines
19 KiB
Scheme
549 lines
19 KiB
Scheme
;;; An awk loop, after the design of David Albertz and Olin Shivers.
|
|
;;; Copyright (c) 1994 by Olin Shivers.
|
|
|
|
;;; - Requires RECEIVE from RECEIVING package.
|
|
;;; - Would require DESTRUCTURE from DESTRUCTURING package, but it appears
|
|
;;; to be broken, so we hack it w/cars and cdrs.
|
|
;;; - Requires STRING-MATCH from SCSH package.
|
|
|
|
;;; Examples:
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;
|
|
;;; ;;; Filter -- pass only lines containing my name.
|
|
;;; (awk (read-line) (line) ()
|
|
;;; ("Olin" (display line) (newline)))
|
|
;;;
|
|
;;; ;;; Count the number of non-comment lines of code in my Scheme source.
|
|
;;; (awk (read-line) (line) ((nlines 0))
|
|
;;; ("^[ \t]*;" nlines) ; A comment line.
|
|
;;; (else (+ nlines 1))) ; Not a comment line.
|
|
;;;
|
|
;;; ;;; Read numbers, counting the evens and odds.
|
|
;;; (awk (read) (val) ((evens 0) (odds 0))
|
|
;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about
|
|
;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too.
|
|
;;; (else (display "neg ") (values evens odds))
|
|
;;;
|
|
;;; ((even? val) (values (+ evens 1) odds))
|
|
;;; (else (values evens (+ odds 1))))
|
|
|
|
;;; Syntax:
|
|
;;; (awk <reader-exp> <rec&field-vars> [<rec-counter>] <state-var-inits>
|
|
;;; <clause1>
|
|
;;; .
|
|
;;; .
|
|
;;; <clausen>)
|
|
|
|
;;; This macro is written using Clinger/Rees explicit-renaming low-level
|
|
;;; macros. So it is pretty ugly. It takes a little care to generate
|
|
;;; cosmetically attractive code, for two reasons:
|
|
;;; - It makes it easier for humans to examine the expanded code.
|
|
;;; - It helps low-tech compilers compile the code well. Some of the
|
|
;;; optimisations the expander implements would be hard for even a
|
|
;;; sophisticated compiler to perform automatically. For example, it doesn't
|
|
;;; introduce a record-counter variable unless required to do so. It's a
|
|
;;; non-trivial analysis to spot and remove an unused loop variable (I show
|
|
;;; how to do so in my dissertation; I don't know of any production
|
|
;;; compilers that do it). Same remarks apply to the variable that tracks
|
|
;;; the state bit for ELSE clauses -- we don't introduce one unless the loop
|
|
;;; actually contains ELSE clauses. The lesson here is that loop macros
|
|
;;; by definition have information about the data-flow of their bodies that
|
|
;;; compilers have to work hard to spot by analysis of their expanded forms.
|
|
;;; The macro can exploit this knowledge at the high-level.
|
|
;;;
|
|
;;; Interesting research issue: Could one design a macro system that would
|
|
;;; allow the macro to communicate this knowledge to the compiler? Could
|
|
;;; the macro's assertions be verified by the compiler, as well?
|
|
;;;
|
|
;;; In any even, there's a down-side to this cosmetic clean-up:
|
|
;;; all of this optimisation definitely makes the macro a lot more hairy
|
|
;;; than it would otherwise be. The expanded code is easier to read; the
|
|
;;; macro itself is harder to read.
|
|
|
|
|
|
;;; Simple syntax-hacking utilities.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Return a form that produces multiple values.
|
|
;;; () => (values)
|
|
;;; (v) => v
|
|
;;; (v1 v2 ...) => (values v1 v2 ...)
|
|
|
|
(define (mult-values vals rename)
|
|
(if (or (not (pair? vals)) (pair? (cdr vals)))
|
|
`(,(rename 'values) . ,vals)
|
|
(car vals)))
|
|
|
|
;;; () => ()
|
|
;;; (v1) => (v1)
|
|
;;; (v1 v2) => ((VALUES v1 v2))
|
|
;;;
|
|
;;; Return an expression list, not an expression. (Either 1 or 0 expressions.)
|
|
;;; Use this one when we don't care what happens if we are returning 0 vals.
|
|
;;; It pairs up with MV-LET below, which ignores the number of values
|
|
;;; returned to it when expecting zero values.
|
|
|
|
(define (sloppy-mult-values vals rename)
|
|
(if (and (pair? vals) (pair? (cdr vals)))
|
|
`((,(rename 'values) . ,vals))
|
|
vals))
|
|
|
|
;; DEBLOCK maps an expression to a list of expressions, flattening BEGINS.
|
|
;; (deblock '(begin (begin 3 4) 5 6 (begin 7 8))) => (3 4 5 6 7 8)
|
|
|
|
(define (deblock exp rename compare)
|
|
(let ((%block (rename 'begin)))
|
|
(let deblock1 ((exp exp))
|
|
(if (and (pair? exp)
|
|
; (name? (car exp))
|
|
(compare %block (car exp)))
|
|
(apply append (map deblock1 (cdr exp)))
|
|
(list exp)))))
|
|
|
|
;; BLOCKIFY maps an expression list to a BEGIN form, flattening nested BEGINS.
|
|
;; (blockify '( (begin 3 4) 5 (begin 6) )) => (begin 3 4 5 6)
|
|
|
|
(define (blockify exps rename compare)
|
|
(let ((new-exps (apply append
|
|
(map (lambda (exp) (deblock exp rename compare))
|
|
exps))))
|
|
(cond ((null? new-exps)
|
|
(error "Empty BEGIN" exps))
|
|
((null? (cdr new-exps)) ; (begin exp) => exp
|
|
(car new-exps))
|
|
(else `(,(rename 'begin) . ,new-exps)))))
|
|
|
|
|
|
(define (mv-let r c vars exp body)
|
|
(if (pair? vars)
|
|
(if (pair? (cdr vars))
|
|
`(,(r 'receive) ,vars ,exp . ,(deblock body r c))
|
|
`(,(r 'let) ((,(car vars) ,exp)) . ,(deblock body r c)))
|
|
(blockify (list exp body) r c)))
|
|
|
|
|
|
;;; Is X one of the keywords {range, :range, range:, :range:}?
|
|
(define (range-keyword? x rename compare)
|
|
(or (compare x (rename 'range))
|
|
(compare x (rename ':range))
|
|
(compare x (rename 'range:))
|
|
(compare x (rename ':range:))))
|
|
|
|
;;; Apply PRED to every element of VALS. Collect & return all the non-#f
|
|
;;; values produced.
|
|
(define (all-trues pred vals)
|
|
(let lp ((vals vals) (ans '()))
|
|
(if (pair? vals)
|
|
(lp (cdr vals)
|
|
(cond ((pred (car vals)) => (lambda (elt) (cons elt ans)))
|
|
(else ans)))
|
|
(reverse ans))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define (expand-awk exp r c)
|
|
(let* ((%lambda (r 'lambda)) ; Bind a mess of keywords.
|
|
(%let (r 'let))
|
|
(%receive (r 'receive))
|
|
(%values (r 'values))
|
|
(%if (r 'if))
|
|
(%eof-object? (r 'eof-object?))
|
|
(%after (r 'after))
|
|
(%else (r 'else))
|
|
(%+ (r '+))
|
|
(%make-regexp (r 'make-regexp))
|
|
|
|
(gensym (let ((i 0))
|
|
(lambda (s)
|
|
(set! i (+ i 1))
|
|
(string->symbol (string-append s (number->string i))))))
|
|
|
|
;; Is the clause a range-test clause?
|
|
(range? (lambda (clause) (range-keyword? (car clause) r c)))
|
|
|
|
;; Make some standard vars we'll need.
|
|
(lp-var (r 'lp))
|
|
(reader (r 'read-rec))
|
|
;; If I throw in an abort-loop or abort-iteration macro,
|
|
;; I'll also need to make two vars for the continuations.
|
|
|
|
;; Rip the form apart.
|
|
(reader-exp (cadr exp))
|
|
(rec/field-vars (caddr exp))
|
|
(rec-var (car rec/field-vars)) ; The var bound to the record.
|
|
(rest (cdddr exp))) ; Stuff after the rec&field-vars.
|
|
|
|
(receive (rec-counter state-inits clauses) ; Parse out the last
|
|
(if (list? (car rest)) ; three parts of the
|
|
(values #f (car rest) (cdr rest)) ; form.
|
|
(values (car rest) (cadr rest) (cddr rest)))
|
|
|
|
;; Some analysis: what have we got?
|
|
;; Range clauses, else clauses, line num tests,...
|
|
(let* ((recnum-tests? ; Do any of the clauses test the record
|
|
(any? (lambda (clause) ; count? (I.e., any integer tests?)
|
|
(let ((test (car clause)))
|
|
(or (integer? test)
|
|
(and (range? clause)
|
|
(or (integer? (cadr clause))
|
|
(integer? (caddr clause)))))))
|
|
clauses))
|
|
|
|
;; If any ELSE clauses, bind this to the var in which we
|
|
;; will keep the else state, otherwise #f.
|
|
(else-var (and (any? (lambda (clause)
|
|
(c (car clause) %else))
|
|
clauses)
|
|
(r 'else)))
|
|
|
|
;; We compile all of the regexp patterns into regexp
|
|
;; data structures outside the AWK loop. So we need to
|
|
;; make a list of all the regexps that are used as tests.
|
|
(patterns (apply append
|
|
(map (lambda (clause)
|
|
(let ((test (car clause)))
|
|
(cond ((string? test) (list test))
|
|
((range? clause)
|
|
(let ((t1 (cadr clause))
|
|
(t2 (caddr clause)))
|
|
(append (if (string? t1)
|
|
(list t1)
|
|
'())
|
|
(if (string? t2)
|
|
(list t2)
|
|
'()))))
|
|
(else '()))))
|
|
clauses)))
|
|
|
|
;; Gratuitous optimisation: uniquify the patterns.
|
|
(patterns (let recur ((pats patterns))
|
|
(if (pair? pats)
|
|
(let ((pat (car pats))
|
|
(ans (recur (cdr pats))))
|
|
(if (member pat ans) ans (cons pat ans)))
|
|
'())))
|
|
|
|
;; An alist matching regexp patterns with the vars to which
|
|
;; we will bind their compiled regexp data structure.
|
|
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
|
|
patterns))
|
|
|
|
;; A LET-list binding the regexp vars to their compiled regexps.
|
|
(regexp-inits (map (lambda (p/v)
|
|
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
|
|
pats/vars))
|
|
|
|
|
|
;; Make a list of state vars for the range clauses.
|
|
;; For each range clause, we need a boolean var to track
|
|
;; whether or not the range is activated.
|
|
(range-vars (all-trues (lambda (clause)
|
|
(and (range? clause)
|
|
(r (gensym "r."))))
|
|
clauses))
|
|
|
|
(svars (map car state-inits)) ; The user's state variables.
|
|
|
|
;; If the user didn't declare a record-counter var,
|
|
;; but he is testing line numbers (with integer test forms),
|
|
;; go ahead and generate a record-counter of our own.
|
|
(rec-counter (or rec-counter
|
|
(and recnum-tests?
|
|
(r (gensym "record-count.")))))
|
|
|
|
;; Generate the loop vars & their inits.
|
|
;; These are: the record counter, the range vars,
|
|
;; and the user's state vars.
|
|
;; All of these different sets are optional.
|
|
(loop-vars (append (if rec-counter (list rec-counter) '())
|
|
range-vars
|
|
svars))
|
|
(loop-var-init-values (append (if rec-counter '(0) '())
|
|
(map (lambda (x) #f) range-vars)
|
|
(map cadr state-inits)))
|
|
;; A LET list initialising all the loop vars.
|
|
(loop-var-init (map list loop-vars loop-var-init-values))
|
|
|
|
;; Build the clause that computes the loop's return value.
|
|
;; If the user gave an AFTER clause, use its body. Otherwise,
|
|
;; it's (values ,@svars).
|
|
(after-clause? (lambda (clause) (c (car clause) %after)))
|
|
(after-exp (let ((after-clauses (filter after-clause? clauses)))
|
|
(cond ((null? after-clauses)
|
|
(mult-values svars r))
|
|
((null? (cdr after-clauses))
|
|
(blockify (cdar after-clauses) r c))
|
|
(else (error "Multiple AFTER clauses in awk body."
|
|
after-clauses exp)))))
|
|
|
|
|
|
(loop-body (awk-loop-body lp-var rec-var else-var
|
|
rec-counter range-vars svars
|
|
clauses pats/vars r c))
|
|
|
|
;; Variables that have to be updated per-iteration, as a LET list.
|
|
;; Note that we are careful not to increment the record counter
|
|
;; until after we've verified the new record isn't EOF.
|
|
(per-iteration-updates
|
|
(append (if else-var `((,else-var #t)) '()) ; Else state.
|
|
(if rec-counter ; Record count.
|
|
`((,rec-counter (,%+ ,rec-counter 1)))
|
|
'())))
|
|
|
|
(loop-body (if (pair? per-iteration-updates)
|
|
`(,%let ,per-iteration-updates
|
|
. ,(deblock loop-body r c))
|
|
loop-body)))
|
|
|
|
`(,%let ((,reader (,%lambda () ,reader-exp))
|
|
. ,regexp-inits)
|
|
(,%let ,lp-var ,loop-var-init
|
|
,(mv-let r c rec/field-vars `(,reader)
|
|
`(,%if (,%eof-object? ,rec-var) ,after-exp
|
|
,loop-body))))))))
|
|
|
|
|
|
;;; Expand into the body of the awk loop -- the code that tests & executes
|
|
;;; each clause, and then jumps to the top of the loop.
|
|
|
|
(define (awk-loop-body lp-var rec-var else-var rec-counter
|
|
range-vars svars clauses pats/vars r c)
|
|
(let ((clause-vars (if else-var (cons else-var svars) svars))
|
|
(loop-vars (append (if rec-counter (list rec-counter) '())
|
|
range-vars
|
|
svars))
|
|
(range-clause? (lambda (clause) (range-keyword? (car clause) r c)))
|
|
|
|
(%after (r 'after))
|
|
(%else (r 'else)))
|
|
|
|
(let expand ((clauses clauses) (range-vars range-vars))
|
|
(if (pair? clauses)
|
|
(let* ((clause (car clauses))
|
|
(test (car clause)))
|
|
(cond ((range-keyword? test r c)
|
|
(let ((tail (expand (cdr clauses) (cdr range-vars))))
|
|
(expand-range-clause clause tail (car range-vars)
|
|
rec-var else-var rec-counter svars
|
|
pats/vars
|
|
r c)))
|
|
|
|
((c test %after) ; An AFTER clause. Skip it.
|
|
(expand (cdr clauses) range-vars))
|
|
|
|
((c test %else) ; An ELSE clause.
|
|
(let ((tail (expand (cdr clauses) range-vars)))
|
|
(expand-else-clause clause tail else-var svars r c)))
|
|
|
|
(else ; A simple clause.
|
|
(let ((tail (expand (cdr clauses) range-vars)))
|
|
(expand-simple-clause clause tail
|
|
rec-var else-var rec-counter svars
|
|
pats/vars r c)))))
|
|
|
|
;; No clauses -- just jump to top of loop.
|
|
`(,lp-var . ,loop-vars)))))
|
|
|
|
|
|
;;; Make a Scheme expression out of a test form.
|
|
;;; Integer i => (= i <record-counter>)
|
|
;;; String s => (regexp-exec s <record>)
|
|
;;; Expression e => e
|
|
|
|
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
|
|
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
|
|
((string? test-form)
|
|
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
|
|
(else (error "Impossible AWK error -- unknown regexp"
|
|
test-form pats/vars)))))
|
|
`(,(r 'regexp-exec) ,re-var ,rec-var)))
|
|
(else test-form)))
|
|
|
|
|
|
(define (expand-simple-clause clause tail
|
|
rec-var else-var rec-counter svars
|
|
pats/vars r c)
|
|
(let* ((%let (r 'let))
|
|
(%= (r '=))
|
|
(%string-match (r 'string-match))
|
|
(%arrow (r '=>))
|
|
(%if (r 'if))
|
|
|
|
(test (car clause))
|
|
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
|
|
|
|
;; Is clause of the form (test => proc)
|
|
(arrow? (and (= 3 (length clause))
|
|
(c (cadr clause) %arrow)))
|
|
|
|
(null-clause-list (null-clause-action else-var svars r))
|
|
|
|
;; The core form conditionally executes the body.
|
|
;; It returns the new else var and the new state vars, if any.
|
|
(core (if arrow?
|
|
(let* ((tv (r 'tval)) ; APP is the actual
|
|
(app `(,(caddr clause) ,tv))) ; body: (proc tv).
|
|
`(,%let ((,tv ,test))
|
|
(,%if ,tv
|
|
,(clause-action (list app) else-var svars r c)
|
|
. ,null-clause-list)))
|
|
|
|
`(,%if ,test ,(clause-action (cdr clause) else-var svars r c)
|
|
. ,null-clause-list)))
|
|
|
|
(loop-vars (if else-var (cons else-var svars) svars)))
|
|
|
|
;; Do the core computation, update the iteration vars,
|
|
;; and then do the tail in the scope of the updated environment.
|
|
(core-then-tail loop-vars core tail r c)))
|
|
|
|
(define (core-then-tail loop-vars core tail r c)
|
|
(mv-let r c loop-vars core tail))
|
|
|
|
(define (expand-range-clause clause tail range-var
|
|
rec-var else-var rec-counter svars
|
|
pats/vars r c)
|
|
(let* ((start-test (cadr clause))
|
|
(stop-test (caddr clause))
|
|
(body (cdddr clause))
|
|
|
|
(%receive (r 'receive))
|
|
(%if (r 'if))
|
|
(%lambda (r 'lambda))
|
|
|
|
(keyword (car clause)) ; range or :range or range: or :range:
|
|
(tester (r (cond ((c keyword (r 'range)) 'next-range)
|
|
((c keyword (r ':range)) 'next-:range)
|
|
((c keyword (r 'range:)) 'next-range:)
|
|
((c keyword (r ':range:)) 'next-:range:)
|
|
(else (error "Unrecognised range keyword!" clause)))))
|
|
|
|
;; Convert the start and stop test forms to code.
|
|
(start-test (->simple-clause-test start-test rec-var
|
|
rec-counter pats/vars r))
|
|
(stop-test (->simple-clause-test stop-test rec-var
|
|
rec-counter pats/vars r))
|
|
|
|
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
|
|
(stop-thunk `(,%lambda () ,stop-test))
|
|
|
|
(loop-vars (if else-var (cons else-var svars) svars))
|
|
(this-rec (r 'this-record?))
|
|
|
|
(core `(,%if ,this-rec
|
|
,(clause-action body else-var svars r c)
|
|
. ,(null-clause-action else-var svars r))))
|
|
|
|
`(,%receive (,this-rec ,range-var)
|
|
(,tester ,start-thunk ,stop-thunk ,range-var)
|
|
,(core-then-tail loop-vars core tail r c))))
|
|
|
|
|
|
(define (expand-else-clause clause tail else-var svars r c)
|
|
(let* ((body (cdr clause))
|
|
(tail-exps (deblock tail r c))
|
|
|
|
(%if (r 'if))
|
|
(%receive (r 'receive))
|
|
(%let (r 'let))
|
|
|
|
;; We are hard-wiring the else var to #t after this, so the core
|
|
;; expression doesn't need to return it -- just the new values
|
|
;; of the user's state vars.
|
|
(core `(,%if ,else-var
|
|
,(clause-action body #f svars r c)
|
|
. ,(sloppy-mult-values svars r))))
|
|
|
|
(mv-let r c svars core `(,%let ((,else-var #t)) . ,tail-exps))))
|
|
|
|
|
|
;;; BODY is a list of expressions from a loop clause. We want to evaluate it,
|
|
;;; under some conditions.
|
|
;;; - The body evaluates to multiple values, one for each state variable.
|
|
;;; However, if there are no state variables, we want to *ignore* the
|
|
;;; values produced by the body, and explicitly return 0 values,
|
|
;;; not blow up if the body should happen not to return exactly zero values.
|
|
;;; - If we are tracking an else-variable, then the body firing will turn
|
|
;;; it off by returning its new #f value.
|
|
|
|
(define (clause-action body else-var svars r c)
|
|
(let ((%values (r 'values))
|
|
(%receive (r 'receive)))
|
|
|
|
(blockify (if (pair? svars)
|
|
|
|
(if else-var
|
|
(if (pair? (cdr svars)) ; state vars and an else var.
|
|
`((,%receive ,svars ,(blockify body r c)
|
|
(,%values #f . ,svars)))
|
|
`((,%values #f ,(blockify body r c)))) ; Gratuitous.
|
|
body) ; State vars, but no else var.
|
|
|
|
;; No state vars -- ignore value computed by BODY forms.
|
|
`(,@body . ,(if else-var '(#f) `())))
|
|
r c)))
|
|
|
|
|
|
;;; The clause didn't execute. Return the svars unchanged, and also
|
|
;;; return the current else-value if we are tracking one. We return
|
|
;;; a 0 or 1 element expression list -- if no values are being expected
|
|
;;; this returns the empty list.
|
|
|
|
(define (null-clause-action else-var svars r)
|
|
(sloppy-mult-values (if else-var (cons else-var svars) svars)
|
|
r))
|
|
|
|
|
|
|
|
;;; These procs are for handling RANGE clauses.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; First return value tells whether this line is active;
|
|
;;; next value tells whether region is active after this line.
|
|
;;;
|
|
;;; (:range 0 4) = 0 1 2 3 This is the most useful one.
|
|
;;; (range: 0 4) = 1 2 3 4
|
|
;;; (range 0 4) = 1 2 3
|
|
;;; (:range: 0 4) = 0 1 2 3 4
|
|
|
|
;;; If these were inlined and the test thunks substituted, it would
|
|
;;; be acceptably efficient. But who writes Scheme compilers that good
|
|
;;; in the 90's?
|
|
|
|
(define (next-:range start-test stop-test state)
|
|
(let ((new-state (if state
|
|
(or (not (stop-test)) ; Stop,
|
|
(start-test)) ; but restart.
|
|
|
|
(and (start-test) ; Start,
|
|
(not (stop-test)))))) ; but stop, too.
|
|
(values new-state new-state)))
|
|
|
|
(define (next-range: start-test stop-test state)
|
|
(values state
|
|
(if state
|
|
(or (not (stop-test)) ; Stop,
|
|
(start-test)) ; but restart.
|
|
(and (start-test) ; Start,
|
|
(not (stop-test)))))) ; but stop, too.
|
|
|
|
(define (next-range start-test stop-test state)
|
|
(if state
|
|
(let ((not-stop (not (stop-test))))
|
|
(values not-stop
|
|
(or not-stop ; Stop,
|
|
(start-test)))) ; but restart.
|
|
(values #f
|
|
(and (start-test) ; Start,
|
|
(not (stop-test)))))) ; but stop, too.
|
|
|
|
(define (next-:range: start-test stop-test state)
|
|
(if state
|
|
(values #t
|
|
(or (not (stop-test)) ; Stop
|
|
(start-test))) ; but restart.
|
|
|
|
(let ((start? (start-test)))
|
|
(values start?
|
|
(and start? ; Start,
|
|
(not (stop-test))))))) ; but stop, too.
|