AWK now pre-compiles its regexps outside the main loop.

This commit is contained in:
shivers 1996-09-08 09:08:47 +00:00
parent 9610aeab66
commit d16ad942c0
1 changed files with 61 additions and 14 deletions

View File

@ -155,6 +155,7 @@
(%after (r 'after))
(%else (r 'else))
(%+ (r '+))
(%make-regexp (r 'make-regexp))
(gensym (let ((i 0))
(lambda (s)
@ -199,6 +200,44 @@
clauses)
(r 'elss)))
;; We compile all of the regexp patterns into regexp
;; data structures outside the AWK loop. So we need to
;; make a list of all the regexps that are used as tests.
(patterns (apply append
(map (lambda (clause)
(let ((test (car clause)))
(cond ((string? test) (list test))
((range? clause)
(let ((t1 (cadr clause))
(t2 (caddr clause)))
(append (if (string? t1)
(list t1)
'())
(if (string? t2)
(list t2)
'()))))
(else '()))))
clauses)))
;; Gratuitous optimisation: uniquify the patterns.
(patterns (let recur ((pats patterns))
(if (pair? pats)
(let ((pat (car pats))
(ans (recur (cdr pats))))
(if (member pat ans) ans (cons pat ans)))
'())))
;; An alist matching regexp patterns with the vars to which
;; we will bind their compiled regexp data structure.
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
patterns))
;; A LET-list binding the regexp vars to their compiled regexps.
(regexp-inits (map (lambda (p/v)
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
pats/vars))
;; Make a list of state vars for the range clauses.
;; For each range clause, we need a boolean var to track
;; whether or not the range is activated.
@ -244,7 +283,7 @@
(loop-body (awk-loop-body lp-var rec-var else-var
rec-counter range-vars svars
clauses r c))
clauses pats/vars r c))
;; Variables that have to be updated per-iteration, as a LET list.
;; Note that we are careful not to increment the record counter
@ -260,7 +299,8 @@
. ,(deblock loop-body r c))
loop-body)))
`(,%let ((,reader (,%lambda () ,reader-exp)))
`(,%let ((,reader (,%lambda () ,reader-exp))
. ,regexp-inits)
(,%let ,lp-var ,loop-var-init
,(mv-let r c rec/field-vars `(,reader)
`(,%if (,%eof-object? ,rec-var) ,after-exp
@ -271,7 +311,7 @@
;;; each clause, and then jumps to the top of the loop.
(define (awk-loop-body lp-var rec-var else-var rec-counter
range-vars svars clauses r c)
range-vars svars clauses pats/vars r c)
(let ((clause-vars (if else-var (cons else-var svars) svars))
(loop-vars (append (if rec-counter (list rec-counter) '())
range-vars
@ -289,6 +329,7 @@
(let ((tail (expand (cdr clauses) (cdr range-vars))))
(expand-range-clause clause tail (car range-vars)
rec-var else-var rec-counter svars
pats/vars
r c)))
((c test %after) ; An AFTER clause. Skip it.
@ -302,7 +343,7 @@
(let ((tail (expand (cdr clauses) range-vars)))
(expand-simple-clause clause tail
rec-var else-var rec-counter svars
r c)))))
pats/vars r c)))))
;; No clauses -- just jump to top of loop.
`(,lp-var . ,loop-vars)))))
@ -310,18 +351,22 @@
;;; Make a Scheme expression out of a test form.
;;; Integer i => (= i <record-counter>)
;;; String s => (string-match s <record>)
;;; String s => (regexp-exec s <record>)
;;; Expression e => e
(define (->simple-clause-test test-form rec-var rec-counter r)
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
((string? test-form) `(,(r 'string-match) ,test-form ,rec-var))
((string? test-form)
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
(else (error "Impossible AWK error -- unknown regexp"
test-form pats/vars)))))
`(,(r 'regexp-exec) ,re-var ,rec-var)))
(else test-form)))
(define (expand-simple-clause clause tail
rec-var else-var rec-counter svars
r c)
pats/vars r c)
(let* ((%let (r 'let))
(%= (r '=))
(%string-match (r 'string-match))
@ -329,7 +374,7 @@
(%if (r 'if))
(test (car clause))
(test (->simple-clause-test test rec-var rec-counter r))
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
;; Is clause of the form (test => proc)
(arrow? (and (= 3 (length clause))
@ -361,7 +406,7 @@
(define (expand-range-clause clause tail range-var
rec-var else-var rec-counter svars
r c)
pats/vars r c)
(let* ((start-test (cadr clause))
(stop-test (caddr clause))
(body (cdddr clause))
@ -378,8 +423,10 @@
(else (error "Unrecognised range keyword!" clause)))))
;; Convert the start and stop test forms to code.
(start-test (->simple-clause-test start-test rec-var rec-counter r))
(stop-test (->simple-clause-test stop-test rec-var rec-counter r))
(start-test (->simple-clause-test start-test rec-var
rec-counter pats/vars r))
(stop-test (->simple-clause-test stop-test rec-var
rec-counter pats/vars r))
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
(stop-thunk `(,%lambda () ,stop-test))
@ -430,7 +477,7 @@
(blockify (if (pair? svars)
(if else-var
(if (cdr svars) ; We've got state vars and an else var.
(if (pair? (cdr svars)) ; state vars and an else var.
`((,%receive ,svars ,(blockify body r c)
(,%values #f . ,svars)))
`((,%values #f ,(blockify body r c)))) ; Gratuitous.