AWK now pre-compiles its regexps outside the main loop.
This commit is contained in:
parent
9610aeab66
commit
d16ad942c0
73
scsh/awk.scm
73
scsh/awk.scm
|
@ -155,6 +155,7 @@
|
|||
(%after (r 'after))
|
||||
(%else (r 'else))
|
||||
(%+ (r '+))
|
||||
(%make-regexp (r 'make-regexp))
|
||||
|
||||
(gensym (let ((i 0))
|
||||
(lambda (s)
|
||||
|
@ -199,6 +200,44 @@
|
|||
clauses)
|
||||
(r 'elss)))
|
||||
|
||||
;; We compile all of the regexp patterns into regexp
|
||||
;; data structures outside the AWK loop. So we need to
|
||||
;; make a list of all the regexps that are used as tests.
|
||||
(patterns (apply append
|
||||
(map (lambda (clause)
|
||||
(let ((test (car clause)))
|
||||
(cond ((string? test) (list test))
|
||||
((range? clause)
|
||||
(let ((t1 (cadr clause))
|
||||
(t2 (caddr clause)))
|
||||
(append (if (string? t1)
|
||||
(list t1)
|
||||
'())
|
||||
(if (string? t2)
|
||||
(list t2)
|
||||
'()))))
|
||||
(else '()))))
|
||||
clauses)))
|
||||
|
||||
;; Gratuitous optimisation: uniquify the patterns.
|
||||
(patterns (let recur ((pats patterns))
|
||||
(if (pair? pats)
|
||||
(let ((pat (car pats))
|
||||
(ans (recur (cdr pats))))
|
||||
(if (member pat ans) ans (cons pat ans)))
|
||||
'())))
|
||||
|
||||
;; An alist matching regexp patterns with the vars to which
|
||||
;; we will bind their compiled regexp data structure.
|
||||
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
|
||||
patterns))
|
||||
|
||||
;; A LET-list binding the regexp vars to their compiled regexps.
|
||||
(regexp-inits (map (lambda (p/v)
|
||||
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
|
||||
pats/vars))
|
||||
|
||||
|
||||
;; Make a list of state vars for the range clauses.
|
||||
;; For each range clause, we need a boolean var to track
|
||||
;; whether or not the range is activated.
|
||||
|
@ -244,7 +283,7 @@
|
|||
|
||||
(loop-body (awk-loop-body lp-var rec-var else-var
|
||||
rec-counter range-vars svars
|
||||
clauses r c))
|
||||
clauses pats/vars r c))
|
||||
|
||||
;; Variables that have to be updated per-iteration, as a LET list.
|
||||
;; Note that we are careful not to increment the record counter
|
||||
|
@ -260,7 +299,8 @@
|
|||
. ,(deblock loop-body r c))
|
||||
loop-body)))
|
||||
|
||||
`(,%let ((,reader (,%lambda () ,reader-exp)))
|
||||
`(,%let ((,reader (,%lambda () ,reader-exp))
|
||||
. ,regexp-inits)
|
||||
(,%let ,lp-var ,loop-var-init
|
||||
,(mv-let r c rec/field-vars `(,reader)
|
||||
`(,%if (,%eof-object? ,rec-var) ,after-exp
|
||||
|
@ -271,7 +311,7 @@
|
|||
;;; each clause, and then jumps to the top of the loop.
|
||||
|
||||
(define (awk-loop-body lp-var rec-var else-var rec-counter
|
||||
range-vars svars clauses r c)
|
||||
range-vars svars clauses pats/vars r c)
|
||||
(let ((clause-vars (if else-var (cons else-var svars) svars))
|
||||
(loop-vars (append (if rec-counter (list rec-counter) '())
|
||||
range-vars
|
||||
|
@ -289,6 +329,7 @@
|
|||
(let ((tail (expand (cdr clauses) (cdr range-vars))))
|
||||
(expand-range-clause clause tail (car range-vars)
|
||||
rec-var else-var rec-counter svars
|
||||
pats/vars
|
||||
r c)))
|
||||
|
||||
((c test %after) ; An AFTER clause. Skip it.
|
||||
|
@ -302,7 +343,7 @@
|
|||
(let ((tail (expand (cdr clauses) range-vars)))
|
||||
(expand-simple-clause clause tail
|
||||
rec-var else-var rec-counter svars
|
||||
r c)))))
|
||||
pats/vars r c)))))
|
||||
|
||||
;; No clauses -- just jump to top of loop.
|
||||
`(,lp-var . ,loop-vars)))))
|
||||
|
@ -310,18 +351,22 @@
|
|||
|
||||
;;; Make a Scheme expression out of a test form.
|
||||
;;; Integer i => (= i <record-counter>)
|
||||
;;; String s => (string-match s <record>)
|
||||
;;; String s => (regexp-exec s <record>)
|
||||
;;; Expression e => e
|
||||
|
||||
(define (->simple-clause-test test-form rec-var rec-counter r)
|
||||
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
|
||||
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
|
||||
((string? test-form) `(,(r 'string-match) ,test-form ,rec-var))
|
||||
((string? test-form)
|
||||
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
|
||||
(else (error "Impossible AWK error -- unknown regexp"
|
||||
test-form pats/vars)))))
|
||||
`(,(r 'regexp-exec) ,re-var ,rec-var)))
|
||||
(else test-form)))
|
||||
|
||||
|
||||
(define (expand-simple-clause clause tail
|
||||
rec-var else-var rec-counter svars
|
||||
r c)
|
||||
pats/vars r c)
|
||||
(let* ((%let (r 'let))
|
||||
(%= (r '=))
|
||||
(%string-match (r 'string-match))
|
||||
|
@ -329,7 +374,7 @@
|
|||
(%if (r 'if))
|
||||
|
||||
(test (car clause))
|
||||
(test (->simple-clause-test test rec-var rec-counter r))
|
||||
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
|
||||
|
||||
;; Is clause of the form (test => proc)
|
||||
(arrow? (and (= 3 (length clause))
|
||||
|
@ -361,7 +406,7 @@
|
|||
|
||||
(define (expand-range-clause clause tail range-var
|
||||
rec-var else-var rec-counter svars
|
||||
r c)
|
||||
pats/vars r c)
|
||||
(let* ((start-test (cadr clause))
|
||||
(stop-test (caddr clause))
|
||||
(body (cdddr clause))
|
||||
|
@ -378,8 +423,10 @@
|
|||
(else (error "Unrecognised range keyword!" clause)))))
|
||||
|
||||
;; Convert the start and stop test forms to code.
|
||||
(start-test (->simple-clause-test start-test rec-var rec-counter r))
|
||||
(stop-test (->simple-clause-test stop-test rec-var rec-counter r))
|
||||
(start-test (->simple-clause-test start-test rec-var
|
||||
rec-counter pats/vars r))
|
||||
(stop-test (->simple-clause-test stop-test rec-var
|
||||
rec-counter pats/vars r))
|
||||
|
||||
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
|
||||
(stop-thunk `(,%lambda () ,stop-test))
|
||||
|
@ -430,7 +477,7 @@
|
|||
(blockify (if (pair? svars)
|
||||
|
||||
(if else-var
|
||||
(if (cdr svars) ; We've got state vars and an else var.
|
||||
(if (pair? (cdr svars)) ; state vars and an else var.
|
||||
`((,%receive ,svars ,(blockify body r c)
|
||||
(,%values #f . ,svars)))
|
||||
`((,%values #f ,(blockify body r c)))) ; Gratuitous.
|
||||
|
|
Loading…
Reference in New Issue