AWK now pre-compiles its regexps outside the main loop.
This commit is contained in:
parent
9610aeab66
commit
d16ad942c0
75
scsh/awk.scm
75
scsh/awk.scm
|
@ -155,6 +155,7 @@
|
||||||
(%after (r 'after))
|
(%after (r 'after))
|
||||||
(%else (r 'else))
|
(%else (r 'else))
|
||||||
(%+ (r '+))
|
(%+ (r '+))
|
||||||
|
(%make-regexp (r 'make-regexp))
|
||||||
|
|
||||||
(gensym (let ((i 0))
|
(gensym (let ((i 0))
|
||||||
(lambda (s)
|
(lambda (s)
|
||||||
|
@ -199,6 +200,44 @@
|
||||||
clauses)
|
clauses)
|
||||||
(r 'elss)))
|
(r 'elss)))
|
||||||
|
|
||||||
|
;; We compile all of the regexp patterns into regexp
|
||||||
|
;; data structures outside the AWK loop. So we need to
|
||||||
|
;; make a list of all the regexps that are used as tests.
|
||||||
|
(patterns (apply append
|
||||||
|
(map (lambda (clause)
|
||||||
|
(let ((test (car clause)))
|
||||||
|
(cond ((string? test) (list test))
|
||||||
|
((range? clause)
|
||||||
|
(let ((t1 (cadr clause))
|
||||||
|
(t2 (caddr clause)))
|
||||||
|
(append (if (string? t1)
|
||||||
|
(list t1)
|
||||||
|
'())
|
||||||
|
(if (string? t2)
|
||||||
|
(list t2)
|
||||||
|
'()))))
|
||||||
|
(else '()))))
|
||||||
|
clauses)))
|
||||||
|
|
||||||
|
;; Gratuitous optimisation: uniquify the patterns.
|
||||||
|
(patterns (let recur ((pats patterns))
|
||||||
|
(if (pair? pats)
|
||||||
|
(let ((pat (car pats))
|
||||||
|
(ans (recur (cdr pats))))
|
||||||
|
(if (member pat ans) ans (cons pat ans)))
|
||||||
|
'())))
|
||||||
|
|
||||||
|
;; An alist matching regexp patterns with the vars to which
|
||||||
|
;; we will bind their compiled regexp data structure.
|
||||||
|
(pats/vars (map (lambda (p) (cons p (r (gensym "re."))))
|
||||||
|
patterns))
|
||||||
|
|
||||||
|
;; A LET-list binding the regexp vars to their compiled regexps.
|
||||||
|
(regexp-inits (map (lambda (p/v)
|
||||||
|
`(,(cdr p/v) (,%make-regexp ,(car p/v))))
|
||||||
|
pats/vars))
|
||||||
|
|
||||||
|
|
||||||
;; Make a list of state vars for the range clauses.
|
;; Make a list of state vars for the range clauses.
|
||||||
;; For each range clause, we need a boolean var to track
|
;; For each range clause, we need a boolean var to track
|
||||||
;; whether or not the range is activated.
|
;; whether or not the range is activated.
|
||||||
|
@ -244,7 +283,7 @@
|
||||||
|
|
||||||
(loop-body (awk-loop-body lp-var rec-var else-var
|
(loop-body (awk-loop-body lp-var rec-var else-var
|
||||||
rec-counter range-vars svars
|
rec-counter range-vars svars
|
||||||
clauses r c))
|
clauses pats/vars r c))
|
||||||
|
|
||||||
;; Variables that have to be updated per-iteration, as a LET list.
|
;; Variables that have to be updated per-iteration, as a LET list.
|
||||||
;; Note that we are careful not to increment the record counter
|
;; Note that we are careful not to increment the record counter
|
||||||
|
@ -260,7 +299,8 @@
|
||||||
. ,(deblock loop-body r c))
|
. ,(deblock loop-body r c))
|
||||||
loop-body)))
|
loop-body)))
|
||||||
|
|
||||||
`(,%let ((,reader (,%lambda () ,reader-exp)))
|
`(,%let ((,reader (,%lambda () ,reader-exp))
|
||||||
|
. ,regexp-inits)
|
||||||
(,%let ,lp-var ,loop-var-init
|
(,%let ,lp-var ,loop-var-init
|
||||||
,(mv-let r c rec/field-vars `(,reader)
|
,(mv-let r c rec/field-vars `(,reader)
|
||||||
`(,%if (,%eof-object? ,rec-var) ,after-exp
|
`(,%if (,%eof-object? ,rec-var) ,after-exp
|
||||||
|
@ -271,7 +311,7 @@
|
||||||
;;; each clause, and then jumps to the top of the loop.
|
;;; each clause, and then jumps to the top of the loop.
|
||||||
|
|
||||||
(define (awk-loop-body lp-var rec-var else-var rec-counter
|
(define (awk-loop-body lp-var rec-var else-var rec-counter
|
||||||
range-vars svars clauses r c)
|
range-vars svars clauses pats/vars r c)
|
||||||
(let ((clause-vars (if else-var (cons else-var svars) svars))
|
(let ((clause-vars (if else-var (cons else-var svars) svars))
|
||||||
(loop-vars (append (if rec-counter (list rec-counter) '())
|
(loop-vars (append (if rec-counter (list rec-counter) '())
|
||||||
range-vars
|
range-vars
|
||||||
|
@ -289,6 +329,7 @@
|
||||||
(let ((tail (expand (cdr clauses) (cdr range-vars))))
|
(let ((tail (expand (cdr clauses) (cdr range-vars))))
|
||||||
(expand-range-clause clause tail (car range-vars)
|
(expand-range-clause clause tail (car range-vars)
|
||||||
rec-var else-var rec-counter svars
|
rec-var else-var rec-counter svars
|
||||||
|
pats/vars
|
||||||
r c)))
|
r c)))
|
||||||
|
|
||||||
((c test %after) ; An AFTER clause. Skip it.
|
((c test %after) ; An AFTER clause. Skip it.
|
||||||
|
@ -302,7 +343,7 @@
|
||||||
(let ((tail (expand (cdr clauses) range-vars)))
|
(let ((tail (expand (cdr clauses) range-vars)))
|
||||||
(expand-simple-clause clause tail
|
(expand-simple-clause clause tail
|
||||||
rec-var else-var rec-counter svars
|
rec-var else-var rec-counter svars
|
||||||
r c)))))
|
pats/vars r c)))))
|
||||||
|
|
||||||
;; No clauses -- just jump to top of loop.
|
;; No clauses -- just jump to top of loop.
|
||||||
`(,lp-var . ,loop-vars)))))
|
`(,lp-var . ,loop-vars)))))
|
||||||
|
@ -310,18 +351,22 @@
|
||||||
|
|
||||||
;;; Make a Scheme expression out of a test form.
|
;;; Make a Scheme expression out of a test form.
|
||||||
;;; Integer i => (= i <record-counter>)
|
;;; Integer i => (= i <record-counter>)
|
||||||
;;; String s => (string-match s <record>)
|
;;; String s => (regexp-exec s <record>)
|
||||||
;;; Expression e => e
|
;;; Expression e => e
|
||||||
|
|
||||||
(define (->simple-clause-test test-form rec-var rec-counter r)
|
(define (->simple-clause-test test-form rec-var rec-counter pats/vars r)
|
||||||
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
|
(cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form))
|
||||||
((string? test-form) `(,(r 'string-match) ,test-form ,rec-var))
|
((string? test-form)
|
||||||
(else test-form)))
|
(let ((re-var (cond ((assoc test-form pats/vars) => cdr)
|
||||||
|
(else (error "Impossible AWK error -- unknown regexp"
|
||||||
|
test-form pats/vars)))))
|
||||||
|
`(,(r 'regexp-exec) ,re-var ,rec-var)))
|
||||||
|
(else test-form)))
|
||||||
|
|
||||||
|
|
||||||
(define (expand-simple-clause clause tail
|
(define (expand-simple-clause clause tail
|
||||||
rec-var else-var rec-counter svars
|
rec-var else-var rec-counter svars
|
||||||
r c)
|
pats/vars r c)
|
||||||
(let* ((%let (r 'let))
|
(let* ((%let (r 'let))
|
||||||
(%= (r '=))
|
(%= (r '=))
|
||||||
(%string-match (r 'string-match))
|
(%string-match (r 'string-match))
|
||||||
|
@ -329,7 +374,7 @@
|
||||||
(%if (r 'if))
|
(%if (r 'if))
|
||||||
|
|
||||||
(test (car clause))
|
(test (car clause))
|
||||||
(test (->simple-clause-test test rec-var rec-counter r))
|
(test (->simple-clause-test test rec-var rec-counter pats/vars r))
|
||||||
|
|
||||||
;; Is clause of the form (test => proc)
|
;; Is clause of the form (test => proc)
|
||||||
(arrow? (and (= 3 (length clause))
|
(arrow? (and (= 3 (length clause))
|
||||||
|
@ -361,7 +406,7 @@
|
||||||
|
|
||||||
(define (expand-range-clause clause tail range-var
|
(define (expand-range-clause clause tail range-var
|
||||||
rec-var else-var rec-counter svars
|
rec-var else-var rec-counter svars
|
||||||
r c)
|
pats/vars r c)
|
||||||
(let* ((start-test (cadr clause))
|
(let* ((start-test (cadr clause))
|
||||||
(stop-test (caddr clause))
|
(stop-test (caddr clause))
|
||||||
(body (cdddr clause))
|
(body (cdddr clause))
|
||||||
|
@ -378,8 +423,10 @@
|
||||||
(else (error "Unrecognised range keyword!" clause)))))
|
(else (error "Unrecognised range keyword!" clause)))))
|
||||||
|
|
||||||
;; Convert the start and stop test forms to code.
|
;; Convert the start and stop test forms to code.
|
||||||
(start-test (->simple-clause-test start-test rec-var rec-counter r))
|
(start-test (->simple-clause-test start-test rec-var
|
||||||
(stop-test (->simple-clause-test stop-test rec-var rec-counter r))
|
rec-counter pats/vars r))
|
||||||
|
(stop-test (->simple-clause-test stop-test rec-var
|
||||||
|
rec-counter pats/vars r))
|
||||||
|
|
||||||
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
|
(start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them.
|
||||||
(stop-thunk `(,%lambda () ,stop-test))
|
(stop-thunk `(,%lambda () ,stop-test))
|
||||||
|
@ -430,7 +477,7 @@
|
||||||
(blockify (if (pair? svars)
|
(blockify (if (pair? svars)
|
||||||
|
|
||||||
(if else-var
|
(if else-var
|
||||||
(if (cdr svars) ; We've got state vars and an else var.
|
(if (pair? (cdr svars)) ; state vars and an else var.
|
||||||
`((,%receive ,svars ,(blockify body r c)
|
`((,%receive ,svars ,(blockify body r c)
|
||||||
(,%values #f . ,svars)))
|
(,%values #f . ,svars)))
|
||||||
`((,%values #f ,(blockify body r c)))) ; Gratuitous.
|
`((,%values #f ,(blockify body r c)))) ; Gratuitous.
|
||||||
|
|
Loading…
Reference in New Issue