From d16ad942c03b4d8607b333e5da35d5f7f571285a Mon Sep 17 00:00:00 2001 From: shivers Date: Sun, 8 Sep 1996 09:08:47 +0000 Subject: [PATCH] AWK now pre-compiles its regexps outside the main loop. --- scsh/awk.scm | 75 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/scsh/awk.scm b/scsh/awk.scm index 81c3e9d..f2d6070 100644 --- a/scsh/awk.scm +++ b/scsh/awk.scm @@ -155,6 +155,7 @@ (%after (r 'after)) (%else (r 'else)) (%+ (r '+)) + (%make-regexp (r 'make-regexp)) (gensym (let ((i 0)) (lambda (s) @@ -199,6 +200,44 @@ clauses) (r 'elss))) + ;; We compile all of the regexp patterns into regexp + ;; data structures outside the AWK loop. So we need to + ;; make a list of all the regexps that are used as tests. + (patterns (apply append + (map (lambda (clause) + (let ((test (car clause))) + (cond ((string? test) (list test)) + ((range? clause) + (let ((t1 (cadr clause)) + (t2 (caddr clause))) + (append (if (string? t1) + (list t1) + '()) + (if (string? t2) + (list t2) + '())))) + (else '())))) + clauses))) + + ;; Gratuitous optimisation: uniquify the patterns. + (patterns (let recur ((pats patterns)) + (if (pair? pats) + (let ((pat (car pats)) + (ans (recur (cdr pats)))) + (if (member pat ans) ans (cons pat ans))) + '()))) + + ;; An alist matching regexp patterns with the vars to which + ;; we will bind their compiled regexp data structure. + (pats/vars (map (lambda (p) (cons p (r (gensym "re.")))) + patterns)) + + ;; A LET-list binding the regexp vars to their compiled regexps. + (regexp-inits (map (lambda (p/v) + `(,(cdr p/v) (,%make-regexp ,(car p/v)))) + pats/vars)) + + ;; Make a list of state vars for the range clauses. ;; For each range clause, we need a boolean var to track ;; whether or not the range is activated. @@ -244,7 +283,7 @@ (loop-body (awk-loop-body lp-var rec-var else-var rec-counter range-vars svars - clauses r c)) + clauses pats/vars r c)) ;; Variables that have to be updated per-iteration, as a LET list. ;; Note that we are careful not to increment the record counter @@ -260,7 +299,8 @@ . ,(deblock loop-body r c)) loop-body))) - `(,%let ((,reader (,%lambda () ,reader-exp))) + `(,%let ((,reader (,%lambda () ,reader-exp)) + . ,regexp-inits) (,%let ,lp-var ,loop-var-init ,(mv-let r c rec/field-vars `(,reader) `(,%if (,%eof-object? ,rec-var) ,after-exp @@ -271,7 +311,7 @@ ;;; each clause, and then jumps to the top of the loop. (define (awk-loop-body lp-var rec-var else-var rec-counter - range-vars svars clauses r c) + range-vars svars clauses pats/vars r c) (let ((clause-vars (if else-var (cons else-var svars) svars)) (loop-vars (append (if rec-counter (list rec-counter) '()) range-vars @@ -289,6 +329,7 @@ (let ((tail (expand (cdr clauses) (cdr range-vars)))) (expand-range-clause clause tail (car range-vars) rec-var else-var rec-counter svars + pats/vars r c))) ((c test %after) ; An AFTER clause. Skip it. @@ -302,7 +343,7 @@ (let ((tail (expand (cdr clauses) range-vars))) (expand-simple-clause clause tail rec-var else-var rec-counter svars - r c))))) + pats/vars r c))))) ;; No clauses -- just jump to top of loop. `(,lp-var . ,loop-vars))))) @@ -310,18 +351,22 @@ ;;; Make a Scheme expression out of a test form. ;;; Integer i => (= i ) -;;; String s => (string-match s ) +;;; String s => (regexp-exec s ) ;;; Expression e => e -(define (->simple-clause-test test-form rec-var rec-counter r) +(define (->simple-clause-test test-form rec-var rec-counter pats/vars r) (cond ((integer? test-form) `(,(r '=) ,rec-counter ,test-form)) - ((string? test-form) `(,(r 'string-match) ,test-form ,rec-var)) - (else test-form))) + ((string? test-form) + (let ((re-var (cond ((assoc test-form pats/vars) => cdr) + (else (error "Impossible AWK error -- unknown regexp" + test-form pats/vars))))) + `(,(r 'regexp-exec) ,re-var ,rec-var))) + (else test-form))) (define (expand-simple-clause clause tail rec-var else-var rec-counter svars - r c) + pats/vars r c) (let* ((%let (r 'let)) (%= (r '=)) (%string-match (r 'string-match)) @@ -329,7 +374,7 @@ (%if (r 'if)) (test (car clause)) - (test (->simple-clause-test test rec-var rec-counter r)) + (test (->simple-clause-test test rec-var rec-counter pats/vars r)) ;; Is clause of the form (test => proc) (arrow? (and (= 3 (length clause)) @@ -361,7 +406,7 @@ (define (expand-range-clause clause tail range-var rec-var else-var rec-counter svars - r c) + pats/vars r c) (let* ((start-test (cadr clause)) (stop-test (caddr clause)) (body (cdddr clause)) @@ -378,8 +423,10 @@ (else (error "Unrecognised range keyword!" clause))))) ;; Convert the start and stop test forms to code. - (start-test (->simple-clause-test start-test rec-var rec-counter r)) - (stop-test (->simple-clause-test stop-test rec-var rec-counter r)) + (start-test (->simple-clause-test start-test rec-var + rec-counter pats/vars r)) + (stop-test (->simple-clause-test stop-test rec-var + rec-counter pats/vars r)) (start-thunk `(,%lambda () ,start-test)) ; ...and thunkate them. (stop-thunk `(,%lambda () ,stop-test)) @@ -430,7 +477,7 @@ (blockify (if (pair? svars) (if else-var - (if (cdr svars) ; We've got state vars and an else var. + (if (pair? (cdr svars)) ; state vars and an else var. `((,%receive ,svars ,(blockify body r c) (,%values #f . ,svars))) `((,%values #f ,(blockify body r c)))) ; Gratuitous.