scsh-0.6/scheme/bcomp/comp-exp.scm

698 lines
21 KiB
Scheme
Raw Permalink Normal View History

1999-09-14 08:45:02 -04:00
; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
; Compiling expressions.
; This is a two-phase compiler. The first phase does macro expansion,
; variable resolution, and instruction selection, and computes the
; size of the code vector. The second phase (assembly) creates the
; code vector, "template" (literals vector), and debugging data
; structures.
; The output of the first phase (the COMPILE- and INSTRUCTION-
; routines) and the input to the second phase (SEGMENT->TEMPLATE) is a
; "segment." A segment is a pair (size . proc) where size is the size
; of the code segment in bytes, and proc is a procedure that during
; phase 2 will store the segment's bytes into the code vector.
; Optimizations are marked with +++, and may be flushed if desired.
(define (compile-expression node depth cont)
(compile node 0 depth cont))
; Main dispatch for compiling a single expression.
(define (compile node level depth cont)
(let ((node (type-check node)))
((operator-table-ref compilators (node-operator-id node))
node
level
depth
cont)))
; Specialists
(define compilators
(make-operator-table
(lambda (node level depth cont)
(generate-trap cont
"not valid in expression context"
(schemify node)))))
(define (define-compilator name type proc)
(operator-define! compilators name type proc))
(define-compilator 'literal 'leaf
(lambda (node level depth cont)
(let ((obj (node-form node)))
(if (eq? obj #f)
;; +++ hack for bootstrap from Schemes that don't distinguish #f/()
(deliver-value (instruction (enum op false)) cont)
(compile-constant obj depth cont)))))
(define-compilator 'unspecific (proc () unspecific-type)
(lambda (node level depth cont)
(deliver-value (instruction (enum op unspecific))
cont)))
(define-compilator 'unassigned (proc () unspecific-type)
(lambda (node level depth cont)
(deliver-value (instruction (enum op unassigned))
cont)))
(define-compilator 'quote syntax-type
(lambda (node level depth cont)
(let ((exp (node-form node)))
level ;ignored
(let ((obj (cadr exp)))
(compile-constant obj depth cont)))))
(define (compile-constant obj depth cont)
(if (ignore-values-cont? cont)
empty-segment ;+++ dead code
(deliver-value (instruction-with-literal (enum op literal) obj)
cont)))
; Variable reference
(define-compilator 'name 'leaf
(lambda (node level depth cont)
(let* ((binding (name-node-binding node))
(name (node-form node)))
(deliver-value
(if (pair? binding)
(let ((back (- level (car binding)))
(over (cdr binding)))
(if (or (>= back byte-limit)
(>= over byte-limit))
(instruction (enum op big-local)
(high-byte back)
(low-byte back)
(high-byte over)
(low-byte over))
(case back
((0) (instruction (enum op local0) over)) ;+++
((1) (instruction (enum op local1) over)) ;+++
((2) (instruction (enum op local2) over)) ;+++
(else (instruction (enum op local) back over)))))
(instruction-with-location (enum op global)
binding
name
value-type))
cont))))
; Assignment
(define-compilator 'set! syntax-type
(lambda (node level depth cont)
(let* ((exp (node-form node))
(lhs-node (cadr exp))
(name (node-form lhs-node))
;; Error if not a name node...
(binding (name-node-binding lhs-node)))
(sequentially
(compile (caddr exp) level depth (named-cont name))
(deliver-value
(if (pair? binding)
(let ((back (- level (car binding)))
(over (cdr binding)))
(instruction (enum op set-local!)
(high-byte back)
(low-byte back)
(high-byte over)
(low-byte over)))
(instruction-with-location (enum op set-global!)
binding
name
usual-variable-type))
cont)))))
; Conditional
(define-compilator 'if syntax-type
(lambda (node level depth cont)
(let ((exp (node-form node))
(alt-label (make-label))
(join-label (make-label)))
(sequentially
;; Test
(compile (cadr exp) level depth (fall-through-cont node 1))
(instruction-using-label (enum op jump-if-false) alt-label)
;; Consequent
(compile (caddr exp) level depth cont)
(if (fall-through-cont? cont)
(instruction-using-label (enum op jump) join-label)
empty-segment)
;; Alternate
(attach-label alt-label
(compile (cadddr exp) level depth cont))
(attach-label join-label
empty-segment)))))
(define-compilator 'begin syntax-type
(lambda (node level depth cont)
(let ((exp-list (cdr (node-form node))))
(if (null? exp-list)
(generate-trap cont "null begin")
(let ((dummy
(make-node operator/begin ;For debugging database
`(begin ,@exp-list))))
(let loop ((exp-list exp-list) (i 1))
(if (null? (cdr exp-list))
(compile (car exp-list) level depth cont)
(sequentially
(compile (car exp-list) level depth
(ignore-values-cont dummy i))
(loop (cdr exp-list) (+ i 1))))))))))
; Compile a call
(define (compile-call node level depth cont)
(if (node-ref node 'type-error)
(compile-unknown-call node level depth cont)
(let ((proc-node (car (node-form node))))
(cond ((name-node? proc-node)
(compile-name-call node proc-node level depth cont))
((and (lambda-node? proc-node)
(not (n-ary? (cadr (node-form proc-node)))))
(compile-redex proc-node (cdr (node-form node)) level depth cont))
((and (literal-node? proc-node)
(primop? (node-form proc-node)))
(let ((primop (node-form proc-node)))
(if (primop-compilator primop)
((primop-compilator primop) node level depth cont)
(error "compiler bug: primop has no compilator"
primop
(schemify node)))))
(else
(compile-unknown-call node level depth cont))))))
(define (compile-name-call node proc-node level depth cont)
(let ((binding (name-node-binding proc-node)))
(if (binding? binding)
(let ((static (binding-static binding)))
(cond ((primop? static)
(if (primop-compilator static)
((primop-compilator static) node level depth cont)
(compile-unknown-call node level depth cont)))
((transform? static)
(let* ((form (node-form node))
(new (apply-inline-transform static
form
(node-form proc-node))))
(if (eq? new form)
(compile-unknown-call node level depth cont)
(compile new level depth cont))))
(else
(compile-unknown-call node level depth cont))))
(compile-unknown-call node level depth cont))))
(define-compilator 'call 'internal compile-call)
; A redex is a call of the form ((lambda (x1 ... xn) body ...) e1 ... en).
(define (compile-redex proc-node args level depth cont)
(let* ((proc-exp (node-form proc-node))
(formals (cadr proc-exp))
(body (caddr proc-exp)))
(cond ((not (= (length formals)
(length args)))
(generate-trap cont
"wrong number of arguments"
(cons (schemify proc-node)
(map schemify args))))
((null? formals)
(compile body level depth cont)) ;+++
(else
(maybe-push-continuation
(sequentially
(push-all-with-names args formals level 0)
(compile-lambda-code formals body level (cont-name cont)))
depth
cont)))))
; Compile a call to a computed procedure.
(define (compile-unknown-call node level depth cont)
(let ((exp (node-form node)))
(let ((call (sequentially (push-arguments node level 0)
(compile (car exp)
level
(length (cdr exp))
(fall-through-cont node 0))
(let ((nargs (length (cdr exp))))
(if (> nargs maximum-stack-args)
(instruction (enum op big-call)
(high-byte nargs)
(low-byte nargs))
(instruction (enum op call) nargs))))))
(maybe-push-continuation call depth cont))))
(define (maybe-push-continuation code depth cont)
(if (return-cont? cont)
code
(let ((label (make-label)))
(sequentially (if (>= depth byte-limit)
(instruction-using-label (enum op make-big-cont)
label
(high-byte depth)
(low-byte depth))
(instruction-using-label (enum op make-cont)
label
depth))
(if (keep-source-code?)
(note-source-code (fixup-source (cont-source-info cont))
code)
code)
(attach-label label
(cont-segment cont))))))
(define (fixup-source info)
;; Abbreviate this somehow?
(if (pair? info)
(cons (car info)
(schemify (cdr info)))
;; Name might be generated...
info))
; Continuation is implicitly fall-through.
(define (push-arguments node level depth)
(let recur ((args (cdr (node-form node))) (depth depth) (i 1))
(if (null? args)
empty-segment
(sequentially (compile (car args) level depth
(fall-through-cont node i))
(instruction (enum op push))
(recur (cdr args) (+ depth 1) (+ i 1))))))
(define (push-all-with-names exp-list names level depth)
(if (null? exp-list)
empty-segment
(sequentially (compile (car exp-list)
level depth
(named-cont (node-form (car names))))
(instruction (enum op push))
(push-all-with-names (cdr exp-list)
(cdr names)
level
(+ depth 1)))))
; OK, now that you've got all that under your belt, here's LAMBDA.
(define-compilator 'lambda syntax-type
(lambda (node level depth cont)
(let ((exp (node-form node))
(name (cont-name cont)))
(deliver-value
(sequentially
(instruction (enum op closure))
(template (compile-lambda exp level #f)
(if (name? name)
(name->symbol name)
#f))
(instruction 0)) ; last byte of closure instruction, 0 means use
; *env* for environment
cont))))
(define (compile-lambda exp level body-name)
(let* ((formals (cadr exp))
(nargs (number-of-required-args formals))
(fast-protocol? (and (<= nargs maximum-stack-args)
(not (n-ary? formals)))))
(sequentially
;; Insert protocol
(cond (fast-protocol?
(instruction (enum op protocol) nargs))
((<= nargs available-stack-space)
(instruction (enum op protocol)
(if (n-ary? formals)
two-byte-nargs+list-protocol
two-byte-nargs-protocol)
(high-byte nargs)
(low-byte nargs)))
(else
(error "compiler bug: too many formals"
(schemify exp))))
(compile-lambda-code formals (caddr exp) level body-name))))
; name isn't the name of the procedure, it's the name to be given to
; the value that the procedure will return.
(define (compile-lambda-code formals body level name)
(if (null? formals) ;+++ Don't make null environment
(compile body level 0 (return-cont name))
;; (if (node-ref node 'no-inferior-lambdas) ...)
(sequentially
(let* ((nargs (number-of-required-args formals))
(nargs (if (n-ary? formals)
(+ nargs 1)
nargs)))
(instruction (enum op make-env)
(high-byte nargs)
(low-byte nargs)))
(let ((vars (normalize-formals formals))
(level (+ level 1)))
(set-lexical-offsets! (reverse vars) level)
(note-environment
(map name-node->symbol vars)
(compile body level 0 (return-cont name)))))))
(define (name-node->symbol node)
(let ((form (node-form node)))
(cond ((name? form)
(name->symbol form))
((symbol? form)
form)
(else
#f))))
; Give each name node in NAMES a binding record that has the names lexical
; level and offset.
(define (set-lexical-offsets! names level)
(let loop ((over 1) (names names))
(if (not (null? names))
(begin
(node-set! (car names)
'binding
(cons level over))
(loop (+ over 1) (cdr names))))))
(define-compilator 'flat-lambda syntax-type
(lambda (node level depth cont)
(let ((exp (node-form node))
(name (cont-name cont)))
(let ((vars (cadr exp))
(free (caddr exp))
(body (cadddr exp)))
(deliver-value (compile-flat-lambda name vars body free level)
cont)))))
; The MAKE-FLAT-ENV instruction is designed to allow us to make nested flat
; environments (i.e. flat environments consisting of a linked chain of vectors)
; but this code doesn't generate them. The nested environments would avoid
; the need for offsets larger than a byte. The current code cannot handle
; large environments.
; When we're done we have to restore the old locations of the free variables.
(define (compile-flat-lambda name vars body free level)
(let* ((alist (sort-list (get-variables-offsets free level)
(lambda (p1 p2)
(< (cadr p1)
(cadr p2)))))
(free (map car alist))
(old-locations (map name-node-binding free)))
(set-lexical-offsets! free 0) ; 0 is the level
(let ((code (sequentially
(instruction (enum op false)) ; either the super env or the env
(if (null? free)
empty-segment
(apply instruction (enum op make-flat-env)
1 ; add in *val*
(+ (length free) 1)
(variable-env-data (map cdr alist))))
(instruction (enum op closure))
(note-environment (reverse (map node-form free))
(template (compile-lambda `(lambda ,vars
,body)
0
#f)
(if (name? name)
(name->symbol name)
#f)))
(instruction 1)))) ; last byte of closure instruction, 1 means
; use *val* as environment, instead of *env*
(for-each (lambda (node location)
(node-set! node 'binding location))
free
old-locations)
code)))
; Looks up VARS in CENV and returns an alist of (<name> . (<level> <over>))
; pairs.
(define (get-variables-offsets vars level)
(let loop ((vars vars) (locs '()))
(if (null? vars)
locs
(let ((binding (name-node-binding (car vars))))
(if (pair? binding)
(let ((back (- level (car binding)))
(over (cdr binding)))
(if (< byte-limit over)
(error "lexical environment limit exceeded; complain to implementors"))
(loop (cdr vars)
(cons (cons (car vars)
(cons back over))
locs)))
(error "variable in flat-lambda list is not local"
(car vars)))))))
; Addresses is a list of (level . over) pairs, sorted by level.
; This returns the reverse of the following data:
; <back for level>
; <number of variables from this level>
; <over of 1st var> ...
; <back for next level>
; ...
; If a <back> is too large we insert some empty levels.
(define (variable-env-data addresses)
(let level-loop ((addresses addresses) (last-level 0) (data '()))
(if (null? addresses)
(reverse data)
(let ((level (caar addresses)))
(let loop ((addresses addresses) (overs '()))
(if (or (null? addresses)
(not (= level (caar addresses))))
(level-loop addresses
level
(append overs
(list (length overs))
(let loop ((delta (- level last-level))
(back '()))
(if (<= delta byte-limit)
(cons delta back)
(loop (- delta byte-limit)
`(0 ,byte-limit . ,back))))
data))
(loop (cdr addresses)
(cons (cdar addresses) overs))))))))
; We should probably just use the sort from big-scheme.
(define (sort-list xs less?)
(letrec ((insert (lambda (x xs)
(if (null? xs)
(list x)
(if (less? (car xs) x)
(cons (car xs)
(insert x (cdr xs)))
(cons x xs))))))
(let sort ((xs xs))
(if (null? xs)
'()
(insert (car xs)
(sort (cdr xs)))))))
; LETREC.
(define-compilator 'letrec syntax-type
(lambda (node level depth cont)
;; (if (node-ref node 'pure-letrec) ...)
(let* ((exp (node-form node))
(specs (cadr exp))
(body (caddr exp))
(body (make-node operator/begin
`(begin
,@(map (lambda (spec)
(make-node operator/set!
`(set! ,@spec)))
specs)
,body))))
(if (null? specs)
(compile body level depth cont) ;+++
(maybe-push-continuation
(sequentially
(apply sequentially
(map (lambda (spec)
(sequentially
(instruction (enum op unassigned))
(instruction (enum op push))))
specs))
(compile-lambda-code (map car specs) body level (cont-name cont)))
depth
cont)))))
; --------------------
; Compile-time continuations
;
; A compile-time continuation is a pair (segment . source-info).
; Segment is one of the following:
; a return instruction - invoke the current full continuation.
; empty-segment - fall through to subsequent instructions.
; an ignore-values instruction - ignore values, then fall through.
; Source-info is one of:
; #f - we don't know anything
; symbol - value delivered to subsequent instructions will be assigned to
; a variable with this name. If the value being assigned is a lambda, we
; can give that lambda that name.
; (i . node) - the value being computed is the i'th subexpression of the node.
(define (make-cont seg source-info) (cons seg source-info))
(define cont-segment car)
(define cont-source-info cdr)
; We could probably be able to optimize jumps to jumps.
;(define (make-jump-cont label cont)
; (if (fall-through-cont? cont)
; (make-cont label (cont-name cont))
; cont))
(define return-cont-segment (instruction (enum op return)))
(define (return-cont name)
(make-cont return-cont-segment name))
(define (return-cont? cont)
(eq? (cont-segment cont) return-cont-segment))
; Fall through into next instruction while compiling the I'th part of NODE.
(define (fall-through-cont node i)
(make-cont empty-segment (cons i node)))
(define (fall-through-cont? cont)
(not (return-cont? cont)))
; Ignore return value, then fall through
(define ignore-values-segment
(instruction (enum op ignore-values)))
(define (ignore-values-cont node i)
(make-cont ignore-values-segment (cons i node)))
(define (ignore-values-cont? cont)
(eq? (cont-segment cont) ignore-values-segment))
; Value is in *val*; deliver it to its continuation.
; No need to generate an ignore-values instruction in this case.
(define (deliver-value segment cont)
(if (ignore-values-cont? cont) ;+++
segment
(sequentially segment (cont-segment cont))))
; For putting names to lambda expressions:
(define (named-cont name)
(make-cont empty-segment name))
(define (cont-name cont)
(if (pair? (cont-source-info cont))
#f
(cont-source-info cont)))
; Find lookup result that was cached by classifier
(define (name-node-binding node)
(or (node-ref node 'binding)
(node-form node)))
; --------------------
; Utilities
; Produce something for source code that contains a compile-time error.
(define (generate-trap cont . stuff)
(apply warn stuff)
(sequentially (instruction-with-literal (enum op literal)
(cons 'error stuff))
(deliver-value (instruction (enum op trap))
cont)))
; --------------------
; Type checking. This gets called on all nodes.
(define (type-check node)
(if *type-check?*
(let ((form (node-form node)))
(if (pair? form)
(let ((proc-node (car form)))
(if (node? proc-node)
(let ((proc-type (node-type proc-node)))
(cond ((procedure-type? proc-type)
(if (restrictive? proc-type)
(let* ((args (cdr form))
(args-type (make-some-values-type
(map (lambda (arg)
(meet-type
(node-type arg)
value-type))
args)))
(node (make-similar-node node
(cons proc-node
args))))
(if (not (meet? args-type
(procedure-type-domain proc-type)))
(diagnose-call-error node proc-type))
node)
node))
((not (meet? proc-type any-procedure-type))
;; Could also check args for one-valuedness.
(let ((message "non-procedure in operator position"))
(warn message
(schemify node)
`(procedure: ,proc-type))
(node-set! node 'type-error message))
node)
(else node)))
node))
node))
node))
(define (set-type-check?! check?)
(set! *type-check?* check?))
(define *type-check?* #t)
(define (diagnose-call-error node proc-type)
(let ((message
(cond ((not (fixed-arity-procedure-type? proc-type))
"invalid arguments")
((= (procedure-type-arity proc-type)
(length (cdr (node-form node))))
"argument type error")
(else
"wrong number of arguments"))))
(warn message
(schemify node)
`(procedure wants:
,(rail-type->sexp (procedure-type-domain proc-type)
#f))
`(arguments are: ,(map (lambda (arg)
(type->sexp (node-type arg) #t))
(cdr (node-form node)))))
(node-set! node 'type-error message)))
; Type system loophole
(define-compilator 'loophole syntax-type
(lambda (node level depth cont)
(compile (caddr (node-form node)) level depth cont)))
; Node predicates and operators.
(define lambda-node? (node-predicate 'lambda syntax-type))
(define name-node? (node-predicate 'name 'leaf))
(define literal-node? (node-predicate 'literal 'leaf))
(define operator/lambda (get-operator 'lambda syntax-type))
(define operator/set! (get-operator 'set! syntax-type))
(define operator/call (get-operator 'call 'internal))
(define operator/begin (get-operator 'begin syntax-type))