; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.

; Compiling expressions.

; This is a two-phase compiler.  The first phase does macro expansion,
; variable resolution, and instruction selection, and computes the
; size of the code vector.  The second phase (assembly) creates the
; code vector, "template" (literals vector), and debugging data
; structures.

; The output of the first phase (the COMPILE- and INSTRUCTION-
; routines) and the input to the second phase (SEGMENT->TEMPLATE) is a
; "segment."  A segment is a pair (size . proc) where size is the size
; of the code segment in bytes, and proc is a procedure that during
; phase 2 will store the segment's bytes into the code vector.

; Optimizations are marked with +++, and may be flushed if desired.

(define (compile-expression node depth cont)
  (compile node 0 depth cont))

; Main dispatch for compiling a single expression.

(define (compile node level depth cont)
  (let ((node (type-check node)))
    ((operator-table-ref compilators (node-operator-id node))
     node
     level
     depth
     cont)))

; Specialists

(define compilators
  (make-operator-table
    (lambda (node level depth cont)
      (generate-trap cont
		     "not valid in expression context"
		     (schemify node)))))

(define (define-compilator name type proc)
  (operator-define! compilators name type proc))

(define-compilator 'literal 'leaf
  (lambda (node level depth cont)
    (let ((obj (node-form node)))
      (if (eq? obj #f)
	  ;; +++ hack for bootstrap from Schemes that don't distinguish #f/()
	  (deliver-value (instruction (enum op false)) cont)
	  (compile-constant obj depth cont)))))

(define-compilator 'unspecific (proc () unspecific-type)
  (lambda (node level depth cont)
    (deliver-value (instruction (enum op unspecific))
		   cont)))

(define-compilator 'unassigned (proc () unspecific-type)
  (lambda (node level depth cont)
    (deliver-value (instruction (enum op unassigned))
		   cont)))

(define-compilator 'quote syntax-type
  (lambda (node level depth cont)
    (let ((exp (node-form node)))
      level				;ignored
      (let ((obj (cadr exp)))
	(compile-constant obj depth cont)))))

(define (compile-constant obj depth cont)
  (if (ignore-values-cont? cont)
      empty-segment			;+++ dead code
      (deliver-value (instruction-with-literal (enum op literal) obj)
		     cont)))

; Variable reference

(define-compilator 'name 'leaf
  (lambda (node level depth cont)
    (let* ((binding (name-node-binding node))
	   (name (node-form node)))
      (deliver-value
         (if (pair? binding)
	     (let ((back (- level (car binding)))
		   (over (cdr binding)))
	       (if (or (>= back byte-limit)
		       (>= over byte-limit))
		   (instruction (enum op big-local)
				(high-byte back)
				(low-byte back)
				(high-byte over)
				(low-byte over))
		   (case back
		     ((0) (instruction (enum op local0) over)) ;+++
		     ((1) (instruction (enum op local1) over)) ;+++
		     ((2) (instruction (enum op local2) over)) ;+++
		     (else (instruction (enum op local) back over)))))
	     (instruction-with-location (enum op global)
					binding
					name
					value-type))
	 cont))))

; Assignment

(define-compilator 'set! syntax-type
  (lambda (node level depth cont)
    (let* ((exp (node-form node))
	   (lhs-node (cadr exp))
	   (name (node-form lhs-node))
	   ;; Error if not a name node...
	   (binding (name-node-binding lhs-node)))
      (sequentially
       (compile (caddr exp) level depth (named-cont name))
       (deliver-value
	(if (pair? binding)
	    (let ((back (- level (car binding)))
		  (over (cdr binding)))
	      (instruction (enum op set-local!)
			   (high-byte back)
			   (low-byte back)
			   (high-byte over)
			   (low-byte over)))
	    (instruction-with-location (enum op set-global!)
				       binding
				       name
				       usual-variable-type))
	cont)))))

; Conditional

(define-compilator 'if syntax-type
  (lambda (node level depth cont)
    (let ((exp (node-form node))
	  (alt-label (make-label))
	  (join-label (make-label)))
      (sequentially
       ;; Test
       (compile (cadr exp) level depth (fall-through-cont node 1))
       (instruction-using-label (enum op jump-if-false) alt-label)
       ;; Consequent
       (compile (caddr exp) level depth cont)
       (if (fall-through-cont? cont)
	   (instruction-using-label (enum op jump) join-label)
	   empty-segment)
       ;; Alternate
       (attach-label alt-label
		     (compile (cadddr exp) level depth cont))
       (attach-label join-label
		     empty-segment)))))

(define-compilator 'begin syntax-type
  (lambda (node level depth cont)
    (let ((exp-list (cdr (node-form node))))
      (if (null? exp-list)
	  (generate-trap cont "null begin")
	  (let ((dummy
		 (make-node operator/begin ;For debugging database
			    `(begin ,@exp-list))))
	    (let loop ((exp-list exp-list) (i 1))
	      (if (null? (cdr exp-list))
		  (compile (car exp-list) level depth cont)
		  (sequentially
		    (compile (car exp-list) level depth
			     (ignore-values-cont dummy i))
		    (loop (cdr exp-list) (+ i 1))))))))))

; Compile a call

(define (compile-call node level depth cont)
  (if (node-ref node 'type-error)
      (compile-unknown-call node level depth cont)
      (let ((proc-node (car (node-form node))))
	(cond ((name-node? proc-node)
	       (compile-name-call node proc-node level depth cont))
	      ((and (lambda-node? proc-node)
		    (not (n-ary? (cadr (node-form proc-node)))))
	       (compile-redex proc-node (cdr (node-form node)) level depth cont))
 	      ((and (literal-node? proc-node)
 		    (primop? (node-form proc-node)))
 	       (let ((primop (node-form proc-node)))
 		 (if (primop-compilator primop)
 		     ((primop-compilator primop) node level depth cont)
 		     (error "compiler bug: primop has no compilator"
 			    primop
 			    (schemify node)))))
	      (else
	       (compile-unknown-call node level depth cont))))))

(define (compile-name-call node proc-node level depth cont)
  (let ((binding (name-node-binding proc-node)))
    (if (binding? binding)
	(let ((static (binding-static binding)))
	  (cond ((primop? static)
		 (if (primop-compilator static)
		     ((primop-compilator static) node level depth cont)
		     (compile-unknown-call node level depth cont)))
		((transform? static)
		 (let* ((form (node-form node))
			(new (apply-inline-transform static
						     form
						     (node-form proc-node))))
		   (if (eq? new form)
		       (compile-unknown-call node level depth cont)
		       (compile new level depth cont))))
		(else
		 (compile-unknown-call node level depth cont))))
	(compile-unknown-call node level depth cont))))

(define-compilator 'call 'internal compile-call)

; A redex is a call of the form ((lambda (x1 ... xn) body ...) e1 ... en).

(define (compile-redex proc-node args level depth cont)
  (let* ((proc-exp (node-form proc-node))
	 (formals (cadr proc-exp))
	 (body (caddr proc-exp)))
    (cond ((not (= (length formals)
		   (length args)))
	   (generate-trap cont
			  "wrong number of arguments"
			  (cons (schemify proc-node)
				(map schemify args))))
	  ((null? formals)
	   (compile body level depth cont)) ;+++
	  (else
	   (maybe-push-continuation
	    (sequentially 
	     (push-all-with-names args formals level 0)
	     (compile-lambda-code formals body level (cont-name cont)))
	    depth
	    cont)))))

; Compile a call to a computed procedure.

(define (compile-unknown-call node level depth cont)
  (let ((exp (node-form node)))
    (let ((call (sequentially (push-arguments node level 0)
			      (compile (car exp)
				       level
				       (length (cdr exp))
				       (fall-through-cont node 0))
			      (let ((nargs (length (cdr exp))))
				(if (> nargs maximum-stack-args)
				    (instruction (enum op big-call)
						 (high-byte nargs)
						 (low-byte nargs))
				    (instruction (enum op call) nargs))))))
      (maybe-push-continuation call depth cont))))

(define (maybe-push-continuation code depth cont)
  (if (return-cont? cont) 
      code
      (let ((label (make-label)))
	(sequentially (if (>= depth byte-limit)
			  (instruction-using-label (enum op make-big-cont)
						   label
						   (high-byte depth)
						   (low-byte depth))
			  (instruction-using-label (enum op make-cont)
						   label
						   depth))
		      (if (keep-source-code?)
			  (note-source-code (fixup-source (cont-source-info cont))
					    code)
			  code)
		      (attach-label label
				    (cont-segment cont))))))

(define (fixup-source info)
  ;; Abbreviate this somehow?
  (if (pair? info)
      (cons (car info)
	    (schemify (cdr info)))
      ;; Name might be generated...
      info))

; Continuation is implicitly fall-through.

(define (push-arguments node level depth)
  (let recur ((args (cdr (node-form node))) (depth depth) (i 1))
    (if (null? args)
	empty-segment
	(sequentially (compile (car args) level depth
			       (fall-through-cont node i))
		      (instruction (enum op push))
		      (recur (cdr args) (+ depth 1) (+ i 1))))))

(define (push-all-with-names exp-list names level depth)
  (if (null? exp-list)
      empty-segment
      (sequentially (compile (car exp-list)
			     level depth
			     (named-cont (node-form (car names))))
		    (instruction (enum op push))
                    (push-all-with-names (cdr exp-list)
					 (cdr names)
					 level
					 (+ depth 1)))))
     
; OK, now that you've got all that under your belt, here's LAMBDA.

(define-compilator 'lambda syntax-type
  (lambda (node level depth cont)
    (let ((exp (node-form node))
	  (name (cont-name cont)))
      (deliver-value
       (sequentially
	 (instruction (enum op closure))
	 (template (compile-lambda exp level #f)
		   (if (name? name)
		       (name->symbol name)
		       #f))
	 (instruction 0)) ; last byte of closure instruction, 0 means use
                          ; *env* for environment
       cont))))

(define (compile-lambda exp level body-name)
  (let* ((formals (cadr exp))
	 (nargs (number-of-required-args formals))
	 (fast-protocol? (and (<= nargs maximum-stack-args)
			      (not (n-ary? formals)))))
    (sequentially
     ;; Insert protocol
     (cond (fast-protocol?
	    (instruction (enum op protocol) nargs))
	   ((<= nargs available-stack-space)
	    (instruction (enum op protocol)
			 (if (n-ary? formals)
			     two-byte-nargs+list-protocol
			     two-byte-nargs-protocol)
			 (high-byte nargs)
			 (low-byte nargs)))
	   (else
	    (error "compiler bug: too many formals"
		   (schemify exp))))
     (compile-lambda-code formals (caddr exp) level body-name))))

; name isn't the name of the procedure, it's the name to be given to
; the value that the procedure will return.

(define (compile-lambda-code formals body level name)
  (if (null? formals)		;+++ Don't make null environment
      (compile body level 0 (return-cont name))
      ;; (if (node-ref node 'no-inferior-lambdas) ...)
      (sequentially
       (let* ((nargs (number-of-required-args formals))
	      (nargs (if (n-ary? formals)
			 (+ nargs 1)
			 nargs)))
	 (instruction (enum op make-env)
		      (high-byte nargs)
		      (low-byte nargs)))
       (let ((vars (normalize-formals formals))
	     (level (+ level 1)))
	 (set-lexical-offsets! (reverse vars) level)
	 (note-environment
	  (map name-node->symbol vars)
	  (compile body level 0 (return-cont name)))))))

(define (name-node->symbol node)
  (let ((form (node-form node)))
    (cond ((name? form)
	   (name->symbol form))
	  ((symbol? form)
	   form)
	  (else
	   #f))))

; Give each name node in NAMES a binding record that has the names lexical
; level and offset.

(define (set-lexical-offsets! names level)
  (let loop ((over 1) (names names))
    (if (not (null? names))
	(begin
	  (node-set! (car names) 
		     'binding
		     (cons level over))
	  (loop (+ over 1) (cdr names))))))

(define-compilator 'flat-lambda syntax-type
  (lambda (node level depth cont)
    (let ((exp (node-form node))
	  (name (cont-name cont)))
      (let ((vars (cadr exp))
	    (free (caddr exp))
 	    (body (cadddr exp)))
 	(deliver-value (compile-flat-lambda name vars body free level)
 		       cont)))))
 
; The MAKE-FLAT-ENV instruction is designed to allow us to make nested flat
; environments (i.e. flat environments consisting of a linked chain of vectors)
; but this code doesn't generate them.  The nested environments would avoid
; the need for offsets larger than a byte.  The current code cannot handle
; large environments.
; When we're done we have to restore the old locations of the free variables.

(define (compile-flat-lambda name vars body free level)
  (let* ((alist (sort-list (get-variables-offsets free level)
			   (lambda (p1 p2)
			     (< (cadr p1)
				(cadr p2)))))
	 (free (map car alist))
	 (old-locations (map name-node-binding free)))
    (set-lexical-offsets! free 0)  ; 0 is the level
    (let ((code (sequentially
		 (instruction (enum op false)) ; either the super env or the env
		 (if (null? free)
		     empty-segment
		     (apply instruction (enum op make-flat-env)
			    1   ; add in *val*
			    (+ (length free) 1)
			    (variable-env-data (map cdr alist))))
		 (instruction (enum op closure))
		 (note-environment (reverse (map node-form free))
				   (template (compile-lambda `(lambda ,vars
								,body)
							     0
							     #f)
					     (if (name? name)
						 (name->symbol name)
						 #f)))
		 (instruction 1)))) ; last byte of closure instruction, 1 means
                                    ; use *val* as environment, instead of *env*
      (for-each (lambda (node location)
		  (node-set! node 'binding location))
		free
		old-locations)
      code)))

; Looks up VARS in CENV and returns an alist of (<name> . (<level> <over>))
; pairs.

(define (get-variables-offsets vars level)
  (let loop ((vars vars) (locs '()))
    (if (null? vars)
	locs
	(let ((binding (name-node-binding (car vars))))
	  (if (pair? binding)
	      (let ((back (- level (car binding)))
		    (over (cdr binding)))
		(if (< byte-limit over)
		    (error "lexical environment limit exceeded; complain to implementors"))
		(loop (cdr vars)
		      (cons (cons (car vars)
				  (cons back over))
			    locs)))
	      (error "variable in flat-lambda list is not local"
		     (car vars)))))))

; Addresses is a list of (level . over) pairs, sorted by level.
; This returns the reverse of the following data:
;   <back for level>
;   <number of variables from this level>
;   <over of 1st var> ...
;   <back for next level>
;   ...
; If a <back> is too large we insert some empty levels.

(define (variable-env-data addresses)
  (let level-loop ((addresses addresses) (last-level 0) (data '()))
    (if (null? addresses)
	(reverse data)
	(let ((level (caar addresses)))
	  (let loop ((addresses addresses) (overs '()))
	    (if (or (null? addresses)
		    (not (= level (caar addresses))))
		(level-loop addresses
			    level
			    (append overs
				    (list (length overs))
				    (let loop ((delta (- level last-level))
					       (back '()))
				      (if (<= delta byte-limit)
					  (cons delta back)
					  (loop (- delta byte-limit)
						`(0 ,byte-limit . ,back))))
				    data))
		(loop (cdr addresses)
		      (cons (cdar addresses) overs))))))))
	  
; We should probably just use the sort from big-scheme.

(define (sort-list xs less?)
  (letrec ((insert (lambda (x xs)
		     (if (null? xs)
			 (list x)
			 (if (less? (car xs) x)
			     (cons (car xs)
				   (insert x (cdr xs)))
			     (cons x xs))))))
    (let sort ((xs xs))    
      (if (null? xs)
	  '()
	  (insert (car xs)
		  (sort (cdr xs)))))))

; LETREC.

(define-compilator 'letrec syntax-type
  (lambda (node level depth cont)
    ;; (if (node-ref node 'pure-letrec) ...)
    (let* ((exp (node-form node))
	   (specs (cadr exp))
	   (body (caddr exp))
	   (body (make-node operator/begin
			    `(begin
			       ,@(map (lambda (spec)
					(make-node operator/set!
						   `(set! ,@spec)))
				      specs)
			       ,body))))
      (if (null? specs)
	  (compile body level depth cont) ;+++
	  (maybe-push-continuation
	    (sequentially
	      (apply sequentially
		     (map (lambda (spec)
			    (sequentially
			      (instruction (enum op unassigned))
			      (instruction (enum op push))))
			  specs))
	      (compile-lambda-code (map car specs) body level (cont-name cont)))
	    depth
	    cont)))))

; --------------------
; Compile-time continuations
;
; A compile-time continuation is a pair (segment . source-info).
; Segment is one of the following:
;   a return instruction - invoke the current full continuation.
;   empty-segment - fall through to subsequent instructions.
;   an ignore-values instruction - ignore values, then fall through.
; Source-info is one of:
;   #f - we don't know anything
;   symbol - value delivered to subsequent instructions will be assigned to
;     a variable with this name.  If the value being assigned is a lambda, we
;     can give that lambda that name.
;   (i . node) - the value being computed is the i'th subexpression of the node.

(define (make-cont seg source-info) (cons seg source-info))
(define cont-segment car)
(define cont-source-info cdr)

; We could probably be able to optimize jumps to jumps.
;(define (make-jump-cont label cont)
;  (if (fall-through-cont? cont)
;      (make-cont label (cont-name cont))
;      cont))

(define return-cont-segment (instruction (enum op return)))

(define (return-cont name)
  (make-cont return-cont-segment name))

(define (return-cont? cont)
  (eq? (cont-segment cont) return-cont-segment))

; Fall through into next instruction while compiling the I'th part of NODE.

(define (fall-through-cont node i)
  (make-cont empty-segment (cons i node)))

(define (fall-through-cont? cont)
  (not (return-cont? cont)))

; Ignore return value, then fall through

(define ignore-values-segment
  (instruction (enum op ignore-values)))

(define (ignore-values-cont node i)
  (make-cont ignore-values-segment (cons i node)))

(define (ignore-values-cont? cont)
  (eq? (cont-segment cont) ignore-values-segment))

; Value is in *val*; deliver it to its continuation.
; No need to generate an ignore-values instruction in this case.

(define (deliver-value segment cont)
  (if (ignore-values-cont? cont)	;+++
      segment
      (sequentially segment (cont-segment cont))))

; For putting names to lambda expressions:

(define (named-cont name)
  (make-cont empty-segment name))

(define (cont-name cont)
  (if (pair? (cont-source-info cont))
      #f
      (cont-source-info cont)))

; Find lookup result that was cached by classifier

(define (name-node-binding node)
  (or (node-ref node 'binding)
      (node-form node)))

; --------------------
; Utilities

; Produce something for source code that contains a compile-time error.

(define (generate-trap cont . stuff)
  (apply warn stuff)
  (sequentially (instruction-with-literal (enum op literal)
					  (cons 'error stuff))
		(deliver-value (instruction (enum op trap))
			       cont)))

; --------------------
; Type checking.  This gets called on all nodes.

(define (type-check node)
  (if *type-check?*
      (let ((form (node-form node)))
	(if (pair? form)
	    (let ((proc-node (car form)))
	      (if (node? proc-node)
		  (let ((proc-type (node-type proc-node)))
		    (cond ((procedure-type? proc-type)
			   (if (restrictive? proc-type)
			       (let* ((args (cdr form))
				      (args-type (make-some-values-type
						  (map (lambda (arg)
							 (meet-type
							  (node-type arg)
							  value-type))
						       args)))
				      (node (make-similar-node node
							       (cons proc-node
								     args))))
				 (if (not (meet? args-type
						 (procedure-type-domain proc-type)))
				     (diagnose-call-error node proc-type))
				 node)
			       node))
			  ((not (meet? proc-type any-procedure-type))
			   ;; Could also check args for one-valuedness.
			   (let ((message "non-procedure in operator position"))
			     (warn message
				   (schemify node)
				   `(procedure: ,proc-type))
			     (node-set! node 'type-error message))
			   node)
			  (else node)))
		  node))
	    node))
      node))

(define (set-type-check?! check?)
  (set! *type-check?* check?))

(define *type-check?* #t)


(define (diagnose-call-error node proc-type)
  (let ((message
	 (cond ((not (fixed-arity-procedure-type? proc-type))
		"invalid arguments")
	       ((= (procedure-type-arity proc-type)
		   (length (cdr (node-form node))))
		"argument type error")
	       (else
		"wrong number of arguments"))))
    (warn message
	  (schemify node)
	  `(procedure wants:
		      ,(rail-type->sexp (procedure-type-domain proc-type)
					#f))
	  `(arguments are: ,(map (lambda (arg)
				   (type->sexp (node-type arg) #t))
				 (cdr (node-form node)))))
    (node-set! node 'type-error message)))


; Type system loophole

(define-compilator 'loophole syntax-type
  (lambda (node level depth cont)
    (compile (caddr (node-form node)) level depth cont)))

; Node predicates and operators.

(define lambda-node?  (node-predicate 'lambda syntax-type))
(define name-node?    (node-predicate 'name 'leaf))
(define literal-node? (node-predicate 'literal 'leaf))

(define operator/lambda     (get-operator 'lambda syntax-type))
(define operator/set!	    (get-operator 'set!   syntax-type))
(define operator/call	    (get-operator 'call   'internal))
(define operator/begin      (get-operator 'begin  syntax-type))