vx-scheme/src/simulator.scm

562 lines
18 KiB
Scheme

;; VM Simulator for Vx-Scheme
;;
;; Copyright (c) 2003,2006 and onwards Colin Smith
;;
;; This program can execute the machine code generated by the compiler
;; in compiler.scm. It's meant as a testbed for compiler development,
;; not for production use; the C implementation of the VM in vm.cpp
;; is considerably faster.
;;
;;; -------------------
;;; COMPILED PROCEDURES
;;; -------------------
(define (tagged-list? tag obj)
(and (list? obj)
(not (null? obj))
(eq? tag (car obj))))
(define (make-procedure env code start)
(list '*cproc* code start env))
(define (compiled-procedure? obj)
(tagged-list? '*cproc* obj))
(define (compiled-procedure-code cproc)
(cadr cproc))
(define (compiled-procedure-start cproc)
(caddr cproc))
(define (compiled-procedure-env cproc)
(cadddr cproc))
;;; ------------
;;; ENVIRONMENTS
;;; ------------
(define (make-empty-environment)
'())
(define (extend-environment env args)
(cons (cons 'E: args) env))
; attach a new entry to the end of the first environment
; in the list.
(define (adjoin-environment! env arg)
(set-car! env (append (car env) (list arg))))
; =========================
; VIRTUAL MACHINE SIMULATOR
; =========================
; ---------------
; VM global state
;
; This are kept global, so that (run) may be called multiple
; times, with subsequent runs seeing bindings established in
; previous runs. Call (init-vm) to prepare a clean slate for
; execution.
(define global-env '())
(define (init-vm)
(set! global-env '()))
(define (set-global-var! var value)
(cond ((assq var global-env) => (lambda (assoc) (set-cdr! assoc value)))
(else
(set! global-env (cons (cons var value) global-env)))))
(define (sim-execute insns)
(let* ((stack '())
(env '())
(n-args 0))
;; XXX: this needs to be kept in sync with the compiler
(define *inline-procedures*
'(+ * - quotient remainder vector-ref vector-set! car cdr
zero? null? not eq? pair? cons))
(define (push x)
(set! stack (cons x stack)))
(define (pop)
(let ((value (car stack)))
(set! stack (cdr stack))
value))
(define (take n L)
;; Take the n'th item from the list (zero-based) and move it to the
;; head. We use append, so the performance is poor, but in this
;; simulator n is always very small so we can get away with this
;; cheap implementation.
(let loop ((head '())
(tail L)
(i n))
(if (= i 0) (append (list (car tail)) head (cdr tail))
(loop (append head (list (car tail))) (cdr tail) (- i 1)))))
(define (empty?)
(null? stack))
(define (top)
(if (empty?) 'empty (car stack)))
(define (pop-list n)
(let loop ((l '())
(i n))
(if (= i 0) l
(loop (cons (pop) l) (- i 1)))))
(define (push-list l)
(let loop ((rest l))
(if (not (null? rest))
(begin
(push (car rest))
(loop (cdr rest))))))
(define (globally-bound? var)
(assq var global-env))
(define (global-ref var)
(cdr (assq var global-env)))
;;
;; Local Variables
;;
(define (local-variable-ref env eloc vloc)
(list-ref (cdr (list-ref env eloc)) vloc))
(define (local-variable-set! env eloc vloc value)
(let ((e (list-ref env eloc)))
(let ((cell (let loop ((i vloc)
(rest (cdr e)))
(if (= i 0)
rest
(loop (- i 1)
(cdr rest))))))
(set-car! cell value))))
(define (sim-procedure? p)
(or (procedure? p)
(compiled-procedure? p)))
(define (sim-output thing output stream)
;; While running in the VM, don't allow display/write to operate on
;; compiled procedures (due to the captured environment, these
;; objects may contain cycles).
(if (not (pair? thing))
(output thing stream)
(cond ((compiled-procedure? thing)
(display "#<sim-cproc>" stream))
((tagged-list? '*cont* thing)
(display "#<cont>" stream))
(else
(display "(" stream)
(let loop ((rest thing))
(cond ((null? (cdr rest))
(sim-output (car rest) output stream))
((pair? (cdr rest))
(sim-output (car rest) output stream)
(display " " stream)
(loop (cdr rest)))
(else
(sim-output (car rest) output stream)
(display " . " stream)
(sim-output (cdr rest) output stream))))
(display ")" stream)))))
;; intercept application attempts to run certain procedures and
;; substitute adjusted versions.
(define (remap-sim-procedure proc)
(cond ((eq? proc procedure?)
sim-procedure?)
((eq? proc display)
(lambda (e . stream) (sim-output e display
(if (null? stream)
(current-output-port)
(car stream)))))
((eq? proc write)
(lambda (e . stream) (sim-output e write
(if (null? stream)
(current-output-port)
(car stream)))))
((eq? proc load)
sim-load)
((eq? proc pair?)
; our compiled procedures are implemented as lists but
; they shouldn't appear to be pairs
(lambda (p) (and (pair? p) (not (compiled-procedure? p)))))
(else
proc)))
; set up a dummy continuation to catch the return to toplevel
(set! stack (list 'halt))
(call-with-current-continuation
(lambda (exit-with-value)
(let execute-instruction ((pc 0))
(define (make-continuation label)
(list '*cont* env insns label))
(define (continuation? obj)
(tagged-list? '*cont* obj))
(define (resume continuation)
(if (eq? continuation 'halt)
(begin
(let ((value (pop)))
(if (> (length stack) 0)
(begin
(display "program left material on stack:")
(display stack)
(newline)))
(exit-with-value value)))
(begin
(set! env (list-ref continuation 1))
(set! insns (list-ref continuation 2))
(execute-instruction (list-ref continuation 3)))))
(define (return)
(let ((value (pop))
(continuation (pop)))
(push value)
(resume continuation)))
(define (dump-stack)
(let loop ((rest stack))
(if (null? rest) 'ok
(let ((item (car rest)))
(cond ((continuation? item)
(display "<cont ")
(display (list-ref item 3))
(display "> "))
((compiled-procedure? item)
(display "<cproc> "))
((and (list? item)
(not (null? item))
(list? (car item))
(not (null? (car item)))
(eq? (caar item) 'e:))
(display "<env> "))
(else
(display item)
(display " ")))
(loop (cdr rest))))))
(define (trace insn)
(display insn) (display "\t| ")
(dump-stack)
(newline))
; If we fall off the end of the instruction list, we treat
; that like a return instruction.
(if (>= pc (vector-length insns))
(return))
; Fetch an instruction.
(let* ((insn (vector-ref insns pc))
(opcode (car insn))
(operand (if (null? (cdr insn)) #f (cadr insn)))
(operand2 (if (or (null? (cdr insn)) (null? (cddr insn))) #f
(caddr insn))))
;(trace insn)
; Dispatch.
(cond
;; ------------------------
;; THE MACHINE INSTRUCTIONS
;; ------------------------
;;
;; CONST x : push x onto stack.
;; CONSTI x : push x onto stack (x is a small integer).
;; INT x : push x onto stack (x is an integer).
((memq opcode '(const consti int))
(push operand))
;; UNSPC : push the unspecified value.
((eq? opcode 'unspc)
(push (if #f #f)))
;; UNASSN : push a signalling unassigned value
((eq? opcode 'unassn)
(push '*unassigned*)) ; xxx: arrange for signal-on-reference
;; NIL : push nil
((eq? opcode 'nil)
(push '()))
;; CODE c : just like CONST, but used when the top of stack
;; contains a vector of instructions.
((eq? opcode 'code)
(push operand))
; GREF s : push value of global variable s onto stack.
((eq? opcode 'gref)
(let ((value (cond ((globally-bound? operand)
(global-ref operand))
; snarf an implementation from the
; enclosing scheme
((and (symbol? operand)
(procedure? (eval operand)))
(eval operand))
(else "error: no global variable " operand))))
(push value)))
; GSET v : pop value; bind it to v in the global environment.
((eq? opcode 'gset)
(set-global-var! operand (pop)))
; LREF e i : push local variable from relative frame e, index i.
((eq? opcode 'lref)
(push (local-variable-ref env operand operand2)))
; LSET e i : pop stack, and set local variable from relative
; frame e, index i, to this value.
((eq? opcode 'lset)
(local-variable-set! env operand operand2 (pop)))
; GOTO n : goto instruction n
((eq? opcode 'goto)
(execute-instruction operand))
; FALSE?P n : pop stack; if that value is false, GOTO n
((eq? opcode 'false?p)
(if (not (pop))
(execute-instruction operand)))
; FALSE? n : if top of stack is #f, GOTO n
((eq? opcode 'false?)
(if (not (top))
(execute-instruction operand)))
; TRUE?P n : pop stack; if that value is true, GOTO n
((eq? opcode 'true?p)
(if (pop)
(execute-instruction operand)))
; TRUE? n : if top of stack is not #f, GOTO n
((eq? opcode 'true?)
(if (top)
(execute-instruction operand)))
; TRUE : push a true value
((eq? opcode 'true)
(push #t))
; FALSE : push a false value
((eq? opcode 'false)
(push #f))
; PROC : pop stack; join the code in that value with the
; the current environment to form a closure.
((eq? opcode 'proc)
; if the top of the stack held a vector of instructions, we
; understand the procedure to start at the first instruction
; in that vector. If the TOS is an integer, we regard that as
; an index into the current instruction vector. In either
; case, what we store is a cons of the vector and the correct
; index within it.
(let* ((code (pop))
(procedure (if (vector? code)
(make-procedure env code 0)
(make-procedure env insns code))))
(push procedure)))
((eq? opcode 'promise)
;; like proc, but we create a promise instead. A
;; promise (in the simulator) is a list with a flag indicating
;; whether the value has been forced, and the code.
(let* ((code (pop))
(procedure (if (vector? code)
(make-procedure env code 0)
(make-procedure env insns code))))
(push `(*promise* #f ,procedure))))
; EXTEND n : take n items from stack and bind them in env
((eq? opcode 'extend)
(if (< n-args operand) (error "VM: too few arguments"))
(set! env (extend-environment env (pop-list operand))))
; EXTEND! : take 1 argument (guaranteed to be on stack
; and extend the environment
((eq? opcode 'extend!)
(set! env (extend-environment env (list (pop)))))
; EXTEND. n : take n items from the stack and bind them; then
; take the remaining items and bind them as a list
; (used for (lambda (u v . x) ...))
((eq? opcode 'extend.)
(if (< n-args operand) (error "VM: too few arguments"))
(let ((rest-args (pop-list (- n-args operand))))
(set! env (extend-environment env (pop-list operand)))
(adjoin-environment! env rest-args)))
; SAVE c : create a continuation for label c on the stack
((eq? opcode 'save)
(push (make-continuation operand)))
; RETURN : resume continuation under value
((eq? opcode 'return)
(return))
; POP : discard the value at the top of the stack
((eq? opcode 'pop)
(pop))
; DUP : duplicate: push the value at the top of the stack.
((eq? opcode 'dup)
(push (top)))
; TAKE n : extract n'th element of stack (zero-based count)
; and place it at the top.
((eq? opcode 'take)
(set! stack (take operand stack)))
; CC : take a continuation from the stack and replace it
; with a procedure that will resume it.
((eq? opcode 'cc)
(push (make-procedure
(extend-environment env (list stack))
(assemble
`((extend 1)
(lref 1 0)
(lref 0 0)
(resume)))
0)))
; RESUME : take a return value and a continuation from the
; stack, and resume the continuation with the given
; return value
((eq? opcode 'resume)
(let ((retval (pop))
(newstack (pop)))
(set! stack newstack)
(push retval)
(return)))
; APPLY n : pop stack; apply that proc to next n stack entries
; APPLY. : reorganize the stack from the format
; (a1 a2... rest proc)
; where a1... are individual arguments, rest is a
; list, and proc is a procedure, into the form
; (proc rest a2 a1)
; where rest is reversed and spliced in. Then proceed
; as in apply.
((or (eq? opcode 'apply)
(eq? opcode 'apply.))
(if (eq? opcode 'apply.)
(begin
(set! operand (+ (length (top)) (- n-args 2)))
(let* ((arglist (do ((i (- n-args 2) (- i 1))
(arglist (pop) (cons (pop) arglist)))
((= i 0) arglist)))
(proc (pop)))
(push-list arglist)
(push proc))))
(let ((proc (pop)))
(cond
((compiled-procedure? proc)
; A compiled procedure. Establish the environment,
; and branch to the code.
(set! env (compiled-procedure-env proc))
(set! n-args operand)
(set! insns (compiled-procedure-code proc))
(execute-instruction (compiled-procedure-start proc)))
((procedure? proc)
; A primitive procedure.
; Collect the indicated number of arguments into a list.
; We intercept certain procedures (like display and
; procedure?) where the enclosing Scheme implementation
; isn't quite what we want.
(let ((arglist (pop-list operand))
(continuation (pop))
(the-proc (remap-sim-procedure proc)))
(push (apply the-proc arglist))
; primitive procedures aren't implemented in the virtual
; machine so they have no RETURN instruction at the end;
; we perform it here.
(resume continuation)))
(else
(display "-->") (display proc) (display "<--\n")
(error "can't apply that.")))))
;; SUBR f n : pop n entries from stack and apply primitive
;; procedure f to those arguments. Since the
;; call has been coded as a subr, this means
;; the compiler knows the function is primitive
;; and isn't expecting us to pop a continuation
;; and resume it.
((eq? opcode 'subr)
(let* ((arglist (pop-list operand2))
(real-proc (eval operand))
(the-proc (remap-sim-procedure real-proc)))
(push (apply the-proc arglist))))
;; <INLINE-FUNCTION> n : we inline certian functions (like car)
;; as opocdes. The one operand is the
;; number of arguments on the stack. From
;; the simulator's point of view, this
;; is simply a rearrangement of SUBR above,
;; but in the C VM, procedures like this
;; are handled in the VM's internal loop.
((memq opcode *inline-procedures*)
(let* ((arglist (pop-list operand))
(real-proc (eval opcode))
(the-proc (remap-sim-procedure real-proc)))
(display* "opcode=" opcode ", n=" operand ", alist=" arglist ", value=" (apply the-proc arglist) "\n")
(push (apply the-proc arglist))))
(else
(display opcode)
(error "bad opcode"))))
; Advance pc and continue.
(execute-instruction (+ pc 1)))))))
;; --------------------------
;; INTERFACE TO THE SIMULATOR
;; --------------------------
;; run an expression in the simulator's execution path
(define (sim-run exp)
(sim-execute (link (compile exp))))
;; load a file via the simulator's execution path
;; essentially this is a REPL into sim-run
(define (sim-load file)
(let ((input (open-input-file file)))
(do ((form (read input) (read input)))
((eof-object? form) 'ok)
(sim-run form))))
(sim-load "library.scm")
(set-global-var! 'apply
(make-procedure
(make-empty-environment)
(assemble `((apply.)))
0))
(set-global-var! 'call-with-current-continuation
(make-procedure
(make-empty-environment)
(assemble `((extend 1)
(cc)
(lref 0 0)
(apply 1)))
0))
(sim-run '(define (force promise)
(if (not (eq? (car promise) '*promise*))
(error "can't force that")
(if (cadr promise)
(caddr promise)
(let ((putative-value ((caddr promise))))
(if (cadr promise)
(caddr promise)
(begin
(set-car! (cdr promise) #t)
(set-car! (cddr promise) putative-value)
putative-value)))))))
;; ============
;; DISASSEMBLER
;; ============
;;
;; The disassembler just pretty-prints the output of the compile step
;; above (which produces code in the form of a tree of vectors). This
;; procedure uses indentation to display the internal procedures.
(define (sim-disassemble proc)
(define (dis insns indent)
(let loop ((rest insns))
(if (not (null? rest))
(let* ((insn (car rest))
(opcode (car insn))
(opnd1 (and (not (null? (cdr insn))) (cadr insn)))
(opnd2 (and opnd1 (not (null? (cddr insn))) (caddr insn))))
(if (eq? opcode 'code)
(dis (vector->list opnd1) (string-append " " indent))
(begin
(display indent) (display opcode)
(if opnd1 (begin (display "\t") (display opnd1)
(if opnd2 (begin (display ",")
(display opnd2)))))))
(newline)
(loop (cdr rest))))))
(cond
((symbol? proc) ; disassemble a function known to the global environment
(dis (vector->list (caddr (assq proc global-env))) ""))
((vector? proc) ; disassemble a vector of instructions
(dis (vector->list proc) ""))
(else
(error "don't know how to disassemble that."))))