434 lines
12 KiB
Scheme
434 lines
12 KiB
Scheme
; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
|
|
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
|
|
|
|
|
; A little Scheme reader.
|
|
|
|
; Nonstandard things used:
|
|
; Ascii stuff: char->ascii, ascii->char, ascii-whitespaces, ascii-limit
|
|
; (for dispatch table; portable definitions in alt/ascii.scm)
|
|
; reverse-list->string -- ok to define as follows:
|
|
; (define (reverse-list->string l n)
|
|
; (list->string (reverse l)))
|
|
; make-immutable! -- ok to define as follows:
|
|
; (define (make-immutable! x) x)
|
|
; signal (only for use by reading-error; easily excised)
|
|
|
|
;scsh start
|
|
|
|
(define preferred-case (lambda (x) x))
|
|
|
|
(define (script-skip c port)
|
|
(read-char port)
|
|
(let lp ((state 0))
|
|
(let ((advance-if (lambda (look-for)
|
|
(let ((c (read-char port)))
|
|
(if (eof-object? c)
|
|
(reading-error port
|
|
"EOF inside block comment -- #! missing a closing !#")
|
|
(lp (cond ((char=? c look-for) (+ state 1))
|
|
((char=? c #\newline) 1)
|
|
((char=? c cr) state)
|
|
(else 0))))))))
|
|
(case state
|
|
((0) (advance-if #\newline))
|
|
((1) (advance-if #\!)) ; Found \n
|
|
((2) (advance-if #\#)) ; Found \n!
|
|
((3) (advance-if #\newline)) ; Found \n!#
|
|
((4) (read port))
|
|
(else
|
|
(reading-error port "case other")))))) ; Found \n!#\n -- done.
|
|
; was sub-read ^
|
|
|
|
|
|
(define (multi-line-comment-skip c port)
|
|
(read-char port)
|
|
(let lp ((state 0) (nested? #f))
|
|
(let* ((advance-one-of-two
|
|
(lambda (look-for1 state1 look-for2 state2 nested?)
|
|
(let ((c (read-char port)))
|
|
(if (eof-object? c)
|
|
(error
|
|
"EOF inside block comment -- #| missing a closing |#")
|
|
(lp (cond ((char=? c look-for1) state1)
|
|
((char=? c look-for2) state2)
|
|
(else 0)) nested?)))))
|
|
(advance-if (lambda (look-for state nested?)
|
|
(advance-one-of-two look-for state
|
|
look-for state
|
|
nested?))))
|
|
(case state
|
|
((0) (advance-one-of-two #\| 1 #\# 5 nested?))
|
|
((1) (advance-if #\# 2 nested?))
|
|
((2) (if nested? #f (sub-read port)))
|
|
((5) (advance-if #\| 6 nested?))
|
|
((6) (lp 0 #t) (lp 0 nested?))))))
|
|
|
|
|
|
; scsh stop
|
|
|
|
(define (read . port-option)
|
|
(let ((port (input-port-option port-option)))
|
|
(let loop ()
|
|
(let ((form (sub-read port)))
|
|
(cond ((not (reader-token? form)) form)
|
|
((eq? form close-paren)
|
|
;; Too many right parens.
|
|
(warn "discarding extraneous right parenthesis")
|
|
(loop))
|
|
(else
|
|
(reading-error port (cdr form))))))))
|
|
|
|
(define (sub-read-carefully port)
|
|
(let ((form (sub-read port)))
|
|
(cond ((eof-object? form)
|
|
(reading-error port "unexpected end of file"))
|
|
((reader-token? form) (reading-error port (cdr form)))
|
|
(else form))))
|
|
|
|
(define reader-token-marker (list 'reader-token))
|
|
(define (make-reader-token message) (cons reader-token-marker message))
|
|
(define (reader-token? form)
|
|
(and (pair? form) (eq? (car form) reader-token-marker)))
|
|
|
|
(define close-paren (make-reader-token "unexpected right parenthesis"))
|
|
(define dot (make-reader-token "unexpected \" . \""))
|
|
|
|
|
|
; Main dispatch
|
|
|
|
(define (sub-read port)
|
|
(let ((c (read-char port)))
|
|
(if (eof-object? c)
|
|
c
|
|
((vector-ref read-dispatch-vector (char->ascii c))
|
|
c port))))
|
|
|
|
(define read-dispatch-vector
|
|
(make-vector ascii-limit
|
|
(lambda (c port)
|
|
(reading-error port "illegal character read" c))))
|
|
|
|
(define read-terminating?-vector
|
|
(make-vector ascii-limit #t))
|
|
|
|
(define (set-standard-syntax! char terminating? reader)
|
|
(vector-set! read-dispatch-vector (char->ascii char) reader)
|
|
(vector-set! read-terminating?-vector (char->ascii char) terminating?))
|
|
|
|
(let ((sub-read-whitespace
|
|
(lambda (c port)
|
|
c ;ignored
|
|
(sub-read port))))
|
|
(for-each (lambda (c)
|
|
(vector-set! read-dispatch-vector c sub-read-whitespace))
|
|
ascii-whitespaces))
|
|
|
|
(let ((sub-read-constituent
|
|
(lambda (c port)
|
|
(parse-token (sub-read-token c port) port))))
|
|
(for-each (lambda (c)
|
|
(set-standard-syntax! c #f sub-read-constituent))
|
|
(string->list
|
|
(string-append "!$%&*+-./0123456789:<=>?@^_~ABCDEFGHIJKLM"
|
|
"NOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))))
|
|
|
|
; Usual read macros
|
|
|
|
(define (set-standard-read-macro! c terminating? proc)
|
|
(set-standard-syntax! c terminating? proc))
|
|
|
|
(define (sub-read-list c port)
|
|
(let ((form (sub-read port)))
|
|
(if (eq? form dot)
|
|
(reading-error port
|
|
"missing car -- ( immediately followed by .")
|
|
(let recur ((form form))
|
|
(cond ((eof-object? form)
|
|
(reading-error port
|
|
"end of file inside list -- unbalanced parentheses"))
|
|
((eq? form close-paren) '())
|
|
((eq? form dot)
|
|
(let* ((last-form (sub-read-carefully port))
|
|
(another-form (sub-read port)))
|
|
(if (eq? another-form close-paren)
|
|
last-form
|
|
(reading-error port
|
|
"randomness after form after dot"
|
|
another-form))))
|
|
(else
|
|
(cons form (recur (sub-read port)))))))))
|
|
|
|
(set-standard-read-macro! #\( #t sub-read-list)
|
|
|
|
(set-standard-read-macro! #\) #t
|
|
(lambda (c port)
|
|
c port
|
|
close-paren))
|
|
|
|
(set-standard-read-macro! #\' #t
|
|
(lambda (c port)
|
|
c
|
|
(list 'quote (sub-read-carefully port))))
|
|
|
|
(set-standard-read-macro! #\` #t
|
|
(lambda (c port)
|
|
c
|
|
(list 'quasiquote (sub-read-carefully port))))
|
|
|
|
(set-standard-read-macro! #\, #t
|
|
(lambda (c port)
|
|
c
|
|
(let* ((next (peek-char port))
|
|
;; DO NOT beta-reduce!
|
|
(keyword (cond ((eof-object? next)
|
|
(reading-error port "end of file after ,"))
|
|
((char=? next #\@)
|
|
(read-char port)
|
|
'unquote-splicing)
|
|
(else 'unquote))))
|
|
(list keyword
|
|
(sub-read-carefully port)))))
|
|
|
|
;(set-standard-read-macro! #\" #t
|
|
; (lambda (c port)
|
|
; c ;ignored
|
|
; (let loop ((l '()) (i 0))
|
|
; (let ((c (read-char port)))
|
|
; (cond ((eof-object? c)
|
|
; (reading-error port "end of file within a string"))
|
|
; ((char=? c #\\)
|
|
; (let ((c (read-char port)))
|
|
; (cond ((eof-object? c)
|
|
; (reading-error port "end of file within a string"))
|
|
; ((or (char=? c #\\) (char=? c #\"))
|
|
; (loop (cons c l) (+ i 1)))
|
|
; (else
|
|
; (reading-error port
|
|
; "invalid escaped character in string"
|
|
; c)))))
|
|
; ((char=? c #\")
|
|
; (reverse-list->string l i))
|
|
; (else
|
|
; (loop (cons c l) (+ i 1))))))))
|
|
|
|
(set-standard-read-macro! #\; #t
|
|
(lambda (c port)
|
|
c ;ignored
|
|
(gobble-line port)
|
|
(sub-read port)))
|
|
|
|
(define (gobble-line port)
|
|
(let loop ()
|
|
(let ((c (read-char port)))
|
|
(cond ((eof-object? c) c)
|
|
((char=? c #\newline) #f)
|
|
(else (loop))))))
|
|
|
|
(define *sharp-macros* '())
|
|
|
|
(define (define-sharp-macro c proc)
|
|
(set! *sharp-macros* (cons (cons c proc) *sharp-macros*)))
|
|
|
|
(set-standard-read-macro! #\# #f
|
|
(lambda (c port)
|
|
c ;ignored
|
|
(let* ((c (peek-char port))
|
|
(c (if (eof-object? c)
|
|
(reading-error port "end of file after #")
|
|
(char-downcase c)))
|
|
(probe (assq c *sharp-macros*)))
|
|
(if probe
|
|
((cdr probe) c port)
|
|
(reading-error port "unknown # syntax" c)))))
|
|
|
|
(define-sharp-macro #\f
|
|
(lambda (c port) (read-char port) #f))
|
|
|
|
(define-sharp-macro #\t
|
|
(lambda (c port) (read-char port) #t))
|
|
|
|
(define-sharp-macro #\\
|
|
(lambda (c port)
|
|
(read-char port)
|
|
(let ((c (peek-char port)))
|
|
(cond ((eof-object? c)
|
|
(reading-error port "end of file after #\\"))
|
|
((char-alphabetic? c)
|
|
(let ((name (sub-read-carefully port)))
|
|
(cond ((= (string-length (symbol->string name)) 1)
|
|
c)
|
|
((assq name '((space #\space)
|
|
(newline #\newline)))
|
|
=> cadr)
|
|
(else
|
|
(reading-error port "unknown #\\ name" name)))))
|
|
(else
|
|
(read-char port)
|
|
c)))))
|
|
|
|
(define-sharp-macro #\(
|
|
(lambda (c port)
|
|
(read-char port)
|
|
(list->vector (sub-read-list c port))))
|
|
|
|
(let ((number-sharp-macro
|
|
(lambda (c port)
|
|
(let ((string (sub-read-token #\# port)))
|
|
(or (string->number string)
|
|
(reading-error port "unsupported number syntax" string))))))
|
|
(for-each (lambda (c)
|
|
(define-sharp-macro c number-sharp-macro))
|
|
'(#\b #\o #\d #\x #\i #\e)))
|
|
|
|
(define-sharp-macro #\! script-skip)
|
|
|
|
(define-sharp-macro #\| multi-line-comment-skip)
|
|
|
|
; Tokens
|
|
|
|
(define (sub-read-token c port)
|
|
(let loop ((l (list (preferred-case c))) (n 1))
|
|
(let ((c (peek-char port)))
|
|
(cond ((or (eof-object? c)
|
|
(vector-ref read-terminating?-vector (char->ascii c)))
|
|
(reverse-list->string l n))
|
|
(else
|
|
(read-char port)
|
|
(loop (cons (preferred-case c) l)
|
|
(+ n 1)))))))
|
|
|
|
;(define (parse-token string port)
|
|
; (if (let ((c (string-ref string 0)))
|
|
; (or (char-numeric? c) (char=? c #\+) (char=? c #\-) (char=? c #\.)))
|
|
; (cond ((string->number string))
|
|
; ((member string strange-symbol-names)
|
|
; (string->symbol (make-immutable! string)))
|
|
; ((string=? string ".")
|
|
; dot)
|
|
; (else
|
|
; (reading-error port "unsupported number syntax" string)))
|
|
; (string->symbol (make-immutable! string))))
|
|
|
|
; scsh start
|
|
(define (parse-token string port)
|
|
(if (let ((c (string-ref string 0)))
|
|
(or (char-numeric? c) (char=? c #\+) (char=? c #\-) (char=? c #\.)))
|
|
(cond ((string->number string))
|
|
((string=? string ".") dot)
|
|
(else (string->symbol (make-immutable! string))))
|
|
(string->symbol (make-immutable! string))))
|
|
|
|
(set-standard-syntax! #\| #f
|
|
(lambda (c port)
|
|
(parse-token (sub-read-token c port) port)))
|
|
|
|
|
|
|
|
(define bel (ascii->char 7))
|
|
(define bs (ascii->char 8))
|
|
(define ff (ascii->char 12))
|
|
(define cr (ascii->char 13))
|
|
(define ht (ascii->char 9))
|
|
(define vt (ascii->char 11))
|
|
|
|
;;; Full ANSI C strings:
|
|
;;; - read as themselves: \\ \? \" \'
|
|
;;; - control chars:
|
|
;;; \a alert (bell -- ^g)
|
|
;;; \b backspace (^h)
|
|
;;; \f form feed (^l)
|
|
;;; \n newline (^j)
|
|
;;; \r carriage return (^m)
|
|
;;; \t tab (^i)
|
|
;;; \v vertical tab (^k)
|
|
;;; - octal escapes \nnn
|
|
;;; - hex escapes \xnn
|
|
|
|
;;; Is this the elegant thing to do? Too much might make it hard to shift
|
|
;;; to Unicode implementations. How about \^g for embedding control chars?
|
|
;;; And I haven't done anything about chars (as opposed to strings).
|
|
|
|
(set-standard-read-macro! #\" #t
|
|
(lambda (c port)
|
|
c ;ignored
|
|
(let* ((readc (lambda ()
|
|
(let ((c (read-char port)))
|
|
(if (eof-object? c)
|
|
(reading-error port "end of file within a string")
|
|
c))))
|
|
(read-digit (lambda (base base-name)
|
|
(let* ((c (readc))
|
|
(d (- (char->ascii c) (char->ascii #\0))))
|
|
(if (and (<= 0 d) (< d base)) d
|
|
(reading-error port
|
|
(string-append "invalid "
|
|
base-name
|
|
" code in string.")
|
|
d))))))
|
|
|
|
(let loop ((l '()) (i 0))
|
|
(let ((c (readc)))
|
|
(cond ((char=? c #\\)
|
|
(let* ((c (readc))
|
|
(rc (case c
|
|
((#\\ #\" #\? #\') c)
|
|
((#\a) bel)
|
|
((#\b) bs)
|
|
((#\f) ff)
|
|
((#\n) #\newline)
|
|
((#\r) cr)
|
|
((#\t) ht)
|
|
((#\v) vt)
|
|
((#\0 #\1 #\2 #\3)
|
|
(let* ((d1 (- (char->ascii c) (char->ascii #\0)))
|
|
(d2 (read-digit 8 "octal"))
|
|
(d3 (read-digit 8 "octal")))
|
|
(ascii->char (+ (* 64 d1) (+ (* 8 d2) d3)))))
|
|
((#\x)
|
|
(let ((d1 (read-digit 16 "hex"))
|
|
(d2 (read-digit 16 "hex")))
|
|
(ascii->char (+ (* 16 d1) d2))))
|
|
(else
|
|
(reading-error port
|
|
"invalid escapedcharacter in string"
|
|
c)))))
|
|
(loop (cons rc l) (+ i 1))))
|
|
((char=? c #\")
|
|
(reverse-list->string l i))
|
|
(else
|
|
(loop (cons c l) (+ i 1)))))))))
|
|
|
|
;scsh stop
|
|
|
|
(define strange-symbol-names
|
|
'("+" "-" "..."
|
|
"1+" "-1+" ;Only for S&ICP support
|
|
"->" ;Only for JAR's thesis
|
|
))
|
|
|
|
;--- This loses because the compiler won't in-line it. Hacked by hand
|
|
; because it is in READ's inner loop.
|
|
;(define preferred-case
|
|
; (if (char=? (string-ref (symbol->string 't) 0) #\T)
|
|
; char-upcase
|
|
; char-downcase))
|
|
|
|
(define p-c-v (make-string ascii-limit #\0))
|
|
|
|
(let ((p-c (if (char=? (string-ref (symbol->string 't) 0) #\T)
|
|
char-upcase
|
|
char-downcase)))
|
|
(do ((i 0 (+ i 1)))
|
|
((>= i ascii-limit))
|
|
(string-set! p-c-v i (p-c (ascii->char i)))))
|
|
|
|
;(define (preferred-case c)
|
|
; (string-ref p-c-v (char->ascii c)))
|
|
|
|
; Reader errors
|
|
|
|
(define (reading-error port message . irritants)
|
|
(apply signal 'read-error message
|
|
(append irritants (list port))))
|