;; -*- mode: scheme; coding: utf-8 -*- ;; Copyright © 2017, 2018, 2019 G. Weinholt ;; SPDX-License-Identifier: MIT ;; Permission is hereby granted, free of charge, to any person obtaining a ;; copy of this software and associated documentation files (the "Software"), ;; to deal in the Software without restriction, including without limitation ;; the rights to use, copy, modify, merge, publish, distribute, sublicense, ;; and/or sell copies of the Software, and to permit persons to whom the ;; Software is furnished to do so, subject to the following conditions: ;; The above copyright notice and this permission notice shall be included in ;; all copies or substantial portions of the Software. ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ;; DEALINGS IN THE SOFTWARE. #!r6rs ;; RnRS lexer and reader with source annotations. ;; Incomplete but useful list of lexical differences in R7RS: ;; https://github.com/larcenists/larceny/wiki/R7RSconversion (library (laesare reader) (export get-token read-annotated read-datum detect-scheme-file-type reader? make-reader reader-warning reader-port reader-mode reader-mode-set! reader-fold-case? reader-fold-case?-set! reader-tolerant? reader-tolerant?-set! reader-line reader-column reader-saved-line reader-saved-column annotation? annotation-expression annotation-stripped annotation-source annotation-source->condition source-condition? source-filename source-line source-column) (import (rnrs arithmetic fixnums (6)) (rnrs base (6)) (rnrs bytevectors (6)) (rnrs conditions (6)) (rnrs control (6)) (rnrs exceptions (6)) (rnrs hashtables (6)) (rnrs lists (6)) (rnrs mutable-pairs (6)) ;for #= (prefix (only (rnrs io ports (6)) lookahead-char get-char put-char eof-object? call-with-string-output-port) rnrs:) (only (rnrs io simple (6)) write display newline current-error-port) ;debugging (rnrs records syntactic (6)) (rnrs unicode (6))) (define eof-object? rnrs:eof-object?) ;; Peek at the next char from the reader. (define (lookahead-char reader) (rnrs:lookahead-char (reader-port reader))) ;; Get a char from the reader. (define (get-char reader) (let ((c (rnrs:get-char (reader-port reader)))) (when (eqv? c #\linefeed) (reader-line-set! reader (+ (reader-line reader) 1)) (reader-column-set! reader -1)) (reader-column-set! reader (+ (reader-column reader) 1)) c)) ;; Detects the (intended) type of Scheme source: r6rs-library, ;; r6rs-program, empty or unknown. (define (detect-scheme-file-type port) (let ((reader (make-reader port ""))) (let-values (((type lexeme) (get-lexeme reader))) (case type ((eof) 'empty) ((shebang) 'r6rs-program) ((openp openb) ;a pair (let-values (((type lexeme) (get-lexeme reader))) (case type ((identifier) (case lexeme ((import) 'r6rs-program) ((library) 'r6rs-library) ((define-library) 'r7rs-library) (else 'unknown))) (else 'unknown)))) (else 'unknown))))) (define-record-type reader (fields port filename (mutable line) (mutable column) (mutable saved-line) (mutable saved-column) (mutable fold-case?) ;boolean (mutable mode) ;a symbol: rnrs, r5rs, r6rs, r7rs (mutable tolerant?)) ;tolerant to errors? (sealed #t) (opaque #f) (nongenerative reader-v0-eec5b78f-a766-4be4-9cd0-fbb52ec572dc) (protocol (lambda (p) (lambda (port filename) (p port filename 1 0 1 0 #f 'rnrs #f))))) (define (reader-mark reader) (reader-saved-line-set! reader (reader-line reader)) (reader-saved-column-set! reader (reader-column reader))) ;; As wanted by psyntax (define-record-type annotation (fields expression source stripped) (sealed #t) (opaque #f) (nongenerative annotation-v0-dc9637b3-85e8-4599-9fe9-151508e9c850)) (define-condition-type &source-information &condition make-source-condition source-condition? (file-name source-filename) (line source-line) (column source-column)) (define (annotation-source->condition x) (if (vector? x) (apply make-source-condition (vector->list x)) (condition))) (define (reader-source reader) (vector (reader-filename reader) (reader-saved-line reader) (reader-saved-column reader))) (define (annotate source stripped datum) #;(assert (reader? reader)) (assert (vector? source)) (make-annotation datum source stripped)) (define (read-annotated reader) (assert (reader? reader)) (let ((labels (make-labels))) (let*-values (((type x) (get-lexeme reader)) ((_ d^) (handle-lexeme reader type x labels #f))) (resolve-labels reader labels) d^))) (define (read-datum reader) (assert (reader? reader)) (let ((labels (make-labels))) (let*-values (((type x) (get-lexeme reader)) ((d _) (handle-lexeme reader type x labels #f))) (resolve-labels reader labels) d))) ;;; Lexeme reader (define (lexical-condition reader msg irritants) (condition (make-lexical-violation) (make-message-condition msg) (make-source-condition (reader-filename reader) (reader-saved-line reader) (reader-saved-column reader)) (make-irritants-condition irritants))) (define (reader-error reader msg . irritants) ;; Non-recoverable errors. (raise (lexical-condition reader msg irritants))) (define (reader-warning reader msg . irritants) ;; Recoverable if the reader is in tolerant mode. (if (reader-tolerant? reader) (raise-continuable (condition (make-warning) (lexical-condition reader msg irritants))) (apply reader-error reader msg irritants))) (define (assert-mode p msg modes) (unless (memq (reader-mode p) modes) (reader-warning p (string-append msg " is not allowed in this mode") (reader-mode p)))) (define (eof-warning reader) (reader-warning reader "Unexpected EOF")) (define (unicode-scalar-value? sv) (and (fixnum? sv) (fx<=? 0 sv #x10FFFF) (not (fx<=? #xD800 sv #xDFFF)))) (define (char-delimiter? reader c) ;; Treats the eof-object as a delimiter (or (eof-object? c) (char-whitespace? c) (case (reader-mode reader) ((r6rs) (memv c '(#\( #\) #\[ #\] #\" #\; #\#))) ((r7rs) (memv c '(#\( #\) #\" #\; #\|))) (else (memv c '(#\( #\) #\[ #\] #\" #\; #\# #\|)))))) ;; Get a line from the reader. (define (get-line reader) (rnrs:call-with-string-output-port (lambda (out) (do ((c (get-char reader) (get-char reader))) ((or (eqv? c #\linefeed) (eof-object? c))) (rnrs:put-char out c))))) ;; Gets whitespace from the reader. (define (get-whitespace reader char) (rnrs:call-with-string-output-port (lambda (out) (let lp ((char char)) (rnrs:put-char out char) (let ((char (lookahead-char reader))) (when (and (char? char) (char-whitespace? char)) (lp (get-char reader)))))))) ;; Get an inline hex escape (escaped character inside an identifier). (define (get-inline-hex-escape p) (reader-mark p) (let lp ((digits '())) (let ((c (get-char p))) (cond ((eof-object? c) (eof-warning p) #\xFFFD) ((or (char<=? #\0 c #\9) (char<=? #\a c #\f) (char<=? #\A c #\F)) (lp (cons c digits))) ((and (char=? c #\;) (pair? digits)) (let ((sv (string->number (list->string (reverse digits)) 16))) (cond ((unicode-scalar-value? sv) (integer->char sv)) (else (reader-warning p "Inline hex escape outside valid range" sv) #\xFFFD)))) (else (reader-warning p "Invalid inline hex escape" c) #\xFFFD))))) (define (get-identifier p initial-char pipe-quoted?) (let lp ((chars (if initial-char (list initial-char) '()))) (let ((c (lookahead-char p))) (cond ((and (char? c) (or (char<=? #\a c #\z) (char<=? #\A c #\Z) (char<=? #\0 c #\9) (memv c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~ #\+ #\- #\. #\@)) (and (> (char->integer c) 127) (memq (char-general-category c) ;XXX: could be done faster '(Lu Ll Lt Lm Lo Mn Nl No Pd Pc Po Sc Sm Sk So Co Nd Mc Me))) (and (memv (reader-mode p) '(rnrs r7rs)) (memv c '(#\x200C #\x200D))))) (lp (cons (get-char p) chars))) ((and pipe-quoted? (char? c) (not (memv c '(#\| #\\)))) (lp (cons (get-char p) chars))) ((or (char-delimiter? p c) (and pipe-quoted? (eqv? c #\|))) (when (eqv? c #\|) (get-char p)) (let ((id (list->string (reverse chars)))) (if (reader-fold-case? p) (values 'identifier (string->symbol (string-foldcase id))) (values 'identifier (string->symbol id))))) ((char=? c #\\) ;\xUUUU; (get-char p) ;consume #\\ (let ((c (get-char p))) ;should be #\x (cond ((eqv? c #\x) (lp (cons (get-inline-hex-escape p) chars))) ((and pipe-quoted? (assv c '((#\" . #\") (#\\ . #\\) (#\a . #\alarm) (#\b . #\backspace) (#\t . #\tab) (#\n . #\linefeed) (#\r . #\return) (#\| . #\|)))) => (lambda (c) (lp (cons (cdr c) chars)))) (else (if (eof-object? c) (eof-warning p) (reader-warning p "Invalid character following \\")) (lp chars))))) (else (reader-warning p "Invalid character in identifier" c) (get-char p) (lp chars)))))) ;; Get a number from the reader. (define (get-number p initial-chars) (let lp ((chars initial-chars)) (let ((c (lookahead-char p))) (cond ((and (not (eqv? c #\#)) (char-delimiter? p c)) ;; TODO: some standard numbers are not supported ;; everywhere, should use a number lexer. (let ((str (list->string (reverse chars)))) (cond ((string->number str) => (lambda (num) (values 'value num))) ((and (memq (reader-mode p) '(rnrs r7rs)) ;; TODO: This is incomplete. (not (and (pair? initial-chars) (char<=? #\0 (car initial-chars) #\9)))) (values 'identifier (string->symbol str))) (else (reader-warning p "Invalid number syntax" str) (values 'identifier (string->symbol str)))))) (else (lp (cons (get-char p) chars))))))) ;; Get a string datum from the reader. (define (get-string p) (let lp ((chars '())) (let ((c (lookahead-char p))) (cond ((eof-object? c) (eof-warning p) c) ((char=? c #\") (get-char p) (list->string (reverse chars))) ((char=? c #\\) ;escapes (get-char p) ;consume #\\ (let ((c (lookahead-char p))) (cond ((eof-object? c) (eof-warning p) c) ((or (memv c '(#\tab #\linefeed #\x85 #\x2028)) (eq? (char-general-category c) 'Zs)) ;; \* ;; * (letrec ((skip-intraline-whitespace* (lambda () (let ((c (lookahead-char p))) (cond ((eof-object? c) (eof-warning p) c) ((or (char=? c '#\tab) (eq? (char-general-category c) 'Zs)) (get-char p) (skip-intraline-whitespace*)))))) (skip-newline (lambda () (let ((c (get-char p))) ;; XXX: it appears that the port ;; transcoder is meant to ;; replace all these linefeeds ;; with #\linefeed. (cond ((eof-object? c) c) ((memv c '(#\linefeed #\x85 #\x2028))) ((char=? c #\return) (when (memv (lookahead-char p) '(#\linefeed #\x85)) (get-char p))) (else (reader-warning p "Expected a line ending" c))))))) (skip-intraline-whitespace*) (skip-newline) (skip-intraline-whitespace*) (lp chars))) (else (lp (cons (case (get-char p) ((#\") #\") ((#\\) #\\) ((#\a) #\alarm) ((#\b) #\backspace) ((#\t) #\tab) ((#\n) #\linefeed) ((#\v) (assert-mode p "\\v" '(rnrs r6rs)) #\vtab) ((#\f) (assert-mode p "\\f" '(rnrs r6rs)) #\page) ((#\r) #\return) ((#\|) (assert-mode p "\\|" '(rnrs r7rs)) #\|) ((#\x) (get-inline-hex-escape p)) (else (reader-warning p "Invalid escape in string" c) #\xFFFD)) chars)))))) (else (lp (cons (get-char p) chars))))))) ;; Gets a nested comment from the reader. (define (get-nested-comment reader) ;; The reader is immediately after "#|". (rnrs:call-with-string-output-port (lambda (out) (let lp ((levels 1) (c0 (get-char reader))) (let ((c1 (get-char reader))) (cond ((eof-object? c0) (eof-warning reader)) ((and (eqv? c0 #\|) (eqv? c1 #\#)) (unless (eqv? levels 1) (rnrs:put-char out c0) (rnrs:put-char out c1) (lp (- levels 1) (get-char reader)))) ((and (eqv? c0 #\#) (eqv? c1 #\|)) (rnrs:put-char out c0) (rnrs:put-char out c1) (lp (+ levels 1) (get-char reader))) (else (rnrs:put-char out c0) (lp levels c1)))))))) ;; Gets a #! !# comment from the reader. (define (get-!-comment reader) ;; The reader is immediately after "#!". (rnrs:call-with-string-output-port (lambda (out) (let lp ((c0 (get-char reader))) (let ((c1 (get-char reader))) (cond ((eof-object? c0) (eof-warning reader)) ((and (eqv? c0 #\!) (eqv? c1 #\#)) #f) (else (rnrs:put-char out c0) (lp c1)))))))) ;; Get a comment from the reader (including the terminating whitespace). (define (get-comment reader) ;; The reader is immediately after #\;. (rnrs:call-with-string-output-port (lambda (out) (let lp () (let ((c (get-char reader))) (unless (eof-object? c) (rnrs:put-char out c) (cond ((memv c '(#\linefeed #\x85 #\x2028 #\x2029))) ((char=? c #\return) ;; Weird line ending. This lookahead is what forces ;; the procedure to include the terminator. (when (memv (lookahead-char reader) '(#\linefeed #\x85)) (rnrs:put-char out (get-char reader)))) (else (lp))))))))) ;; Whitespace and comments can appear anywhere. (define (atmosphere? type) (memq type '(directive whitespace comment inline-comment nested-comment))) ;; Get the next lexeme from the reader, ignoring anything that is ;; like a comment. (define (get-lexeme p) (let-values (((type lexeme) (get-token p))) (if (atmosphere? type) (get-lexeme p) (values type lexeme)))) ;; Get the next token. Can be a lexeme, directive, whitespace or comment. (define (get-token p) (assert (reader? p)) (reader-mark p) (let ((c (get-char p))) (cond ((eof-object? c) (values 'eof c)) ((char-whitespace? c) (values 'whitespace (get-whitespace p c))) ((char=? c #\;) ;a comment like this one (values 'comment (get-comment p))) ((char=? c #\#) ;the mighty octothorpe (let ((c (get-char p))) (case c ((#\() (values 'vector #f)) ((#\') (values 'abbrev 'syntax)) ((#\`) (values 'abbrev 'quasisyntax)) ((#\,) (case (lookahead-char p) ((#\@) (get-char p) (values 'abbrev 'unsyntax-splicing)) (else (values 'abbrev 'unsyntax)))) ((#\v) ;r6rs (let* ((c1 (and (eqv? (lookahead-char p) #\u) (get-char p))) (c2 (and (eqv? c1 #\u) (eqv? (lookahead-char p) #\8) (get-char p))) (c3 (and (eqv? c2 #\8) (eqv? (lookahead-char p) #\() (get-char p)))) (cond ((and (eqv? c1 #\u) (eqv? c2 #\8) (eqv? c3 #\()) (assert-mode p "#vu8(" '(rnrs r6rs)) (values 'bytevector #f)) (else (reader-warning p "Expected #vu8(") (get-token p))))) ((#\u #\U) ;r7rs (let* ((c1 (and (eqv? (lookahead-char p) #\8) (get-char p))) (c2 (and (eqv? c1 #\8) (eqv? (lookahead-char p) #\() (get-char p)))) (cond ((and (eqv? c1 #\8) (eqv? c2 #\()) (assert-mode p "#u8(" '(rnrs r7rs)) (values 'bytevector #f)) (else (reader-warning p "Expected #u8(") (get-token p))))) ((#\;) ;s-expr/datum comment (let lp ((atmosphere '())) (let-values (((type token) (get-token p))) (cond ((eq? type 'eof) (eof-warning p) (values 'inline-comment (cons (reverse atmosphere) p))) ((atmosphere? type) (lp (cons (cons type token) atmosphere))) (else (let-values ([(d _) (handle-lexeme p type token #f #t)]) (values 'inline-comment (cons (reverse atmosphere) d)))))))) ((#\|) ;nested comment (values 'nested-comment (get-nested-comment p))) ((#\!) ;#!r6rs etc (let ((next-char (lookahead-char p))) (cond ((and (= (reader-saved-line p) 1) (memv next-char '(#\/ #\space))) (let ((line (reader-saved-line p)) (column (reader-saved-column p))) (values 'shebang `(,line ,column ,(get-line p))))) ((and (char? next-char) (char-alphabetic? next-char)) (let-values (((type id) (get-token p))) (cond ((eq? type 'identifier) (case id ((r6rs) ;r6rs.pdf (assert-mode p "#!r6rs" '(rnrs r6rs)) (reader-mode-set! p 'r6rs)) ((fold-case) ;r6rs-app.pdf (assert-mode p "#!fold-case" '(rnrs r6rs r7rs)) (reader-fold-case?-set! p #t)) ((no-fold-case) ;r6rs-app.pdf (assert-mode p "#!no-fold-case" '(rnrs r6rs r7rs)) (reader-fold-case?-set! p #f)) ((r7rs) ;oddly missing in r7rs (assert-mode p "#!r7rs" '(rnrs)) (reader-mode-set! p 'r7rs)) ((false) ;r2rs (assert-mode p "#!false" '(rnrs r2rs))) ((true) ;r2rs (assert-mode p "#!true" '(rnrs r2rs))) (else (reader-warning p "Invalid directive" type id))) (cond ((assq id '((false . #f) (true . #t))) => (lambda (x) (values 'value (cdr x)))) (else (values 'directive id)))) (else (reader-warning p "Expected an identifier after #!") (get-token p))))) ((eq? (reader-mode p) 'rnrs) ;; Guile compat. (get-token p) (values 'comment (get-!-comment p))) (else (reader-warning p "Expected an identifier after #!") (get-token p))))) ((#\b #\B #\o #\O #\d #\D #\x #\X #\i #\I #\e #\E) (get-number p (list c #\#))) ((#\t #\T) (unless (char-delimiter? p (lookahead-char p)) (if (memq (reader-mode p) '(rnrs r7rs)) (let* ((c1 (and (memv (lookahead-char p) '(#\r #\R)) (get-char p))) (c2 (and c1 (memv (lookahead-char p) '(#\u #\U)) (get-char p))) (c3 (and c2 (memv (lookahead-char p) '(#\e #\E)) (get-char p)))) (unless (and c1 c2 c3 (char-delimiter? p (lookahead-char p))) (reader-warning p "Expected #true"))) (reader-warning p "A delimiter is expected after #t"))) (values 'value #t)) ((#\f #\F) (unless (char-delimiter? p (lookahead-char p)) (if (memq (reader-mode p) '(rnrs r7rs)) (let* ((c1 (and (memv (lookahead-char p) '(#\a #\A)) (get-char p))) (c2 (and c1 (memv (lookahead-char p) '(#\l #\L)) (get-char p))) (c3 (and c2 (memv (lookahead-char p) '(#\s #\S)) (get-char p))) (c4 (and c3 (memv (lookahead-char p) '(#\e #\E)) (get-char p)))) (unless (and c1 c2 c3 c4 (char-delimiter? p (lookahead-char p))) (reader-warning p "Expected #false" c1 c2 c3 c4))) (reader-warning p "A delimiter is expected after #f"))) (values 'value #f)) ((#\\) (let lp ((char* '())) (let ((c (lookahead-char p))) (cond ((and (pair? char*) (char-delimiter? p c)) (let ((char* (reverse char*))) (cond ((null? char*) (reader-warning p "Empty character name") (values 'value #\xFFFD)) ((null? (cdr char*)) (values 'value (car char*))) ((char=? (car char*) #\x) (cond ((for-all (lambda (c) (or (char<=? #\0 c #\9) (char<=? #\a c #\f) (char<=? #\A c #\F))) (cdr char*)) (let ((sv (string->number (list->string (cdr char*)) 16))) (cond ((unicode-scalar-value? sv) (values 'value (integer->char sv))) (else (reader-warning p "Hex-escaped character outside valid range" sv) (values 'value #\xFFFD))))) (else (reader-warning p "Invalid character in hex-escaped character" (list->string (cdr char*))) (values 'value #\xFFFD)))) (else (let ((char-name (list->string char*)) (char-names '(("nul" #\nul r6rs) ("null" #\nul r7rs) ("alarm" #\alarm r6rs r7rs) ("backspace" #\backspace r6rs r7rs) ("tab" #\tab r6rs r7rs) ("linefeed" #\linefeed r6rs) ("newline" #\linefeed r5rs r6rs r7rs) ("vtab" #\vtab r6rs) ("page" #\page r6rs) ("return" #\return r6rs r7rs) ("esc" #\esc r6rs) ("escape" #\esc r7rs) ("space" #\space r5rs r6rs r7rs) ("delete" #\delete r6rs r7rs)))) (cond ((or (assoc char-name char-names) (and (reader-fold-case? p) (assoc (string-foldcase char-name) char-names))) => (lambda (char-data) (assert-mode p char-name (cons 'rnrs (cddr char-data))) (values 'value (cadr char-data)))) (else (reader-warning p "Invalid character name" char-name) (values 'value #\xFFFD)))))))) ((and (null? char*) (eof-object? c)) (eof-warning p) (values 'value #\xFFFD)) (else (lp (cons (get-char p) char*))))))) ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (assert-mode p "#= and ##" '(rnrs r7rs)) (let lp ((char* (list c))) (let ((next (lookahead-char p))) (cond ((eof-object? next) (eof-warning p) (get-char p)) ((char<=? #\0 next #\9) (lp (cons (get-char p) char*))) ((char=? next #\=) (get-char p) (values 'label (string->number (list->string (reverse char*)) 10))) ((char=? next #\#) (get-char p) (values 'reference (string->number (list->string (reverse char*)) 10))) (else (reader-warning p "Expected #= or ##" next) (get-token p)))))) (else (reader-warning p "Invalid #-syntax" c) (get-token p))))) ((char=? c #\") (values 'value (get-string p))) ((memv c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (get-number p (list c))) ((memv c '(#\- #\+)) ;peculiar identifier (cond ((and (char=? c #\-) (eqv? #\> (lookahead-char p))) ;-> (get-identifier p c #f)) ((char-delimiter? p (lookahead-char p)) (values 'identifier (if (eqv? c #\-) '- '+))) (else (get-number p (list c))))) ((char=? c #\.) ;peculiar identifier (cond ((char-delimiter? p (lookahead-char p)) (values 'dot #f)) ((and (eq? (reader-mode p) 'r6rs) (eqv? #\. (lookahead-char p))) (get-char p) ;consume second dot (unless (eqv? #\. (get-char p)) ;consume third dot (reader-warning p "Expected the ... identifier")) (unless (char-delimiter? p (lookahead-char p)) (reader-warning p "Expected the ... identifier")) (values 'identifier '...)) (else (get-number p (list c))))) ((or (char<=? #\a c #\z) (char<=? #\A c #\Z) ; and (memv c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~)) (and (memv (reader-mode p) '(rnrs r7rs)) (or (eqv? c #\@) (memv c '(#\x200C #\x200D)))) (and (> (char->integer c) 127) (memq (char-general-category c) '(Lu Ll Lt Lm Lo Mn Nl No Pd Pc Po Sc Sm Sk So Co)))) (get-identifier p c #f)) ((char=? c #\\) ; (let ((c (get-char p))) (cond ((eqv? c #\x) (get-identifier p (get-inline-hex-escape p) #f)) (else (cond ((eof-object? c) (eof-warning p)) (else (reader-warning p "Invalid character following \\"))) (get-token p))))) (else (case c ((#\() (values 'openp #f)) ((#\)) (values 'closep #f)) ((#\[) (values 'openb #f)) ((#\]) (values 'closeb #f)) ((#\') (values 'abbrev 'quote)) ((#\`) (values 'abbrev 'quasiquote)) ((#\,) (case (lookahead-char p) ((#\@) (get-char p) (values 'abbrev 'unquote-splicing)) (else (values 'abbrev 'unquote)))) ((#\|) (assert-mode p "Quoted identifiers" '(rnrs r7rs)) (get-identifier p #f 'pipe)) (else (reader-warning p "Invalid leading character" c) (get-token p))))))) ;;; Datum reader ;; ;; | ;; | ;; | | | ;; ;; | | ;; → (*) | [*] ;; | (+ . ) | [+ . ] ;; | ;; ;; → ' | ` | , | ,@ ;; | #' | #` | #, | #,@ ;; → #(*) ;; → #vu8(*) ;; → 〈any representing an exact ;; integer in {0, ..., 255}〉 (define (get-compound-datum p src terminator type labels) (define vec #f) ;TODO: ugly, should be rewritten (define vec^ #f) (let lp ((head '()) (head^ '()) (prev #f) (prev^ #f) (len 0)) (let-values (((lextype x) (get-lexeme p))) (case lextype ((closep closeb eof) (unless (eq? lextype terminator) (if (eof-object? x) (eof-warning p) (reader-warning p "Mismatched parenthesis/brackets" lextype x terminator))) (case type ((vector) (let ((s (list->vector head)) (s^ (list->vector head^))) (set! vec s) (set! vec^ (annotate src s s^)) (values vec vec^))) ((list) (values head (annotate src head head^))) ((bytevector) (let ((s (u8-list->bytevector head))) (values s (annotate src s s)))) (else (reader-error p "Internal error in get-compound-datum" type)))) ((dot) ;a dot like in (1 . 2) (cond ((eq? type 'list) (let*-values (((lextype x) (get-lexeme p)) ((d d^) (handle-lexeme p lextype x labels #t))) (let-values (((termtype _) (get-lexeme p))) (cond ((eq? termtype terminator)) ((eq? termtype 'eof) (eof-warning p)) (else (reader-warning p "Improperly terminated dot list")))) (cond ((pair? prev) (cond ((eq? d^ 'reference) (register-reference p labels d (lambda (d d^) (set-cdr! prev d) (set-cdr! prev^ d^)))) (else (set-cdr! prev d) (set-cdr! prev^ d^)))) (else (reader-warning p "Unexpected dot"))) (values head (annotate src head head^)))) (else (reader-warning p "Dot used in non-list datum") (lp head head^ prev prev^ len)))) (else (let-values (((d d^) (handle-lexeme p lextype x labels #t))) (cond ((and (eq? type 'bytevector) (or (eq? d^ 'reference) (not (and (fixnum? d) (fx<=? 0 d 255))))) (reader-warning p "Invalid datum in bytevector" x) (lp head head^ prev prev^ len)) (else (let ((new-prev (cons d '())) (new-prev^ (cons d^ '()))) (when (pair? prev) (set-cdr! prev new-prev) (set-cdr! prev^ new-prev^)) (when (eq? d^ 'reference) (register-reference p labels d (if (eq? type 'vector) (lambda (d d^) (vector-set! vec len d) (vector-set! (annotation-expression vec^) len d^)) (lambda (d d^) (set-car! new-prev d) (set-car! new-prev^ d^))))) (if (pair? head) (lp head head^ new-prev new-prev^ (fx+ len 1)) (lp new-prev new-prev^ new-prev new-prev^ (fx+ len 1)))))))))))) (define (handle-lexeme p lextype x labels allow-refs?) (let ((src (reader-source p))) (case lextype ((openp) (get-compound-datum p src 'closep 'list labels)) ((openb) (assert-mode p "Square brackets" '(rnrs r6rs)) (get-compound-datum p src 'closeb 'list labels)) ((vector) (get-compound-datum p src 'closep 'vector labels)) ((bytevector) ;; TODO: open-bytevector-output-port would be faster (get-compound-datum p src 'closep 'bytevector labels)) ((value eof identifier) (values x (annotate src x x))) ((abbrev) (let-values (((type lex) (get-lexeme p))) (cond ((eq? type 'eof) (eof-warning p) (values lex lex)) (else (let-values (((d d^) (handle-lexeme p type lex labels #t))) (let ((s (list x d))) (values s (annotate src s (list x d^))))))))) ((label) ;; The object that follows this label can be referred ;; back from elsewhere. (let*-values (((lextype lexeme) (get-lexeme p)) ((d d^) (handle-lexeme p lextype lexeme labels allow-refs?))) (register-label p labels x d d^) (values d d^))) (else (cond ((and allow-refs? (eq? lextype 'reference)) (values x 'reference)) ;XXX: different return types (else ;; Ignore the shebang ("#!/" or "#! " at the start of files). ;; FIXME: should only work for programs. (unless (and (eq? lextype 'shebang) (eqv? (car x) 1) (eqv? (cadr x) 0)) (reader-warning p "Unexpected lexeme" lextype x)) (let-values (((lextype x) (get-lexeme p))) (handle-lexeme p lextype x labels allow-refs?)))))))) ;;; Shared/circular data (define (make-labels) (make-eqv-hashtable)) (define (register-label p labels label datum annotated-datum) (when labels (hashtable-update! labels label (lambda (old) (when (car old) (reader-warning p "Duplicate label" label)) (cons (cons datum annotated-datum) (cdr old))) (cons #f '())))) (define (register-reference _p labels label setter) (when labels (hashtable-update! labels label (lambda (old) (cons (car old) (cons setter (cdr old)))) (cons #f '())))) (define (resolve-labels p labels) (let-values (((ids datum/refs*) (hashtable-entries labels))) (vector-for-each (lambda (id datum/refs) (let ((datum (car datum/refs)) (refs (cdr datum/refs))) (unless datum (reader-warning p "Missing label" id)) (for-each (lambda (ref) (ref (car datum) (cdr datum))) refs))) ids datum/refs*))))