ikarus/scheme/ikarus.reader.ss

;;; Ikarus Scheme -- A compiler for R6RS Scheme.
;;; Copyright (C) 2006,2007,2008  Abdulaziz Ghuloum
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License version 3 as
;;; published by the Free Software Foundation.
;;;
;;; This program is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program.  If not, see <http://www.gnu.org/licenses/>.


(library (ikarus.reader)
  (export read read-initial read-token comment-handler get-datum
          read-annotated read-script-annotated annotation?
          annotation-expression annotation-source
          annotation-stripped)
  (import
    (only (ikarus.string-to-number) define-string->number-parser)
    (ikarus system $chars)
    (ikarus system $fx)
    (ikarus system $pairs)
    (ikarus system $bytevectors)
    (only (ikarus.io) input-port-byte-position
          input-port-column-number)
    (except (ikarus) read-char read read-token comment-handler get-datum
      read-annotated read-script-annotated annotation?
      annotation-expression annotation-source annotation-stripped))

  (define (die/lex id pos who msg arg*)
    (raise
      (condition
        (make-lexical-violation)
        (make-message-condition msg)
        (if (null? arg*)
            (condition)
            (make-irritants-condition arg*))
        (make-source-position-condition
          id pos))))
  (define (die/pos p off who msg arg*)
    (die/lex (port-id p)
       (let ([pos (input-port-byte-position p)])
         (and pos (+ pos off)))
       who msg arg*))
  (define (die/p p who msg . arg*)
    (die/pos p 0 who msg arg*))
  (define (die/p-1 p who msg . arg*)
    (die/pos p -1 who msg arg*))
  (define (die/ann ann who msg . arg*)
    (let ([src (annotation-source ann)])
      (die/lex (car src) (cdr src) who msg arg*)))


  (define (checked-integer->char n ac p)
    (define (valid-integer-char? n)
      (cond
        [(<= n #xD7FF)   #t]
        [(< n #xE000)    #f]
        [(<= n #x10FFFF) #t]
        [else            #f]))
    (if (valid-integer-char? n)
        ($fixnum->char n)
        (die/p p 'tokenize
          "invalid numeric value for character"
          (list->string (reverse ac)))))

  (define-syntax read-char
    (syntax-rules ()
      [(_ p) (get-char p)]))

  (define delimiter?
    (lambda (c)
      (or (char-whitespace? c)
          (memq c '(#\( #\) #\[ #\] #\" #\# #\; #\{ #\} #\|)))))
  (define digit?
    (lambda (c)
      (and ($char<= #\0 c) ($char<= c #\9))))
  (define char->num
    (lambda (c)
      (fx- ($char->fixnum c) ($char->fixnum #\0))))
  (define initial?
    (lambda (c)
      (cond
        [($char<= c ($fixnum->char 127))
         (or (letter? c) (special-initial? c))]
        [else (unicode-printable-char? c)])))
  (define letter?
    (lambda (c)
      (or (and ($char<= #\a c) ($char<= c #\z))
          (and ($char<= #\A c) ($char<= c #\Z)))))
  (define special-initial?
    (lambda (c)
      (memq c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~))))
  (define special-subsequent?
    (lambda (c)
      (memq c '(#\+ #\- #\. #\@))))
  (define subsequent?
    (lambda (c)
      (cond
        [($char<= c ($fixnum->char 127))
         (or (letter? c)
             (digit? c)
             (special-initial? c)
             (special-subsequent? c))]
        [else
          (or (unicode-printable-char? c)
              (memq (char-general-category c) '(Nd Mc Me)))])))
  (define tokenize-identifier
    (lambda (ls p)
      (let ([c (peek-char p)])
        (cond
         [(eof-object? c) ls]
         [(subsequent? c)
          (read-char p)
          (tokenize-identifier (cons c ls) p)]
         [(delimiter? c)
          ls]
         [(char=? c #\\)
          (read-char p)
          (tokenize-backslash ls p)]
         [(eq? (port-mode p) 'r6rs-mode)
          (die/p p 'tokenize "invalid identifier syntax"
            (list->string (reverse (cons c ls))))]
         [else ls]))))
  (define (tokenize-string ls p)
    (let ([c (read-char p)])
      (cond
        [(eof-object? c)
         (die/p p 'tokenize "invalid eof inside string")]
        [else (tokenize-string-char ls p c)])))
  (define LF1 '(#\xA #\x85 #\x2028)) ;;; these are considered newlines
  (define LF2 '(#\xA #\x85))         ;;; these are not newlines if they
                                     ;;; appear after CR
  (define (tokenize-string-char ls p c)
    (define (intraline-whitespace? c)
      (or (eqv? c #\x9)
          (eq? (char-general-category c) 'Zs)))
    (define (tokenize-string-continue ls p c)
      (cond
        [(eof-object? c)
         (die/p p 'tokenize "invalid eof inside string")]
        [(intraline-whitespace? c)
         (let f ()
           (let ([c (read-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "invalid eof inside string")]
               [(intraline-whitespace? c) (f)]
               [else (tokenize-string-char ls p c)])))]
        [else (tokenize-string-char ls p c)]))
    (cond
     [($char= #\" c) ls]
     [($char= #\\ c)
      (let ([c (read-char p)])
        (cond
          [(eof-object? c)
           (die/p p 'tokenize "invalid eof after string escape")]
          [($char= #\a c) (tokenize-string (cons #\x7 ls) p)]
          [($char= #\b c) (tokenize-string (cons #\x8 ls) p)]
          [($char= #\t c) (tokenize-string (cons #\x9 ls) p)]
          [($char= #\n c) (tokenize-string (cons #\xA ls) p)]
          [($char= #\v c) (tokenize-string (cons #\xB ls) p)]
          [($char= #\f c) (tokenize-string (cons #\xC ls) p)]
          [($char= #\r c) (tokenize-string (cons #\xD ls) p)]
          [($char= #\" c) (tokenize-string (cons #\x22 ls) p)]
          [($char= #\\ c) (tokenize-string (cons #\x5C ls) p)]
          [($char= #\x c) ;;; unicode escape \xXXX;
           (let ([c (read-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "invalid eof inside string")]
               [(hex c) =>
                (lambda (n)
                  (let f ([n n] [ac (cons c '(#\x))])
                    (let ([c (read-char p)])
                      (cond
                        [(eof-object? n)
                         (die/p p 'tokenize "invalid eof inside string")]
                        [(hex c) =>
                         (lambda (v) (f (+ (* n 16) v) (cons c ac)))]
                        [($char= c #\;)
                         (tokenize-string
                           (cons (checked-integer->char n ac p) ls) p)]
                        [else
                         (die/p-1 p 'tokenize
                           "invalid char in escape sequence"
                           (list->string (reverse (cons c ac))))]))))]
               [else
                (die/p-1 p 'tokenize
                  "invalid char in escape sequence" c)]))]
          [(intraline-whitespace? c)
           (let f ()
             (let ([c (read-char p)])
               (cond
                 [(eof-object? c)
                  (die/p p 'tokenize "invalid eof inside string")]
                 [(intraline-whitespace? c) (f)]
                 [(memv c LF1)
                  (tokenize-string-continue ls p (read-char p))]
                 [(eqv? c #\return)
                  (let ([c (read-char p)])
                    (cond
                      [(memv c LF2)
                       (tokenize-string-continue ls p (read-char p))]
                      [else
                       (tokenize-string-continue ls p c)]))]
                 [else
                  (die/p-1 p 'tokenize
                    "non-whitespace character after escape")])))]
          [(memv c LF1)
           (tokenize-string-continue ls p (read-char p))]
          [(eqv? c #\return)
           (let ([c (read-char p)])
             (cond
               [(memv c LF2)
                (tokenize-string-continue ls p (read-char p))]
               [else
                (tokenize-string-continue ls p c)]))]
          [else (die/p-1 p 'tokenize "invalid string escape" c)]))]
     [(memv c LF1)
      (tokenize-string (cons #\linefeed ls) p)]
     [(eqv? c #\return)
      (let ([c (peek-char p)])
        (when (memv c LF2) (read-char p))
        (tokenize-string (cons #\linefeed ls) p))]
     [else
      (tokenize-string (cons c ls) p)]))
  (define skip-comment
    (lambda (p)
      (let ([c (read-char p)])
        (unless (or (eof-object? c) (memv c LF1) (eqv? c #\return))
          (skip-comment p)))))
  (define tokenize-dot
    (lambda (p)
      (let ([c (peek-char p)])
        (cond
          [(eof-object? c) 'dot]
          [(delimiter? c)  'dot]
          [($char= c #\.)  ; this is second dot
           (read-char p)
           (let ([c (peek-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "invalid syntax .. near end of file")]
               [($char= c #\.) ; this is the third
                (read-char p)
                (let ([c (peek-char p)])
                  (cond
                    [(eof-object? c) '(datum . ...)]
                    [(delimiter? c)  '(datum . ...)]
                    [else
                     (die/p p 'tokenize "invalid syntax"
                       (string-append "..." (string c)))]))]
               [else
                (die/p p 'tokenize "invalid syntax"
                  (string-append ".." (string c)))]))]
          [else
           (cons 'datum
             (dot p '(#\.) 10 #f +1))]))))
  (define tokenize-char*
    (lambda (i str p d)
      (cond
       [(fx= i (string-length str))
        (let ([c (peek-char p)])
          (cond
           [(eof-object? c) d]
           [(delimiter? c)  d]
           [else
            (die/p p 'tokenize "invalid character after sequence"
              (string-append str (string c)))]))]
       [else
        (let ([c (read-char p)])
          (cond
            [(eof-object? c)
             (die/p p 'tokenize
               "invalid eof in the middle of expected sequence" str)]
            [($char= c (string-ref str i))
             (tokenize-char* (fxadd1 i) str p d)]
            [else
             (die/p-1 p 'tokenize
                "invalid char while scanning string"
                 c str)]))])))
  (define tokenize-char-seq
    (lambda (p str d)
      (let ([c (peek-char p)])
        (cond
          [(eof-object? c) (cons 'datum (string-ref str 0))]
          [(delimiter? c)  (cons 'datum (string-ref str 0))]
          [($char= (string-ref str 1) c)
           (read-char p)
           (tokenize-char*  2 str p d)]
          [else
           (die/p p 'tokenize "invalid syntax"
             (string-ref str 0) c)]))))
  (define tokenize-char
    (lambda (p)
      (let ([c (read-char p)])
        (cond
          [(eof-object? c)
           (die/p p 'tokenize "invalid #\\ near end of file")]
          [(eqv? #\n c)
           (let ([c (peek-char p)])
             (cond
               [(eof-object? c)
                (read-char p)
                '(datum . #\n)]
               [(eqv? #\u c)
                (read-char p)
                (tokenize-char-seq p "ul" '(datum . #\x0))]
               [(eqv? #\e c)
                (read-char p)
                (tokenize-char-seq p "ewline" '(datum . #\xA))]
               [(delimiter? c)
                '(datum . #\n)]
               [else
                (die/p p 'tokenize "invalid syntax"
                  (string #\# #\\ #\n c))]))]
          [(eqv? #\a c)
           (tokenize-char-seq p "alarm" '(datum . #\x7))]
          [(eqv? #\b c)
           (tokenize-char-seq p "backspace" '(datum . #\x8))]
          [(eqv? #\t c)
           (tokenize-char-seq p "tab" '(datum . #\x9))]
          [(eqv? #\l c)
           (tokenize-char-seq p "linefeed" '(datum . #\xA))]
          [(eqv? #\v c)
           (tokenize-char-seq p "vtab" '(datum . #\xB))]
          [(eqv? #\p c)
           (tokenize-char-seq p "page" '(datum . #\xC))]
          [(eqv? #\r c)
           (tokenize-char-seq p "return" '(datum . #\xD))]
          [(eqv? #\e c)
           (tokenize-char-seq p "esc" '(datum . #\x1B))]
          [(eqv? #\s c)
           (tokenize-char-seq p "space" '(datum . #\x20))]
          [(eqv? #\d c)
           (tokenize-char-seq p "delete" '(datum . #\x7F))]
          [(eqv? #\x c)
           (let ([n (peek-char p)])
             (cond
               [(or (eof-object? n) (delimiter? n))
                '(datum . #\x)]
               [(hex n) =>
                (lambda (v)
                  (read-char p)
                  (let f ([v v] [ac (cons n '(#\x))])
                    (let ([c (peek-char p)])
                      (cond
                        [(eof-object? c)
                         (cons 'datum (checked-integer->char v ac p))]
                        [(delimiter? c)
                         (cons 'datum (checked-integer->char v ac p))]
                        [(hex c) =>
                         (lambda (v0)
                           (read-char p)
                           (f (+ (* v 16) v0) (cons c ac)))]
                        [else
                         (die/p p 'tokenize
                           "invalid character sequence"
                           (list->string (reverse (cons c ac))))]))))]
               [else
                (die/p p 'tokenize "invalid character sequence"
                       (string-append "#\\" (string n)))]))]
          [else
           (let ([n (peek-char p)])
             (cond
               [(eof-object? n) (cons 'datum c)]
               [(delimiter? n)  (cons 'datum c)]
               [else
                (die/p p 'tokenize "invalid syntax"
                  (string-append "#\\" (string c n)))]))]))))
  (define (hex x)
    (cond
      [(and ($char<= #\0 x) ($char<= x #\9))
       ($fx- ($char->fixnum x) ($char->fixnum #\0))]
      [(and ($char<= #\a x) ($char<= x #\f))
       ($fx- ($char->fixnum x)
             ($fx- ($char->fixnum #\a) 10))]
      [(and ($char<= #\A x) ($char<= x #\F))
       ($fx- ($char->fixnum x)
             ($fx- ($char->fixnum #\A) 10))]
      [else #f]))
  (define multiline-error
    (lambda (p)
      (die/p p 'tokenize
         "end of file encountered while inside a #|-style comment")))
  (define apprev
    (lambda (str i ac)
      (cond
        [(fx= i (string-length str)) ac]
        [else
         (apprev str (fx+ i 1) (cons (string-ref str i) ac))])))
  (define multiline-comment
    (lambda (p)
      (define f
        (lambda (p ac)
          (let ([c (read-char p)])
            (cond
              [(eof-object? c) (multiline-error p)]
              [($char= #\| c)
               (let g ([c (read-char p)] [ac ac])
                 (cond
                   [(eof-object? c) (multiline-error p)]
                   [($char= #\# c) ac]
                   [($char= #\| c)
                    (g (read-char p) (cons c ac))]
                   [else (f p (cons c ac))]))]
              [($char= #\# c)
               (let ([c (read-char p)])
                 (cond
                   [(eof-object? c) (multiline-error p)]
                   [($char= #\| c)
                    (let ([v (multiline-comment p)])
                      (if (string? v)
                          (f p (apprev v 0 ac))
                          (f p ac)))]
                   [else
                    (f p (cons c (cons #\# ac)))]))]
              [else (f p (cons c ac))]))))
      (let ([ac (f p '())])
        ((comment-handler)
          (list->string (reverse ac))))))
  (define tokenize-hash
    (lambda (p)
      (tokenize-hash/c (read-char p) p)))
  (define (skip-whitespace p caller)
    (let ([c (read-char p)])
      (cond
        [(eof-object? c)
         (die/p p 'tokenize "invalid eof inside" caller)]
        [(char-whitespace? c)
         (skip-whitespace p caller)]
        [else c])))
  (define tokenize-hash/c
    (lambda (c p)
      (cond
        [(eof-object? c) (die/p p 'tokenize "invalid # near end of file")]
        [(memq c '(#\t #\T))
         (let ([c1 (peek-char p)])
           (cond
             [(eof-object? c1) '(datum . #t)]
             [(delimiter? c1)  '(datum . #t)]
             [else (die/p p 'tokenize
                     (format "invalid syntax near #~a~a" c c1))]))]
        [(memq c '(#\f #\F))
         (let ([c1 (peek-char p)])
           (cond
             [(eof-object? c1) '(datum . #f)]
             [(delimiter? c1)  '(datum . #f)]
             [else (die/p p 'tokenize
                     (format "invalid syntax near #~a~a" c c1))]))]
        [($char= #\\ c) (tokenize-char p)]
        [($char= #\( c) 'vparen]
        [($char= #\' c) '(macro . syntax)]
        [($char= #\` c) '(macro . quasisyntax)]
        [($char= #\, c)
         (let ([c (peek-char p)])
           (cond
             [(eqv? c #\@) (read-char p)
              '(macro . unsyntax-splicing)]
             [else '(macro . unsyntax)]))]
        [($char= #\! c)
         (let ([e (read-char p)])
           (when (eof-object? e)
             (die/p p 'tokenize "invalid eof near #!"))
           (case e
             [(#\e)
              (when (eq? (port-mode p) 'r6rs-mode)
                (die/p-1 p 'tokenize "invalid syntax: #!e"))
              (read-char* p '(#\e) "of" "eof sequence" #f #f)
              (cons 'datum (eof-object))]
             [(#\r)
              (read-char* p '(#\r) "6rs" "#!r6rs comment" #f #f)
              (set-port-mode! p 'r6rs-mode)
              (tokenize/1 p)]
             [(#\i)
              (read-char* p '(#\i) "karus" "#!ikarus comment" #f #f)
              (set-port-mode! p 'ikarus-mode)
              (tokenize/1 p)]
             [else
              (die/p-1 p 'tokenize
                (format "invalid syntax near #!~a" e))]))]
        [(digit? c)
         (when (eq? (port-mode p) 'r6rs-mode)
           (die/p-1 p 'tokenize "graph syntax is invalid in #!r6rs mode"
              (format "#~a" c)))
         (tokenize-hashnum p (char->num c))]
        [($char= #\: c)
         (when (eq? (port-mode p) 'r6rs-mode)
           (die/p-1 p 'tokenize "gensym syntax is invalid in #!r6rs mode"
              (format "#~a" c)))
         (let* ([c (skip-whitespace p "gensym")]
                [id0
                 (cond
                   [(initial? c)
                    (list->string
                      (reverse (tokenize-identifier (cons c '()) p)))]
                   [($char= #\| c)
                    (list->string
                      (reverse (tokenize-bar p '())))]
                   [else
                    (die/p-1 p 'tokenize
                      "invalid char inside gensym" c)])])
              (cons 'datum (gensym id0)))]
        [($char= #\{ c)
         (when (eq? (port-mode p) 'r6rs-mode)
           (die/p-1 p 'tokenize "gensym syntax is invalid in #!r6rs mode"
              (format "#~a" c)))
         (let* ([c (skip-whitespace p "gensym")]
                [id0
                 (cond
                   [(initial? c)
                    (list->string
                      (reverse (tokenize-identifier (cons c '()) p)))]
                   [($char= #\| c)
                    (list->string
                      (reverse (tokenize-bar p '())))]
                   [else
                    (die/p-1 p 'tokenize
                      "invalid char inside gensym" c)])]
                [c (skip-whitespace p "gensym")])
           (cond
             [($char= #\} c)
              (cons 'datum
                (foreign-call "ikrt_strings_to_gensym" #f id0))]
             [else
              (let ([id1
                     (cond
                       [(initial? c)
                        (list->string
                         (reverse
                           (tokenize-identifier
                             (cons c '()) p)))]
                       [($char= #\| c)
                        (list->string
                          (reverse (tokenize-bar p '())))]
                       [else
                        (die/p-1 p 'tokenize
                          "invalid char inside gensym" c)])])
                (let ([c (skip-whitespace p "gensym")])
                  (cond
                    [($char= #\} c)
                     (cons 'datum
                      (foreign-call "ikrt_strings_to_gensym"
                        id0 id1))]
                    [else
                     (die/p-1 p 'tokenize
                        "invalid char inside gensym" c)])))]))]
        [($char= #\v c)
         (let ([c (read-char p)])
           (cond
             [($char= #\u c)
              (let ([c (read-char p)])
                (cond
                  [($char= c #\8)
                   (let ([c (read-char p)])
                     (cond
                       [($char= c #\() 'vu8]
                       [(eof-object? c)
                        (die/p p 'tokenize "invalid eof object after #vu8")]
                       [else (die/p-1 p 'tokenize
                               (format "invalid sequence #vu8~a" c))]))]
                  [(eof-object? c)
                   (die/p p 'tokenize "invalid eof object after #vu")]
                  [else (die/p-1 p 'tokenize
                           (format "invalid sequence #vu~a" c))]))]
             [(eof-object? c)
              (die/p p 'tokenize "invalid eof object after #v")]
             [else (die/p p 'tokenize
                     (format "invalid sequence #v~a" c))]))]
        [(memq c '(#\e #\E))
         (cons 'datum (parse-string p (list c #\#) 10 #f 'e))]
        [(memq c '(#\i #\I))
         (cons 'datum (parse-string p (list c #\#) 10 #f 'i))]
        [(memq c '(#\b #\B))
         (cons 'datum (parse-string p (list c #\#) 2 2 #f))]
        [(memq c '(#\x #\X))
         (cons 'datum (parse-string p (list c #\#) 16 16 #f))]
        [(memq c '(#\o #\O))
         (cons 'datum (parse-string p (list c #\#) 8 8 #f))]
        [(memq c '(#\d #\D))
         (cons 'datum (parse-string p (list c #\#) 10 10 #f))]
        ;[($char= #\@ c) DEAD: Unfixable due to port encoding
        ;                 that does not allow mixing binary and
        ;                 textual data in the same port.
        ;                Left here for historical value
        ; (when (eq? (port-mode p) 'r6rs-mode)
        ;   (die/p-1 p 'tokenize "fasl syntax is invalid in #!r6rs mode"
        ;      (format "#~a" c)))
        ; (die/p-1 p 'read "FIXME: fasl read disabled")
        ; '(cons 'datum ($fasl-read p))]
        [else
         (die/p-1 p 'tokenize
            (format "invalid syntax #~a" c))])))

  (define (num-error p str ls)
    (die/p-1 p 'read str
      (list->string (reverse ls))))

  (define-syntax port-config
    (syntax-rules (GEN-TEST GEN-ARGS FAIL EOF-ERROR GEN-DELIM-TEST)
      [(_ GEN-ARGS k . rest) (k (p ac) . rest)]
      [(_ FAIL (p ac))
       (num-error p "invalid numeric sequence" ac)]
      [(_ FAIL (p ac) c)
       (num-error p "invalid numeric sequence" (cons c ac))]
      [(_ EOF-ERROR (p ac))
       (num-error p "invalid eof while reading number" ac)]
      [(_ GEN-DELIM-TEST c sk fk)
       (if (delimiter? c) sk fk)]
      [(_ GEN-TEST var next fail (p ac) eof-case char-case)
       (let ([c (peek-char p)])
         (if (eof-object? c)
             (let ()
               (define-syntax fail
                 (syntax-rules ()
                    [(_) (num-error p "invalid numeric sequence" ac)]))
               eof-case)
             (let ([var c])
               (define-syntax fail
                 (syntax-rules ()
                    [(_)
                     (num-error p "invalid numeric sequence"
                        (cons var ac))]))
               (define-syntax next
                 (syntax-rules ()
                   [(_ who args (... ...))
                    (who p (cons (get-char p) ac) args (... ...))]))
               char-case)))]))

  (define-string->number-parser port-config
    (parse-string digit+ sign dot))

  (define (read-char* p ls str who ci? delimited?)
    (let f ([i 0] [ls ls])
      (cond
        [(fx= i (string-length str))
         (when delimited?
           (let ([c (peek-char p)])
             (when (and (not (eof-object? c)) (not (delimiter? c)))
               (die/p p 'tokenize
                 (format "invalid ~a: ~s" who
                   (list->string (reverse (cons c ls))))))))]
        [else
         (let ([c (read-char p)])
           (cond
             [(eof-object? c)
              (die/p p 'tokenize
                (format "invalid eof inside ~a" who))]
             [(or (and (not ci?) (char=? c (string-ref str i)))
                  (and ci? (char=? (char-downcase c) (string-ref str i))))
              (f (add1 i) (cons c ls))]
             [else
              (die/p-1 p 'tokenize
                (format "invalid ~a: ~s" who
                  (list->string (reverse (cons c ls)))))]))])))
  (define (tokenize-hashnum p n)
    (let ([c (read-char p)])
      (cond
        [(eof-object? c)
         (die/p p 'tokenize "invalid eof inside #n mark/ref")]
        [($char= #\= c) (cons 'mark n)]
        [($char= #\# c) (cons 'ref n)]
        [(digit? c)
         (tokenize-hashnum p (fx+ (fx* n 10) (char->num c)))]
        [else
         (die/p-1 p 'tokenize "invalid char while inside a #n mark/ref" c)])))
  (define tokenize-bar
    (lambda (p ac)
      (let ([c (read-char p)])
        (cond
          [(eof-object? c)
           (die/p p 'tokenize "unexpected eof while reading symbol")]
          [($char= #\\ c)
           (let ([c (read-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "unexpected eof while reading symbol")]
               [else (tokenize-bar p (cons c ac))]))]
          [($char= #\| c) ac]
          [else (tokenize-bar p (cons c ac))]))))
  (define (tokenize-backslash main-ac p)
    (let ([c (read-char p)])
      (cond
        [(eof-object? c)
         (die/p p 'tokenize "invalid eof after symbol escape")]
        [($char= #\x c)
         (let ([c (read-char p)])
           (cond
             [(eof-object? c)
              (die/p p 'tokenize "invalid eof after \\x")]
             [(hex c) =>
              (lambda (v)
                (let f ([v v] [ac `(,c #\x #\\)])
                  (let ([c (read-char p)])
                    (cond
                      [(eof-object? c)
                       (die/p p 'tokenize
                         (format "invalid eof after ~a"
                           (list->string (reverse ac))))]
                      [($char= #\; c)
                       (tokenize-identifier
                         (cons (checked-integer->char v ac p) main-ac)
                         p)]
                      [(hex c) =>
                       (lambda (v0)
                         (f (+ (* v 16) v0) (cons c ac)))]
                      [else
                       (die/p-1 p 'tokenize "invalid sequence"
                         (list->string (cons c (reverse ac))))]))))]
             [else
              (die/p-1 p 'tokenize
                 (format "invalid sequence \\x~a" c))]))]
        [else
         (die/p-1 p 'tokenize
           (format "invalid sequence \\~a" c))])))
  (define tokenize/c
    (lambda (c p)
      (cond
        [(eof-object? c)
         (error 'tokenize/c "hmmmm eof")
         (eof-object)]
        [($char= #\( c)   'lparen]
        [($char= #\) c)   'rparen]
        [($char= #\[ c)   'lbrack]
        [($char= #\] c)   'rbrack]
        [($char= #\' c)   '(macro . quote)]
        [($char= #\` c)   '(macro . quasiquote)]
        [($char= #\, c)
         (let ([c (peek-char p)])
           (cond
            [(eof-object? c) '(macro . unquote)]
            [($char= c #\@)
             (read-char p)
             '(macro . unquote-splicing)]
            [else '(macro . unquote)]))]
        [($char= #\# c) (tokenize-hash p)]
        [(char<=? #\0 c #\9)
         (let ([d (fx- (char->integer c) (char->integer #\0))])
           (cons 'datum
             (digit+ p (list c) 10 #f +1 d)))]
        [(initial? c)
         (let ([ls (reverse (tokenize-identifier (cons c '()) p))])
           (cons 'datum (string->symbol (list->string ls))))]
        [($char= #\" c)
         (let ([ls (tokenize-string '() p)])
           (cons 'datum (list->string (reverse ls))))]
        [(memq c '(#\+))
         (let ([c (peek-char p)])
           (cond
             [(eof-object? c) '(datum . +)]
             [(delimiter? c)  '(datum . +)]
             [else
              (cons 'datum
                (sign p '(#\+) 10 #f +1))]))]
        [(memq c '(#\-))
         (let ([c (peek-char p)])
           (cond
             [(eof-object? c) '(datum . -)]
             [(delimiter? c)  '(datum . -)]
             [($char= c #\>)
              (read-char p)
              (let ([ls (tokenize-identifier '() p)])
                (let ([str (list->string (cons* #\- #\> (reverse ls)))])
                  (cons 'datum (string->symbol str))))]
             [else
              (cons 'datum
                (sign p '(#\-) 10 #f -1))]))]
        [($char= #\. c)
         (tokenize-dot p)]
        [($char= #\| c)
         (when (eq? (port-mode p) 'r6rs-mode)
           (die/p p 'tokenize "|symbol| syntax is invalid in #!r6rs mode"))
         (let ([ls (reverse (tokenize-bar p '()))])
           (cons 'datum (string->symbol (list->string ls))))]
        [($char= #\\ c)
         (cons 'datum
            (string->symbol
              (list->string
                (reverse (tokenize-backslash '() p)))))]
        ;[($char= #\{ c) 'lbrace]
        [($char= #\@ c)
         (when (eq? (port-mode p) 'r6rs-mode)
           (die/p p 'tokenize "@-expr syntax is invalid in #!r6rs mode"))
         'at-expr]
        [else
         (die/p-1 p 'tokenize "invalid syntax" c)])))

  (define tokenize/1
    (lambda (p)
      (let ([c (read-char p)])
        (cond
          [(eof-object? c) (eof-object)]
          [(eqv? c #\;)
           (skip-comment p)
           (tokenize/1 p)]
          [(eqv? c #\#)
           (let ([c (read-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "invalid eof after #")]
               [(eqv? c #\;)
                (read-as-comment p)
                (tokenize/1 p)]
               [(eqv? c #\|)
                (multiline-comment p)
                (tokenize/1 p)]
               [else
                (tokenize-hash/c c p)]))]
          [(char-whitespace? c) (tokenize/1 p)]
          [else (tokenize/c c p)]))))

  (define tokenize/1+pos
    (lambda (p)
      (let ([pos (input-port-byte-position p)])
        (let ([c (read-char p)])
          (cond
            [(eof-object? c) (values (eof-object) pos)]
            [(eqv? c #\;)
             (skip-comment p)
             (tokenize/1+pos p)]
            [(eqv? c #\#)
             (let ([pos (input-port-byte-position p)])
               (let ([c (read-char p)])
                 (cond
                   [(eof-object? c)
                    (die/p p 'tokenize "invalid eof after #")]
                   [(eqv? c #\;)
                    (read-as-comment p)
                    (tokenize/1+pos p)]
                   [(eqv? c #\|)
                    (multiline-comment p)
                    (tokenize/1+pos p)]
                   [else
                    (values (tokenize-hash/c c p) pos)])))]
            [(char-whitespace? c) (tokenize/1+pos p)]
            [else
             (values (tokenize/c c p) pos)])))))

  (define tokenize-script-initial
    (lambda (p)
      (let ([c (read-char p)])
        (cond
          [(eof-object? c) c]
          [(eqv? c #\;)
           (skip-comment p)
           (tokenize/1 p)]
          [(eqv? c #\#)
           (let ([c (read-char p)])
             (cond
               [(eof-object? c)
                (die/p p 'tokenize "invalid eof after #")]
               [(eqv? c #\!)
                (skip-comment p)
                (tokenize/1 p)]
               [(eqv? c #\;)
                (read-as-comment p)
                (tokenize/1 p)]
               [(eqv? c #\|)
                (multiline-comment p)
                (tokenize/1 p)]
               [else
                (tokenize-hash/c c p)]))]
          [(char-whitespace? c) (tokenize/1 p)]
          [else (tokenize/c c p)]))))

  (define tokenize-script-initial+pos
    (lambda (p)
      (let ([pos (input-port-byte-position p)])
        (let ([c (read-char p)])
          (cond
            [(eof-object? c) (values (eof-object) pos)]
            [(eqv? c #\;)
             (skip-comment p)
             (tokenize/1+pos p)]
            [(eqv? c #\#)
             (let ([pos (input-port-byte-position p)])
               (let ([c (read-char p)])
                 (cond
                   [(eof-object? c)
                    (die/p p 'tokenize "invalid eof after #")]
                   [(eqv? c #\!)
                    (skip-comment p)
                    (tokenize/1+pos p)]
                   [(eqv? c #\;)
                    (read-as-comment p)
                    (tokenize/1+pos p)]
                   [(eqv? c #\|)
                    (multiline-comment p)
                    (tokenize/1+pos p)]
                   [else
                    (values (tokenize-hash/c c p) pos)])))]
            [(char-whitespace? c) (tokenize/1+pos p)]
            [else (values (tokenize/c c p) pos)])))))

  (define-struct loc (value value^ set?))

  ;;; this is reverse engineered from psyntax.ss
  (define-struct annotation (expression source stripped))
  ;;; - source is a pair of file-name x char-position
  ;;; - stripped is an s-expression with no annotations
  ;;; - expression is a list/vector/id/whathaveyou that
  ;;;   may contain further annotations.


  (module (read-expr read-expr-script-initial)
    (define-syntax tokenize/1 syntax-error)
    (define (annotate-simple datum pos p)
      (make-annotation datum (cons (port-id p) pos) datum))
    (define (annotate stripped expression pos p)
      (make-annotation expression (cons (port-id p) pos) stripped))
    (define read-list
      (lambda (p locs k end mis init?)
        (let-values ([(t pos) (tokenize/1+pos p)])
          (cond
            [(eof-object? t)
             (die/p p 'read "end of file encountered while reading list")]
            [(eq? t end) (values '() '() locs k)]
            [(eq? t mis)
             (die/p-1 p 'read "paren mismatch")]
            [(eq? t 'dot)
             (when init?
               (die/p-1 p 'read "invalid dot while reading list"))
             (let-values ([(d d^ locs k) (read-expr p locs k)])
               (let-values ([(t pos^) (tokenize/1+pos p)])
                 (cond
                  [(eq? t end) (values d d^ locs k)]
                  [(eq? t mis)
                   (die/p-1 p 'read "paren mismatch")]
                  [(eq? t 'dot)
                   (die/p-1 p 'read "cannot have two dots in a list")]
                  [else
                   (die/p-1 p 'read
                     (format "expecting ~a, got ~a" end t))])))]
            [else
             (let-values ([(a a^ locs k) (parse-token p locs k t pos)])
               (let-values ([(d d^ locs k) (read-list p locs k end mis #f)])
                 (let ([x (cons a d)] [x^ (cons a^ d^)])
                   (values x x^ locs (extend-k-pair x x^ a d k)))))]))))
    (define extend-k-pair
      (lambda (x x^ a d k)
        (cond
          [(or (loc? a) (loc? d))
           (lambda ()
             (let ([a (car x)])
               (when (loc? a)
                 (set-car! x (loc-value a))
                 (set-car! x^ (loc-value^ a))))
             (let ([d (cdr x)])
               (when (loc? d)
                 (set-cdr! x (loc-value d))
                 (set-cdr! x^ (loc-value^ d))))
             (k))]
          [else k])))
    (define vector-put
      (lambda (v v^ k i ls ls^)
        (cond
          [(null? ls) k]
          [else
           (let ([a (car ls)])
             (vector-set! v i a)
             (vector-set! v^ i (car ls^))
             (vector-put v v^
                (if (loc? a)
                    (lambda ()
                      (vector-set! v i (loc-value a))
                      (vector-set! v^ i (loc-value^ a))
                      (k))
                    k)
                (fxsub1 i)
                (cdr ls)
                (cdr ls^)))])))
    (define read-vector
      (lambda (p locs k count ls ls^)
        (let-values ([(t pos) (tokenize/1+pos p)])
          (cond
            [(eof-object? t)
             (die/p p 'read "end of file encountered while reading a vector")]
            [(eq? t 'rparen)
             (let ([v (make-vector count)] [v^ (make-vector count)])
               (let ([k (vector-put v v^ k (fxsub1 count) ls ls^)])
                 (values v v^ locs k)))]
            [(eq? t 'rbrack)
             (die/p-1 p 'read "unexpected ] while reading a vector")]
            [(eq? t 'dot)
             (die/p-1 p 'read "unexpected . while reading a vector")]
            [else
             (let-values ([(a a^ locs k) (parse-token p locs k t pos)])
                (read-vector p locs k (fxadd1 count)
                  (cons a ls) (cons a^ ls^)))]))))
    (define read-bytevector
      (lambda (p locs k count ls)
        (let-values ([(t pos) (tokenize/1+pos p)])
          (cond
            [(eof-object? t)
             (die/p p 'read "end of file encountered while reading a bytevector")]
            [(eq? t 'rparen)
             (let ([v (u8-list->bytevector (reverse ls))])
               (values v v locs k))]
            [(eq? t 'rbrack)
             (die/p-1 p 'read "unexpected ] while reading a bytevector")]
            [(eq? t 'dot)
             (die/p-1 p 'read "unexpected . while reading a bytevector")]
            [else
             (let-values ([(a a^ locs k) (parse-token p locs k t pos)])
                (unless (and (fixnum? a) (fx<= 0 a) (fx<= a 255))
                  (die/ann a^ 'read
                    "invalid value in a bytevector" a))
                (read-bytevector p locs k (fxadd1 count)
                  (cons a ls)))]))))
    (define read-at-expr
      (lambda (p locs k at-pos)
        (define-struct nested (a a^))
        (define-struct nested* (a* a*^))
        (define (get-chars chars pos p a* a*^)
          (if (null? chars)
              (values a* a*^)
              (let ([str (list->string chars)])
                (let ([str^ (annotate-simple str pos p)])
                  (values (cons str a*) (cons str^ a*^))))))
        (define (return start-pos start-col c*** p)
          (let ([indent
                 (apply min start-col
                   (map
                     (lambda (c**)
                       (define (st00 c* c** n)
                         (if (null? c*)
                             (st0 c** n)
                             (if (char=? (car c*) #\space)
                                 (st00 (cdr c*) c** (+ n 1))
                                 n)))
                       (define (st0 c** n)
                         (if (null? c**)
                             start-col
                             (let ([c* (car c**)])
                               (if (or (nested? c*) (nested*? c*))
                                   start-col
                                   (st00 (car c*) (cdr c**) n)))))
                       (st0 c** 0))
                     (cdr c***)))])
            (define (convert c*)
              (if (or (nested? c*) (nested*? c*))
                  c*
                  (let ([str (list->string (car c*))])
                    (let ([str^ (annotate-simple str (cdr c*) p)])
                      (make-nested str str^)))))
            (define (trim/convert c**)
              (define (mk n pos)
                (let ([str (make-string (- n indent) #\space)])
                  (let ([str^ (annotate-simple str pos p)])
                    (make-nested str str^))))
              (define (s1 c* pos c** n)
                (if (null? c*)
                    (let ([c* (car c**)])
                      (if (or (nested? c*) (nested*? c*))
                          (cons (mk n pos) (map convert c**))
                          (s1 c* pos (cdr c**) n)))
                    (if (char=? (car c*) #\space)
                        (s1 (cdr c*) pos c** (+ n 1))
                        (cons*
                          (mk n pos)
                          (map convert (cons (cons c* pos) c**))))))
              (define (s00 c* pos c** n)
                (if (null? c*)
                    (s0 c** n)
                    (if (char=? #\space (car c*))
                        (if (< n indent)
                            (s00 (cdr c*) pos c** (+ n 1))
                            (s1 (cdr c*) pos c** (+ n 1)))
                        (map convert (cons (cons c* pos) c**)))))
              (define (s0 c** n)
                (if (null? c**)
                    '()
                    (let ([c* (car c**)])
                      (if (or (nested? c*) (nested*? c*))
                          (map convert c**)
                          (s00 (car c*) (cdr c*) (cdr c**) n)))))
              (s0 c** 0))
            (define (cons-initial c** c***)
              (define (all-white? c**)
                (andmap (lambda (c*)
                          (and (not (nested? c*))
                               (not (nested*? c*))
                               (andmap
                                 (lambda (c) (char=? c #\space))
                                 (car c*))))
                        c**))
              (define (nl)
                (let ([str "\n"])
                  (list (make-nested str str))))
              (define (S1 c*** n)
                (if (null? c***)
                    (make-list n (nl))
                    (let ([c** (car c***)] [c*** (cdr c***)])
                      (if (all-white? c**)
                          (S1 c*** (+ n 1))
                          (append
                            (make-list n (nl))
                            (cons (trim/convert c**)
                                  (S2 c*** 0 0)))))))
              (define (S2 c*** n m)
                (if (null? c***)
                    (make-list (+ n m) (nl))
                    (let ([c** (car c***)] [c*** (cdr c***)])
                      (if (all-white? c**)
                          (S2 c*** (+ n 1) -1)
                          (append
                            (make-list (+ n 1) (nl))
                            (cons (trim/convert c**)
                                  (S2 c*** 0 0)))))))
              (define (S0 c** c***)
                (if (all-white? c**)
                    (S1 c*** 0)
                    (cons
                      (map convert c**)
                      (S2 c*** 0 0))))
              (S0 c** c***))
            (let ([c** (cons-initial (car c***) (cdr c***))])
              (let ([n* (apply append c**)])
                (define (extract p p* ls)
                  (let f ([ls ls])
                    (cond
                      [(null? ls) '()]
                      [(nested? (car ls)) (cons (p (car ls)) (f (cdr ls)))]
                      [else (append (p* (car ls)) (f (cdr ls)))])))
                (let ([c* (extract nested-a nested*-a* n*)]
                      [c*^ (extract nested-a^ nested*-a*^ n*)])
                  (values c* (annotate c* c*^ start-pos p) locs k))))))
        (define (read-text p locs k pref*)
          (let ([start-pos (port-position p)]
                [start-col (input-port-column-number p)])
            (let f ([c* '()] [pos start-pos]
                    [c** '()] [c*** '()]
                    [depth 0] [locs locs] [k k])
              (define (match-prefix c* pref*)
                (cond
                  [(and (pair? c*) (pair? pref*))
                   (and (char=? (car c*) (car pref*))
                        (match-prefix (cdr c*) (cdr pref*)))]
                  [else (and (null? pref*) c*)]))
              (let ([c (read-char p)])
                (cond
                  [(eof-object? c)
                   (die/p p 'read "end of file while reading @-expr text")]
                  [(char=? c #\})
                   (let g ([x* (cons #\} c*)] [p* pref*])
                     (if (null? p*)
                         (if (= depth 0)
                             (let ([c**
                                    (reverse
                                      (if (null? c*)
                                          c**
                                          (cons (cons (reverse c*) pos) c**)))])
                               (let ([c*** (reverse (cons c** c***))])
                                 (return start-pos start-col c*** p)))
                             (f x* pos c** c*** (- depth 1) locs k))
                         (let ([c (peek-char p)])
                           (cond
                             [(eof-object? c)
                              (die/p p 'read "invalid eof inside @-expression")]
                             [(char=? c (rev-punc (car p*)))
                              (read-char p)
                              (g (cons c x*) (cdr p*))]
                             [else
                              (f x* pos c** c*** depth locs k)]))))]
                  [(char=? c #\{)
                   (f (cons c c*) pos c** c***
                      (if (match-prefix c* pref*) (+ depth 1) depth)
                      locs k)]
                  [(char=? c #\newline)
                   (f '()
                      (port-position p)
                      '()
                      (cons (reverse
                              (if (null? c*)
                                  c**
                                  (cons (cons (reverse c*) pos) c**)))
                            c***)
                      depth locs k)]
                  [(and (char=? c #\@) (match-prefix c* pref*)) =>
                   (lambda (c*)
                     (let ([c (peek-char p)])
                       (cond
                         [(eof-object? c)
                          (die/p p 'read "invalid eof inside nested @-expr")]
                         [(char=? c #\")
                          (read-char p)
                          (let ([c* (tokenize-string c* p)])
                            (f c* pos c** c*** depth locs k))]
                         [else
                          (let-values ([(a* a*^ locs k)
                                        (read-at-text-mode p locs k)])
                            (f '()
                               (port-position p)
                               (cons (make-nested* a* a*^)
                                 (if (null? c*)
                                     c**
                                     (cons (cons (reverse c*) pos) c**)))
                               c*** depth locs k))])))]
                  [else
                   (f (cons c c*) pos c** c*** depth locs k)])))))
        (define (read-brackets p locs k)
          (let-values ([(a* a*^ locs k)
                        (read-list p locs k 'rbrack 'rparen #t)])
            (unless (list? a*)
              (die/ann a*^ 'read "not a proper list"))
            (let ([c (peek-char p)])
              (cond
                [(eof-object? c) ;;; @<cmd>[...]
                 (values a* a*^ locs k)]
                [(char=? c #\{)
                 (read-char p)
                 (let-values ([(b* b*^ locs k)
                               (read-text p locs k '())])
                   (values (append a* b*)
                           (append a*^ b*^)
                           locs k))]
                [(char=? c #\|)
                 (read-char p)
                 (let-values ([(b* b*^ locs k)
                               (read-at-bar p locs k #t)])
                   (values (append a* b*)
                           (append a*^ b*^)
                           locs k))]
                [else (values a* a*^ locs k)]))))
        (define (left-punc? c)
          (define chars "([<!?~$%^&*-_+=:")
          (let f ([i 0])
            (cond
              [(= i (string-length chars)) #f]
              [(char=? c (string-ref chars i)) #t]
              [else (f (+ i 1))])))
        (define (rev-punc c)
          (cond
            [(char=? c #\() #\)]
            [(char=? c #\[) #\]]
            [(char=? c #\<) #\>]
            [else c]))
        (define (read-at-bar p locs k text-mode?)
          (let ([c (peek-char p)])
            (cond
              [(eof-object? c)
               (die/p p 'read "eof inside @|-expression")]
              [(and (char=? c #\|) text-mode?) ;;; @||
               (read-char p)
               (values '() '() locs k)]
              [(char=? c #\{) ;;; @|{
               (read-char p)
               (read-text p locs k '(#\|))]
              [(left-punc? c) ;;; @|<({
               (read-char p)
               (let ([pos (port-position p)])
                 (let f ([ls (list c)])
                   (let ([c (peek-char p)])
                     (cond
                       [(eof-object? c)
                        (die/p p 'read "eof inside @|< mode")]
                       [(left-punc? c)
                        (read-char p)
                        (f (cons c ls))]
                       [(char=? c #\{)
                        (read-char p)
                        (read-text p locs k (append ls '(#\|)))]
                       [else
                        (read-at-bar-others ls p locs k)]))))]
              [text-mode? ;;; @|5 6 7|
               (read-at-bar-datum p locs k)]
              [else
               (die/p p 'read "invalid char in @| mode" c)])))
        (define (read-at-bar-others ls p locs k)
          (define (split ls)
            (cond
              [(null? ls) (values '() '())]
              [(initial? (car ls))
               (let-values ([(a d) (split (cdr ls))])
                 (values (cons (car ls) a) d))]
              [else
               (values '() ls)]))
          (define (mksymbol ls)
            (let ([s (string->symbol
                       (list->string
                         (reverse ls)))])
              (values s s)))
          (let-values ([(inits rest) (split ls)])
            (let ([ls (tokenize-identifier inits p)])
              (let-values ([(s s^) (mksymbol ls)])
                (let g ([rest rest]
                        [a* (list s)]
                        [a*^ (list s^)]
                        [locs locs]
                        [k k])
                  (if (null? rest)
                      (let-values ([(b* b*^ locs k)
                                    (read-at-bar-datum p locs k)])
                        (values (append a* b*) (append a*^ b*^) locs k))
                      (let ([x (car rest)])
                        (case x
                          [(#\() #\) ;;; vim paren-matching sucks
                           (let-values ([(b* b*^ locs k)
                                         (read-list p locs k 'rparen 'rbrack #t)])
                             (g (cdr rest)
                                (list (append a* b*))
                                (list (append a*^ b*^))
                                locs k))]
                          [(#\[) #\] ;;;  vim paren-matching sucks
                           (let-values ([(b* b*^ locs k)
                                         (read-list p locs k 'rbrack 'rparen #t)])
                             (g (cdr rest)
                                (list (append a* b*))
                                (list (append a*^ b*^))
                                locs k))]
                          [else
                           (let-values ([(inits rest) (split rest)])
                             (let-values ([(s s^) (mksymbol inits)])
                               (g rest
                                  (cons s a*)
                                  (cons s^ a*^)
                                  locs k)))]))))))))
        (define (read-at-bar-datum p locs k)
          (let ([c (peek-char p)])
            (cond
              [(eof-object? c) (die/p p 'read "eof inside @|datum mode")]
              [(char-whitespace? c)
               (read-char p)
               (read-at-bar-datum p locs k)]
              [(char=? c #\|)
               (read-char p)
               (values '() '() locs k)]
              [else
               (let-values ([(a a^ locs k) (read-expr p locs k)])
                 (let-values ([(a* a*^ locs k) (read-at-bar-datum p locs k)])
                   (values (cons a a*) (cons a^ a*^) locs k)))])))
        (define (read-at-text-mode p locs k)
          (let ([c (peek-char p)])
            (cond
              [(eof-object? c)
               (die/p p 'read "eof encountered inside @-expression")]
              [(char=? c #\|)
               (read-char p)
               (read-at-bar p locs k #t)]
              [else
               (let-values ([(a a^ locs k)
                             (read-at-sexpr-mode p locs k)])
                 (values (list a) (list a^) locs k))])))
        (define (read-at-sexpr-mode p locs k)
          (let ([c (peek-char p)])
            (cond
              [(eof-object? c)
               (die/p p 'read "eof encountered inside @-expression")]
              [(eqv? c '#\[) ;;;   @[ ...
               (read-char p)
               (read-brackets p locs k)]
              [(eqv? c #\{) ;;;   @{ ...
               (read-char p)
               (read-text p locs k '())]
              [(char=? c #\|)
               (read-char p)
               (read-at-bar p locs k #f)]
              [else            ;;;   @<cmd> ...
               (let-values ([(a a^ locs k) (read-expr p locs k)])
                 (let ([c (peek-char p)])
                   (cond
                     [(eof-object? c)  ;;; @<cmd><eof>
                      (values a a^ locs k)]
                     [(eqv? c #\[)
                      (read-char p)
                      (let-values ([(a* a*^ locs k)
                                    (read-brackets p locs k)])
                        (let ([v (cons a a*)] [v^ (cons a^ a*^)])
                          (values v (annotate v v^ at-pos p) locs k)))]
                     [(eqv? c #\{) ;;; @<cmd>{ ...
                      (read-char p)
                      (let-values ([(a* a*^ locs k)
                                    (read-text p locs k '())])
                        (let ([v (cons a a*)] [v^ (cons a^ a*^)])
                          (values v (annotate v v^ at-pos p) locs k)))]
                     [(eqv? c #\|) ;;; @<cmd>| ...
                      (read-char p)
                      (let-values ([(a* a*^ locs k)
                                    (read-at-bar p locs k #f)])
                        (let ([v (cons a a*)] [v^ (cons a^ a*^)])
                          (values v (annotate v v^ at-pos p) locs k)))]
                     [else
                      (values a a^ locs k)])))])))
        (read-at-sexpr-mode p locs k)))
    (define parse-token
      (lambda (p locs k t pos)
        (cond
          [(eof-object? t)
           (values (eof-object)
             (annotate-simple (eof-object) pos p) locs k)]
          [(eq? t 'lparen)
           (let-values ([(ls ls^ locs k)
                         (read-list p locs k 'rparen 'rbrack #t)])
             (values ls (annotate ls ls^ pos p) locs k))]
          [(eq? t 'lbrack)
           (let-values ([(ls ls^ locs k)
                         (read-list p locs k 'rbrack 'rparen #t)])
             (values ls (annotate ls ls^ pos p) locs k))]
          [(eq? t 'vparen)
           (let-values ([(v v^ locs k)
                         (read-vector p locs k 0 '() '())])
             (values v (annotate v v^ pos p) locs k))]
          [(eq? t 'vu8)
           (let-values ([(v v^ locs k)
                         (read-bytevector p locs k 0 '())])
             (values v (annotate v v^ pos p) locs k))]
          [(eq? t 'at-expr)
           (read-at-expr p locs k pos)]
          [(pair? t)
           (cond
             [(eq? (car t) 'datum)
              (values (cdr t)
                (annotate-simple (cdr t) pos p) locs k)]
             [(eq? (car t) 'macro)
              (let ([macro (cdr t)])
                (define (read-macro)
                  (let-values ([(t pos) (tokenize/1+pos p)])
                    (cond
                      [(eof-object? t)
                       (die/p p 'read
                         (format "invalid eof after ~a read macro"
                         macro))]
                      [else (parse-token p locs k t pos)])))
                (let-values ([(expr expr^ locs k) (read-macro)])
                  (let ([d (list expr)] [d^ (list expr^)])
                    (let ([x (cons macro d)]
                          [x^ (cons (annotate-simple macro pos p) d^)])
                      (values x (annotate x x^ pos p) locs
                        (extend-k-pair d d^ expr '() k))))))]
             [(eq? (car t) 'mark)
              (let ([n (cdr t)])
                (let-values ([(expr expr^ locs k)
                              (read-expr p locs k)])
                  (cond
                    [(assq n locs) =>
                     (lambda (x)
                       (let ([loc (cdr x)])
                         (when (loc-set? loc) ;;; FIXME: pos
                           (die/p p 'read "duplicate mark" n))
                         (set-loc-value! loc expr)
                         (set-loc-value^! loc expr^)
                         (set-loc-set?! loc #t)
                         (values expr expr^ locs k)))]
                    [else
                     (let ([loc (make-loc expr 'unused #t)])
                       (let ([locs (cons (cons n loc) locs)])
                         (values expr expr^ locs k)))])))]
             [(eq? (car t) 'ref)
              (let ([n (cdr t)])
                (cond
                  [(assq n locs) =>
                   (lambda (x)
                     (values (cdr x) 'unused locs k))]
                  [else
                   (let ([loc (make-loc #f 'unused #f)])
                     (let ([locs (cons (cons n loc) locs)])
                       (values loc 'unused locs k)))]))]
             [else (die/p p 'read "invalid token" t)])]
          [else
           (die/p-1 p 'read
             (format "unexpected ~s found" t))])))
    (define read-expr
      (lambda (p locs k)
        (let-values ([(t pos) (tokenize/1+pos p)])
          (parse-token p locs k t pos))))
    (define read-expr-script-initial
      (lambda (p locs k)
        (let-values ([(t pos) (tokenize-script-initial+pos p)])
          (parse-token p locs k t pos)))))


  (define (reduce-loc! p)
    (lambda (x)
       (let ([loc (cdr x)])
         (unless (loc-set? loc)
           (die/p p 'read "referenced mark is not set" (car x)))
         (when (loc? (loc-value loc))
           (let f ([h loc] [t loc])
             (if (loc? h)
                 (let ([h1 (loc-value h)])
                   (if (loc? h1)
                       (begin
                         (when (eq? h1 t)
                           (die/p p 'read "circular marks"))
                         (let ([v (f (loc-value h1) (loc-value t))])
                           (set-loc-value! h1 v)
                           (set-loc-value! h v)
                           v))
                       (begin
                         (set-loc-value! h h1)
                         h1)))
                 h))))))

  (define (read-as-comment p)
    (begin (read-expr p '() void) (void)))

  (define (return-annotated x)
    (cond
      [(and (annotation? x) (eof-object? (annotation-expression x)))
       (eof-object)]
      [else x]))

  (define my-read
    (lambda (p)
      (let-values ([(expr expr^ locs k) (read-expr p '() void)])
        (cond
          [(null? locs) expr]
          [else
           (for-each (reduce-loc! p) locs)
           (k)
           (if (loc? expr)
               (loc-value expr)
               expr)]))))

  (define read-initial
    (lambda (p)
      (let-values ([(expr expr^ locs k) (read-expr-script-initial p '() void)])
        (cond
          [(null? locs) expr]
          [else
           (for-each (reduce-loc! p) locs)
           (k)
           (if (loc? expr)
               (loc-value expr)
               expr)]))))

  (define read-annotated
    (case-lambda
      [(p)
       (unless (input-port? p)
         (error 'read-annotated "not an input port" p))
       (let-values ([(expr expr^ locs k) (read-expr p '() void)])
         (cond
           [(null? locs) (return-annotated expr^)]
           [else
            (for-each (reduce-loc! p) locs)
            (k)
            (if (loc? expr)
                (loc-value^ expr)
                (return-annotated expr^))]))]
      [() (read-annotated (current-input-port))]))

  (define read-script-annotated
    (lambda (p)
      (let-values ([(expr expr^ locs k) (read-expr-script-initial p '() void)])
        (cond
          [(null? locs) (return-annotated expr^)]
          [else
           (for-each (reduce-loc! p) locs)
           (k)
           (if (loc? expr)
               (loc-value^ expr)
               (return-annotated expr^))]))))

  (define read-token
    (case-lambda
      [() (tokenize/1 (current-input-port))]
      [(p)
       (if (input-port? p)
           (tokenize/1 p)
           (die 'read-token "not an input port" p))]))

  (define read
    (case-lambda
      [() (my-read (current-input-port))]
      [(p)
       (if (input-port? p)
           (my-read p)
           (die 'read "not an input port" p))]))

  (define (get-datum p)
    (unless (input-port? p)
      (die 'get-datum "not an input port"))
    (my-read p))

  (define comment-handler
    ;;; this is stale, maybe delete
    (make-parameter
      (lambda (x) (void))
      (lambda (x)
        (unless (procedure? x)
          (die 'comment-handler "not a procedure" x))
        x)))

)