Removed infix directory. It has a non-free copyright and will
therefore be removed from S48 as well.
This commit is contained in:
parent
442559a708
commit
ba5cdcf6fb
|
@ -1,29 +0,0 @@
|
|||
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
||||
|
||||
; Infix stuff
|
||||
|
||||
(define-structure tokenizer (export make-tokenizer-table
|
||||
set-up-usual-tokenization!
|
||||
set-char-tokenization!
|
||||
tokenize)
|
||||
(open scheme records signals defpackage ascii)
|
||||
(access primitives)
|
||||
(files tokenize))
|
||||
|
||||
(define-structure pratt (export toplevel-parse
|
||||
parse
|
||||
make-operator
|
||||
make-lexer-table set-char-tokenization!
|
||||
lexer-ttab define-keyword define-punctuation
|
||||
prsmatch comma-operator delim-error erb-error
|
||||
if-operator
|
||||
then-operator else-operator parse-prefix
|
||||
parse-nary parse-infix
|
||||
parse-matchfix end-of-input-operator
|
||||
port->stream)
|
||||
(open scheme records signals tokenizer tables)
|
||||
(files pratt))
|
||||
|
||||
(define-structure sgol (export sgol-read sgol-repl)
|
||||
(open scheme signals pratt)
|
||||
(files sgol))
|
|
@ -1,308 +0,0 @@
|
|||
; -*- Mode: Scheme; -*-
|
||||
;
|
||||
; A simple Pratt-Parser for SIOD: 2-FEB-90, George Carrette, GJC@PARADIGM.COM
|
||||
; Siod may be obtained by anonymous FTP to world.std.com:pub/gjc.
|
||||
;
|
||||
; * COPYRIGHT (c) 1988-1994 BY *
|
||||
; * PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS. *
|
||||
; * ALL RIGHTS RESERVED *
|
||||
;
|
||||
;Permission to use, copy, modify, distribute and sell this software
|
||||
;and its documentation for any purpose and without fee is hereby
|
||||
;granted, provided that the above copyright notice appear in all copies
|
||||
;and that both that copyright notice and this permission notice appear
|
||||
;in supporting documentation, and that the name of Paradigm Associates
|
||||
;Inc not be used in advertising or publicity pertaining to distribution
|
||||
;of the software without specific, written prior permission.
|
||||
;
|
||||
;PARADIGM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
||||
;ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
|
||||
;PARADIGM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
;ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
;WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
;ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
||||
;SOFTWARE.
|
||||
;
|
||||
; Based on a theory of parsing presented in:
|
||||
;
|
||||
; Pratt, Vaughan R., ``Top Down Operator Precedence,''
|
||||
; ACM Symposium on Principles of Programming Languages
|
||||
; Boston, MA; October, 1973.
|
||||
;
|
||||
|
||||
; The following terms may be useful in deciphering this code:
|
||||
|
||||
; NUD -- NUll left Denotation (op has nothing to its left (prefix))
|
||||
; LED -- LEft Denotation (op has something to left (postfix or infix))
|
||||
|
||||
; LBP -- Left Binding Power (the stickiness to the left)
|
||||
; RBP -- Right Binding Power (the stickiness to the right)
|
||||
;
|
||||
|
||||
; Mods for Scheme 48 by J Rees 6-14-90
|
||||
|
||||
; From: <gjc@mitech.com>
|
||||
;
|
||||
; Now a neat thing that CGOL had was a way of packaging and scoping
|
||||
; different parsing contexts. The maclisp implementation was simple,
|
||||
; instead of just NUD and LED and other properties there was a list
|
||||
; of property indicators. And a lookup operation.
|
||||
;
|
||||
; One use of the local-context thing, in parsing the C language
|
||||
; you can use a different binding-power for ":" depending on
|
||||
; what kind of statement you are parsing, a general statement
|
||||
; context where ":" means a label, a "switch" or the "if for value
|
||||
; " construct of (a > b) > c : d;
|
||||
|
||||
|
||||
(define (peek-token stream)
|
||||
(stream 'peek #f))
|
||||
|
||||
(define (read-token stream)
|
||||
(stream 'get #f))
|
||||
|
||||
(define (toplevel-parse stream)
|
||||
(if (eq? end-of-input-operator (peek-token stream))
|
||||
(read-token stream)
|
||||
(parse -1 stream)))
|
||||
|
||||
|
||||
; A token is either an operator or atomic (number, identifier, etc.)
|
||||
|
||||
(define operator-type
|
||||
(make-record-type 'operator
|
||||
'(name lbp rbp nud led)))
|
||||
|
||||
(define make-operator
|
||||
(let ()
|
||||
(define make
|
||||
(record-constructor operator-type '(name lbp rbp nud led)))
|
||||
(define (make-operator name lbp rbp nud led)
|
||||
(make name
|
||||
(or lbp default-lbp)
|
||||
(or rbp default-rbp)
|
||||
(or nud default-nud)
|
||||
(or led default-led)))
|
||||
make-operator))
|
||||
|
||||
(define operator? (record-predicate operator-type))
|
||||
|
||||
(define operator-name (record-accessor operator-type 'name))
|
||||
(define operator-nud (record-accessor operator-type 'nud))
|
||||
(define operator-led (record-accessor operator-type 'led))
|
||||
(define operator-lbp (record-accessor operator-type 'lbp))
|
||||
(define operator-rbp (record-accessor operator-type 'rbp))
|
||||
|
||||
(define (default-nud operator stream)
|
||||
(if (eq? (operator-led operator) default-led)
|
||||
operator
|
||||
(error 'not-a-prefix-operator operator)))
|
||||
|
||||
(define (nudcall token stream)
|
||||
(if (operator? token)
|
||||
((operator-nud token) token stream)
|
||||
token))
|
||||
|
||||
(define default-led #f)
|
||||
|
||||
;+++ To do: fix this to make juxtaposition work (f x+y)
|
||||
|
||||
(define (ledcall token left stream)
|
||||
((or (and (operator? token)
|
||||
(operator-led token))
|
||||
(error 'not-an-infix-operator token))
|
||||
token
|
||||
left
|
||||
stream))
|
||||
|
||||
(define default-lbp 200)
|
||||
|
||||
(define (lbp token)
|
||||
(if (operator? token)
|
||||
(operator-lbp token)
|
||||
default-lbp))
|
||||
|
||||
(define default-rbp 200)
|
||||
|
||||
(define (rbp token)
|
||||
(if (operator? token)
|
||||
(operator-rbp token)
|
||||
default-rbp))
|
||||
|
||||
(define-record-discloser operator-type
|
||||
(lambda (obj)
|
||||
(list 'operator (operator-name obj))))
|
||||
|
||||
; Mumble
|
||||
|
||||
(define (delim-error token stream)
|
||||
(error 'invalid-use-of-delimiter token))
|
||||
|
||||
(define (erb-error token left stream)
|
||||
(error 'too-many-right-parentheses token))
|
||||
|
||||
(define (premterm-err token stream)
|
||||
(error 'premature-termination-of-input token))
|
||||
|
||||
; Parse
|
||||
|
||||
(define *parse-debug* #f)
|
||||
|
||||
(define (parse rbp-level stream)
|
||||
(if *parse-debug* (print `(parse ,rbp-level)))
|
||||
(let parse-loop ((translation (nudcall (read-token stream) stream)))
|
||||
(if (< rbp-level (lbp (peek-token stream)))
|
||||
(parse-loop (ledcall (read-token stream) translation stream))
|
||||
(begin (if *parse-debug* (print translation))
|
||||
translation))))
|
||||
|
||||
(define (print s) (write s) (newline))
|
||||
|
||||
(define (parse-prefix operator stream)
|
||||
(list (operator-name operator)
|
||||
(parse (rbp operator) stream)))
|
||||
|
||||
(define (parse-infix operator left stream)
|
||||
(list (operator-name operator)
|
||||
left
|
||||
(parse (rbp operator) stream)))
|
||||
|
||||
(define (parse-nary operator left stream)
|
||||
(cons (operator-name operator) (cons left (prsnary operator stream))))
|
||||
|
||||
(define (prsnary operator stream)
|
||||
(define (loop l)
|
||||
(if (eq? operator (peek-token stream))
|
||||
(begin (read-token stream)
|
||||
(loop (cons (parse (rbp operator) stream) l)))
|
||||
(reverse l)))
|
||||
(loop (list (parse (rbp operator) stream))))
|
||||
|
||||
; Parenthesis matching, with internal commas.
|
||||
; Kind of a kludge if you ask me.
|
||||
|
||||
(define (parse-matchfix operator stream) ; |x|
|
||||
(cons (operator-name operator)
|
||||
(prsmatch operator stream)))
|
||||
|
||||
(define (prsmatch close-op stream)
|
||||
(if (eq? (peek-token stream) close-op)
|
||||
(begin (read-token stream)
|
||||
'())
|
||||
(let loop ((l (list (parse 10 stream))))
|
||||
(if (eq? (peek-token stream) close-op)
|
||||
(begin (read-token stream)
|
||||
(reverse l))
|
||||
(if (eq? (peek-token stream) comma-operator)
|
||||
(begin (read-token stream)
|
||||
(loop (cons (parse 10 stream) l)))
|
||||
(error 'comma-or-match-not-found (read-token stream)))))))
|
||||
|
||||
(define comma-operator (make-operator 'comma 10 #f delim-error #f))
|
||||
|
||||
; if A then B [else C]
|
||||
|
||||
(define (if-nud token stream)
|
||||
(let* ((pred (parse (rbp token) stream))
|
||||
(then (if (eq? (peek-token stream) then-operator)
|
||||
(parse (rbp (read-token stream)) stream)
|
||||
(error 'missing-then pred))))
|
||||
(if (eq? (peek-token stream) else-operator)
|
||||
`(if ,pred ,then ,(parse (rbp (read-token stream)) stream))
|
||||
`(if ,pred ,then))))
|
||||
|
||||
(define if-operator (make-operator 'if #f 45 if-nud #f))
|
||||
(define then-operator (make-operator 'then 5 25 delim-error #f))
|
||||
(define else-operator (make-operator 'else 5 25 delim-error #f))
|
||||
|
||||
; Lexer support:
|
||||
|
||||
(define lexer-type
|
||||
(make-record-type 'lexer '(ttab punctab keytab)))
|
||||
|
||||
(define lexer-ttab (record-accessor lexer-type 'ttab))
|
||||
(define lexer-punctab (record-accessor lexer-type 'punctab))
|
||||
(define lexer-keytab (record-accessor lexer-type 'keytab))
|
||||
|
||||
(define make-lexer-table
|
||||
(let ((make (record-constructor lexer-type '(ttab punctab keytab))))
|
||||
(lambda ()
|
||||
(let ((ttab (make-tokenizer-table)))
|
||||
(set-up-usual-tokenization! ttab)
|
||||
(make ttab (make-table) (make-table))))))
|
||||
|
||||
(define (lex ltab port)
|
||||
(let ((thing (tokenize (lexer-ttab ltab) port)))
|
||||
(cond ((eof-object? thing)
|
||||
end-of-input-operator)
|
||||
((symbol? thing)
|
||||
(or (table-ref (lexer-keytab ltab) thing)
|
||||
thing))
|
||||
(else thing))))
|
||||
|
||||
; Keywords
|
||||
|
||||
(define (define-keyword ltab name op)
|
||||
(table-set! (lexer-keytab ltab) name op))
|
||||
|
||||
; Punctuation
|
||||
|
||||
; lexnode = (* operator (table-of char (+ lexnode #f))) -- discrimination tree
|
||||
|
||||
(define (define-punctuation ltab string op)
|
||||
(let ((end (- (string-length string) 1)))
|
||||
(let loop ((i 0)
|
||||
(table (lexer-punctab ltab)))
|
||||
(let* ((c (string-ref string i))
|
||||
(lexnode
|
||||
(or (table-ref table c)
|
||||
(let ((lexnode
|
||||
(cons (error-operator (substring string 0 (+ i 1)))
|
||||
(make-table))))
|
||||
(table-set! table c lexnode)
|
||||
(if (= i 0)
|
||||
(set-char-tokenization! (lexer-ttab ltab)
|
||||
c
|
||||
(operator-reader lexnode)
|
||||
#t))
|
||||
lexnode))))
|
||||
(if (>= i end)
|
||||
(set-car! lexnode op)
|
||||
(loop (+ i 1) (cdr lexnode)))))))
|
||||
|
||||
(define (operator-reader lexnode)
|
||||
(lambda (c port)
|
||||
(let loop ((lexnode lexnode))
|
||||
(let ((nextc (peek-char port)))
|
||||
(let ((nextnode (table-ref (cdr lexnode) nextc)))
|
||||
(if nextnode
|
||||
(begin (read-char port)
|
||||
(loop nextnode))
|
||||
(car lexnode)))))))
|
||||
|
||||
(define (error-operator string)
|
||||
(make-operator 'invalid-operator #f #f
|
||||
(lambda rest (error "invalid operator" string))
|
||||
#f))
|
||||
|
||||
; Mumble
|
||||
|
||||
(define end-of-input-operator
|
||||
(make-operator "end of input" -1 #f premterm-err #f))
|
||||
|
||||
(define (port->stream port ltab)
|
||||
(define (really-get)
|
||||
(lex ltab port))
|
||||
(define peeked? #f)
|
||||
(define peek #f)
|
||||
(define (stream op arg)
|
||||
(case op
|
||||
((get) (if peeked?
|
||||
(begin (set! peeked? #f) peek)
|
||||
(really-get)))
|
||||
((peek) (if peeked?
|
||||
peek
|
||||
(begin (set! peeked? #t)
|
||||
(set! peek (really-get))
|
||||
peek)))))
|
||||
stream)
|
|
@ -1,11 +0,0 @@
|
|||
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
||||
|
||||
|
||||
(define (%unspecific)
|
||||
(if #f #f))
|
||||
|
||||
(define (!= x y)
|
||||
(not (= x y)))
|
||||
|
||||
(define (%tuple . rest)
|
||||
(list->vector (cons 'tuple rest)))
|
|
@ -1,213 +0,0 @@
|
|||
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
||||
|
||||
|
||||
; Lexer for Infix Scheme (JAR's obscure syntax)
|
||||
; Bears no relation to Pratt's CGOL
|
||||
|
||||
; To do: add ML-ish binding constructs.
|
||||
|
||||
; (sgol-read) reads an expression
|
||||
;
|
||||
; semicolon terminates input
|
||||
; comment character is # (comment goes to end of line)
|
||||
;
|
||||
; f(x, y) reads as (f x y)
|
||||
;
|
||||
; if x then y else z reads as (if x y z)
|
||||
; x and y, x or y, not x do the obvious thing
|
||||
;
|
||||
; x + y reads as (+ x y) - similarly for - * / = < > <= >=
|
||||
;
|
||||
; x::y reads as (cons x y) - ML's syntax
|
||||
; x++y reads as (append x y) - whose syntax? Haskell's?
|
||||
; [] reads as '()
|
||||
; [a, b, ...] reads as (list a b ...)
|
||||
;
|
||||
; () reads as the-unit
|
||||
; (x, y, ...) reads as (tuple x y ...)
|
||||
;
|
||||
; a[i] reads as (vector-ref a i)
|
||||
; a[i, j, ...] reads as (array-ref a i j ...)
|
||||
;
|
||||
; x := y reads as (set! x y)
|
||||
; car(x) := y reads as (set-car! x y) - similarly for cdr
|
||||
; x[y] := z reads as (vector-set! x y z) - similarly for array-ref
|
||||
;
|
||||
; 'foo' tries to read as 'foo but usually loses
|
||||
|
||||
|
||||
(define sgol-lexer-table (make-lexer-table))
|
||||
|
||||
(set-char-tokenization! (lexer-ttab sgol-lexer-table)
|
||||
#\#
|
||||
(lambda (c port)
|
||||
c ;ignored
|
||||
(gobble-line port)
|
||||
(read port))
|
||||
#t)
|
||||
|
||||
(define (gobble-line port)
|
||||
(let loop ()
|
||||
(let ((c (read-char port)))
|
||||
(cond ((eof-object? c) c)
|
||||
((char=? c #\newline) #f)
|
||||
(else (loop))))))
|
||||
|
||||
;
|
||||
|
||||
(define (define-sgol-keyword name op)
|
||||
(define-keyword sgol-lexer-table name op))
|
||||
|
||||
(define (define-sgol-punctuation string op)
|
||||
(define-punctuation sgol-lexer-table string op))
|
||||
|
||||
; Arguments to make-operator are: name lbp rbp nud led
|
||||
|
||||
(define (open-paren-nud token stream)
|
||||
(let ((right (prsmatch close-paren-operator stream)))
|
||||
(if (null? right)
|
||||
'the-unit ; ()
|
||||
(if (null? (cdr right))
|
||||
(car right) ; (x)
|
||||
(cons 'tuple right))))) ; (x, y, ..., z)
|
||||
|
||||
; f(x, y) reads as (f x y)
|
||||
; f((x, y)) reads as (f (tuple x y))
|
||||
|
||||
(define (open-paren-led token left stream)
|
||||
(cons left (prsmatch close-paren-operator stream)))
|
||||
|
||||
(define-sgol-punctuation "("
|
||||
(make-operator 'open-paren 200 #f open-paren-nud open-paren-led))
|
||||
|
||||
(define-sgol-punctuation "," comma-operator)
|
||||
|
||||
(define close-paren-operator
|
||||
(make-operator 'close-paren 5 #f delim-error erb-error))
|
||||
(define-sgol-punctuation ")" close-paren-operator)
|
||||
|
||||
; Boolean operators
|
||||
|
||||
(define-sgol-keyword 'true '#t)
|
||||
(define-sgol-keyword 'false '#f)
|
||||
|
||||
(define-sgol-keyword 'if if-operator)
|
||||
(define-sgol-keyword 'then then-operator)
|
||||
(define-sgol-keyword 'else else-operator)
|
||||
|
||||
(define-sgol-keyword 'not (make-operator 'not 70 70 parse-prefix #f))
|
||||
(define-sgol-keyword 'and (make-operator 'and 65 #f #f parse-nary))
|
||||
(define-sgol-keyword 'or (make-operator 'or 60 #f #f parse-nary))
|
||||
|
||||
; Lists
|
||||
|
||||
(define (open-bracket-nud token stream)
|
||||
(let ((elements (prsmatch close-bracket-operator stream)))
|
||||
(if (null? elements)
|
||||
`'()
|
||||
`(list ,@elements))))
|
||||
|
||||
(define (open-bracket-led token left stream)
|
||||
(let ((subscripts (prsmatch close-bracket-operator stream)))
|
||||
(if (and (not (null? subscripts))
|
||||
(null? (cdr subscripts)))
|
||||
`(vector-ref ,left ,@subscripts)
|
||||
`(array-ref ,left ,@subscripts))))
|
||||
|
||||
(define-sgol-punctuation "["
|
||||
(make-operator 'open-bracket 200 #f open-bracket-nud open-bracket-led))
|
||||
|
||||
(define close-bracket-operator
|
||||
(make-operator 'close-bracket 5 #f delim-error erb-error))
|
||||
(define-sgol-punctuation "]" close-bracket-operator)
|
||||
|
||||
(define-sgol-punctuation "::"
|
||||
(make-operator 'cons 75 74 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation "++"
|
||||
(make-operator 'append 75 74 #f parse-nary))
|
||||
|
||||
; Quotation
|
||||
|
||||
(define-sgol-punctuation "'"
|
||||
(make-operator 'quote 5 #f parse-matchfix #f)) ;This isn't right
|
||||
|
||||
; Arithmetic
|
||||
|
||||
(define-sgol-punctuation "+"
|
||||
(make-operator '+ 100 100 parse-prefix parse-infix))
|
||||
|
||||
(define-sgol-punctuation "-"
|
||||
(make-operator '- 100 100 parse-prefix parse-infix))
|
||||
|
||||
(define-sgol-punctuation "*"
|
||||
(make-operator '* 120 120 #f parse-infix)) ;should be parse-nary
|
||||
|
||||
(define-sgol-punctuation "/"
|
||||
(make-operator '/ 120 120 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation "="
|
||||
(make-operator '= 80 80 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation ">"
|
||||
(make-operator '> 80 80 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation "<"
|
||||
(make-operator '< 80 80 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation ">="
|
||||
(make-operator '>= 80 80 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation "<="
|
||||
(make-operator '<= 80 80 #f parse-infix))
|
||||
|
||||
(define-sgol-punctuation "!="
|
||||
(make-operator '!= 80 80 #f parse-infix))
|
||||
|
||||
; Side effects
|
||||
|
||||
(define (:=-led token left stream)
|
||||
(let* ((form (parse-infix token left stream))
|
||||
(lhs (cadr form))
|
||||
(rhs (caddr form)))
|
||||
(if (pair? lhs)
|
||||
(case (car lhs)
|
||||
((car) `(set-car! ,@(cdr lhs) ,rhs))
|
||||
((cdr) `(set-cdr! ,@(cdr lhs) ,rhs))
|
||||
((vector-ref) `(vector-set! ,@(cdr lhs) ,rhs))
|
||||
((array-ref) `(array-set! ,@(cdr lhs) ,rhs))
|
||||
(else (error "invalid LHS for :=" form)))
|
||||
form)))
|
||||
|
||||
(define-sgol-punctuation ":="
|
||||
(make-operator 'set! 70 #f #f :=-led))
|
||||
|
||||
; End of input...
|
||||
|
||||
(define-sgol-punctuation ";" end-of-input-operator)
|
||||
|
||||
; Read using Pratt parser with SGOL tokenizer table
|
||||
|
||||
(define (sgol-read . port-option)
|
||||
(toplevel-parse (port->stream (if (null? port-option)
|
||||
(current-input-port)
|
||||
(car port-option))
|
||||
sgol-lexer-table)))
|
||||
|
||||
; Read/print loop
|
||||
|
||||
(define (rpl)
|
||||
(let ((thing (sgol-read)))
|
||||
(if (not (eq? thing end-of-input-operator))
|
||||
(begin (write thing)
|
||||
(newline)
|
||||
(rpl)))))
|
||||
|
||||
; Read/eval/print loop
|
||||
|
||||
(define (rpl)
|
||||
(let ((thing (sgol-read)))
|
||||
(if (not (eq? thing end-of-input-operator))
|
||||
(begin (write thing)
|
||||
(newline)
|
||||
(rpl)))))
|
|
@ -1,154 +0,0 @@
|
|||
; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
|
||||
; Copyright (c) 1993-1999 by Richard Kelsey and Jonathan Rees. See file COPYING.
|
||||
|
||||
|
||||
; A tokenizer.
|
||||
|
||||
; Nonstandard things needed:
|
||||
; record package
|
||||
; char->ascii
|
||||
; peek-char
|
||||
; reverse-list->string
|
||||
; error
|
||||
|
||||
(define (reverse-list->string l n)
|
||||
(list->string (reverse l)))
|
||||
|
||||
; Tokenizer tables
|
||||
|
||||
(define tokenizer-table-type
|
||||
(make-record-type 'tokenizer-table
|
||||
'(translation dispatch-vector terminating?-vector)))
|
||||
|
||||
(define make-tokenizer-table
|
||||
(let ()
|
||||
(define make
|
||||
(record-constructor tokenizer-table-type
|
||||
'(translation dispatch-vector terminating?-vector)))
|
||||
(define (make-tokenizer-table)
|
||||
(make (if (char=? (string-ref (symbol->string 't) 0) #\T)
|
||||
char-upcase
|
||||
char-downcase)
|
||||
(make-vector 256 (lambda (c port)
|
||||
(error "illegal character read" c)))
|
||||
(make-vector 256 #t)))
|
||||
make-tokenizer-table))
|
||||
|
||||
(define ttab-translation
|
||||
(record-accessor tokenizer-table-type 'translation))
|
||||
(define ttab-dispatch-vector
|
||||
(record-accessor tokenizer-table-type 'dispatch-vector))
|
||||
(define ttab-terminating?-vector
|
||||
(record-accessor tokenizer-table-type 'terminating?-vector))
|
||||
|
||||
(define set-tokenizer-table-translator!
|
||||
(record-modifier tokenizer-table-type 'translation))
|
||||
|
||||
(define (set-char-tokenization! ttab char reader term?)
|
||||
(vector-set! (ttab-dispatch-vector ttab) (char->ascii char) reader)
|
||||
(vector-set! (ttab-terminating?-vector ttab) (char->ascii char) term?))
|
||||
|
||||
; Main dispatch
|
||||
|
||||
(define (tokenize ttab port)
|
||||
(let ((c (read-char port)))
|
||||
(if (eof-object? c)
|
||||
c
|
||||
((vector-ref (ttab-dispatch-vector ttab) (char->ascii c))
|
||||
c port))))
|
||||
|
||||
; Atoms (symbols and numbers)
|
||||
|
||||
(define (scan-atom c ttab port)
|
||||
(let ((translate (ttab-translation ttab)))
|
||||
(let loop ((l (list (translate c))) (n 1))
|
||||
(let ((c (peek-char port)))
|
||||
(cond ((or (eof-object? c)
|
||||
(vector-ref (ttab-terminating?-vector ttab)
|
||||
(char->ascii c)))
|
||||
(reverse-list->string l n))
|
||||
(else
|
||||
(loop (cons (translate (read-char port)) l)
|
||||
(+ n 1))))))))
|
||||
|
||||
; Allow ->foo, -v-, etc.
|
||||
|
||||
(define (parse-atom string)
|
||||
(let ((c (string-ref string 0)))
|
||||
(cond ((char=? c #\+)
|
||||
(parse-possible-number string))
|
||||
((char=? c #\-)
|
||||
(parse-possible-number string))
|
||||
((char=? c #\.)
|
||||
(parse-possible-number string))
|
||||
(else
|
||||
(if (char-numeric? c)
|
||||
(parse-number string)
|
||||
(string->symbol string))))))
|
||||
|
||||
; First char is + - .
|
||||
|
||||
(define (parse-possible-number string)
|
||||
(if (and (> (string-length string) 1)
|
||||
(char-numeric? (string-ref string 1)))
|
||||
(parse-number string)
|
||||
(string->symbol string)))
|
||||
|
||||
(define (parse-number string)
|
||||
(or (string->number string 'e 'd)
|
||||
(error "unsupported number syntax" string)))
|
||||
|
||||
|
||||
; Usual stuff (what you'd expect to be common to Scheme and ML syntax)
|
||||
|
||||
(define (set-up-usual-tokenization! ttab)
|
||||
|
||||
(define (tokenize-whitespace c port) c ;ignored
|
||||
(tokenize ttab port))
|
||||
|
||||
(define (tokenize-constituent c port)
|
||||
(parse-atom (scan-atom c ttab port)))
|
||||
|
||||
(for-each (lambda (c)
|
||||
(set-char-tokenization! ttab (ascii->char c)
|
||||
tokenize-whitespace #t))
|
||||
ascii-whitespaces)
|
||||
|
||||
(for-each (lambda (c)
|
||||
(set-char-tokenization! ttab c tokenize-constituent #f))
|
||||
(string->list
|
||||
(string-append ".0123456789"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz")))
|
||||
|
||||
(set-char-tokenization! ttab #\" tokenize-string #t)
|
||||
|
||||
)
|
||||
|
||||
(define (make-constituent! c ttab)
|
||||
(set-char-tokenization! ttab c
|
||||
(lambda (c port)
|
||||
(parse-atom (scan-atom c ttab port)))
|
||||
#f))
|
||||
|
||||
(define (tokenize-string c port) c ;ignored
|
||||
(let loop ((l '()) (i 0))
|
||||
(let ((c (read-char port)))
|
||||
(cond ((eof-object? c)
|
||||
(error "end of file within a string"))
|
||||
((char=? c #\\)
|
||||
(let ((c (read-char port)))
|
||||
(if (or (char=? c #\\) (char=? c #\"))
|
||||
(loop (cons c l) (+ i 1))
|
||||
(error "invalid escaped character in string" c))))
|
||||
((char=? c #\") (reverse-list->string l i))
|
||||
(else (loop (cons c l) (+ i 1)))))))
|
||||
|
||||
; Auxiliary for parse-atom and tokenize-string
|
||||
|
||||
;(define (reverse-list->string l n) ;In microcode?
|
||||
; (let ((s (make-string n)))
|
||||
; (do ((l l (cdr l))
|
||||
; (i (- n 1) (- i 1)))
|
||||
; ((< i 0) s)
|
||||
; (string-set! s i (car l)))))
|
Loading…
Reference in New Issue