; -*- Mode: Scheme; -*-
;
; A simple Pratt-Parser for SIOD: 2-FEB-90, George Carrette, GJC@PARADIGM.COM
; Siod may be obtained by anonymous FTP to world.std.com:pub/gjc.
;
; *                      COPYRIGHT (c) 1988-1994 BY                          *
; *        PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS.       *
; *                         ALL RIGHTS RESERVED                              *
;
;Permission to use, copy, modify, distribute and sell this software
;and its documentation for any purpose and without fee is hereby
;granted, provided that the above copyright notice appear in all copies
;and that both that copyright notice and this permission notice appear
;in supporting documentation, and that the name of Paradigm Associates
;Inc not be used in advertising or publicity pertaining to distribution
;of the software without specific, written prior permission.
;
;PARADIGM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
;ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
;PARADIGM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
;ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
;WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
;ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
;SOFTWARE.
;
; Based on a theory of parsing presented in:                       
;                                                                      
;  Pratt, Vaughan R., ``Top Down Operator Precedence,''         
;  ACM Symposium on Principles of Programming Languages         
;  Boston, MA; October, 1973.                                   
;                                                                      

; The following terms may be useful in deciphering this code:

; NUD -- NUll left Denotation (op has nothing to its left (prefix))
; LED -- LEft Denotation      (op has something to left (postfix or infix))

; LBP -- Left Binding Power  (the stickiness to the left)
; RBP -- Right Binding Power (the stickiness to the right)
;

; Mods for Scheme 48 by J Rees 6-14-90

; From: <gjc@mitech.com>
;
; Now a neat thing that CGOL had was a way of packaging and scoping
; different parsing contexts. The maclisp implementation was simple,
; instead of just NUD and LED and other properties there was a list
; of property indicators. And a lookup operation.
;
; One use of the local-context thing, in parsing the C language
; you can use a different binding-power for ":" depending on
; what kind of statement you are parsing, a general statement
; context where ":" means a label, a "switch" or the "if for value
; " construct of (a > b) > c : d;


(define (peek-token stream)
  (stream 'peek #f))

(define (read-token stream)
  (stream 'get #f))
   
(define (toplevel-parse stream)
  (if (eq? end-of-input-operator (peek-token stream))
      (read-token stream)
      (parse -1 stream)))


; A token is either an operator or atomic (number, identifier, etc.)

(define operator-type
  (make-record-type 'operator
		    '(name lbp rbp nud led)))

(define make-operator
  (let ()
    (define make
      (record-constructor operator-type '(name lbp rbp nud led)))
    (define (make-operator name lbp rbp nud led)
      (make name
	    (or lbp default-lbp)
	    (or rbp default-rbp)
	    (or nud default-nud)
	    (or led default-led)))
    make-operator))

(define operator? (record-predicate operator-type))

(define operator-name (record-accessor operator-type 'name))
(define operator-nud (record-accessor operator-type 'nud))
(define operator-led (record-accessor operator-type 'led))
(define operator-lbp (record-accessor operator-type 'lbp))
(define operator-rbp (record-accessor operator-type 'rbp))

(define (default-nud operator stream)
  (if (eq? (operator-led operator) default-led)
      operator
      (error 'not-a-prefix-operator operator)))

(define (nudcall token stream)
  (if (operator? token)
      ((operator-nud token) token stream)
      token))

(define default-led #f)

;+++ To do: fix this to make juxtaposition work   (f x+y)

(define (ledcall token left stream)
  ((or (and (operator? token)
	    (operator-led token))
       (error 'not-an-infix-operator token))
   token
   left
   stream))

(define default-lbp 200)

(define (lbp token)
  (if (operator? token)
      (operator-lbp token)
      default-lbp))

(define default-rbp 200)

(define (rbp token)
  (if (operator? token)
      (operator-rbp token)
      default-rbp))

(define-record-discloser operator-type
  (lambda (obj)
    (list 'operator (operator-name obj))))

; Mumble

(define (delim-error token stream)
  (error 'invalid-use-of-delimiter token))

(define (erb-error token left stream)
  (error 'too-many-right-parentheses token))

(define (premterm-err token stream)
  (error 'premature-termination-of-input token))

; Parse

(define *parse-debug* #f)

(define (parse rbp-level stream)
  (if *parse-debug* (print `(parse ,rbp-level)))
  (let parse-loop ((translation (nudcall (read-token stream) stream)))
    (if (< rbp-level (lbp (peek-token stream)))
	(parse-loop (ledcall (read-token stream) translation stream))
      (begin (if *parse-debug* (print translation))
	     translation))))

(define (print s) (write s) (newline))

(define (parse-prefix operator stream)
  (list (operator-name operator)
	(parse (rbp operator) stream)))

(define (parse-infix operator left stream)
  (list (operator-name operator)
	left
	(parse (rbp operator) stream)))

(define (parse-nary operator left stream)
  (cons (operator-name operator) (cons left (prsnary operator stream))))

(define (prsnary operator stream)
  (define (loop l)
    (if (eq? operator (peek-token stream))
	(begin (read-token stream)
	       (loop (cons (parse (rbp operator) stream) l)))
      (reverse l)))
  (loop (list (parse (rbp operator) stream))))

; Parenthesis matching, with internal commas.
; Kind of a kludge if you ask me.

(define (parse-matchfix operator stream) ; |x|
  (cons (operator-name operator)
	(prsmatch operator stream)))

(define (prsmatch close-op stream)
  (if (eq? (peek-token stream) close-op)
      (begin (read-token stream)
	     '())
      (let loop ((l (list (parse 10 stream))))
	(if (eq? (peek-token stream) close-op)
	    (begin (read-token stream)
		   (reverse l))
	    (if (eq? (peek-token stream) comma-operator)
		(begin (read-token stream)
		       (loop (cons (parse 10 stream) l)))
		(error 'comma-or-match-not-found (read-token stream)))))))

(define comma-operator (make-operator 'comma 10 #f delim-error #f))

; if A then B [else C]

(define (if-nud token stream)
  (let* ((pred (parse (rbp token) stream))
	 (then (if (eq? (peek-token stream) then-operator)
		   (parse (rbp (read-token stream)) stream)
		   (error 'missing-then pred))))
    (if (eq? (peek-token stream) else-operator)
	`(if ,pred ,then ,(parse (rbp (read-token stream)) stream))
	`(if ,pred ,then))))

(define if-operator (make-operator 'if #f 45 if-nud #f))
(define then-operator (make-operator 'then 5 25 delim-error #f))
(define else-operator (make-operator 'else 5 25 delim-error #f))

; Lexer support:

(define lexer-type
  (make-record-type 'lexer '(ttab punctab keytab)))

(define lexer-ttab    (record-accessor lexer-type 'ttab))
(define lexer-punctab (record-accessor lexer-type 'punctab))
(define lexer-keytab  (record-accessor lexer-type 'keytab))

(define make-lexer-table
  (let ((make (record-constructor lexer-type '(ttab punctab keytab))))
    (lambda ()
      (let ((ttab (make-tokenizer-table)))
	(set-up-usual-tokenization! ttab)
	(make ttab (make-table) (make-table))))))

(define (lex ltab port)
  (let ((thing (tokenize (lexer-ttab ltab) port)))
    (cond ((eof-object? thing)
	   end-of-input-operator)
	  ((symbol? thing)
	   (or (table-ref (lexer-keytab ltab) thing)
	       thing))
	  (else thing))))

; Keywords

(define (define-keyword ltab name op)
  (table-set! (lexer-keytab ltab) name op))

; Punctuation

; lexnode = (* operator (table-of char (+ lexnode #f)))  -- discrimination tree

(define (define-punctuation ltab string op)
  (let ((end (- (string-length string) 1)))
    (let loop ((i 0)
	       (table (lexer-punctab ltab)))
      (let* ((c (string-ref string i))
	     (lexnode
	      (or (table-ref table c)
		  (let ((lexnode
			 (cons (error-operator (substring string 0 (+ i 1)))
			       (make-table))))
		    (table-set! table c lexnode)
		    (if (= i 0)
			(set-char-tokenization! (lexer-ttab ltab)
						c
						(operator-reader lexnode)
						#t))
		    lexnode))))
	(if (>= i end)
	    (set-car! lexnode op)
	    (loop (+ i 1) (cdr lexnode)))))))

(define (operator-reader lexnode)
  (lambda (c port)
    (let loop ((lexnode lexnode))
      (let ((nextc (peek-char port)))
	(let ((nextnode (table-ref (cdr lexnode) nextc)))
	  (if nextnode
	      (begin (read-char port)
		     (loop nextnode))
	      (car lexnode)))))))

(define (error-operator string)
  (make-operator 'invalid-operator #f #f
		 (lambda rest (error "invalid operator" string))
		 #f))

; Mumble

(define end-of-input-operator
  (make-operator "end of input" -1 #f premterm-err #f))

(define (port->stream port ltab)
  (define (really-get)
    (lex ltab port))
  (define peeked? #f)
  (define peek #f)
  (define (stream op arg)
    (case op
      ((get) (if peeked?
		 (begin (set! peeked? #f) peek)
		 (really-get)))
      ((peek) (if peeked?
		  peek
		  (begin (set! peeked? #t)
			 (set! peek (really-get))
			 peek)))))
  stream)