;;; EARLEY -- Earley's parser, written by Marc Feeley.

; (make-parser grammar lexer) is used to create a parser from the grammar
; description `grammar' and the lexer function `lexer'.
;
; A grammar is a list of definitions.  Each definition defines a non-terminal
; by a set of rules.  Thus a definition has the form: (nt rule1 rule2...).
; A given non-terminal can only be defined once.  The first non-terminal
; defined is the grammar's goal.  Each rule is a possibly empty list of
; non-terminals.  Thus a rule has the form: (nt1 nt2...).  A non-terminal
; can be any scheme value.  Note that all grammar symbols are treated as
; non-terminals.  This is fine though because the lexer will be outputing
; non-terminals.
;
; The lexer defines what a token is and the mapping between tokens and
; the grammar's non-terminals.  It is a function of one argument, the input,
; that returns the list of tokens corresponding to the input.  Each token is
; represented by a list.  The first element is some `user-defined' information
; associated with the token and the rest represents the token's class(es) (as a
; list of non-terminals that this token corresponds to).
;
; The result of `make-parser' is a function that parses the single input it
; is given into the grammar's goal.  The result is a `parse' which can be
; manipulated with the procedures: `parse->parsed?', `parse->trees'
; and `parse->nb-trees' (see below).
;
; Let's assume that we want a parser for the grammar
;
;  S -> x = E
;  E -> E + E | V
;  V -> V y |
;
; and that the input to the parser is a string of characters.  Also, assume we
; would like to map the characters `x', `y', `+' and `=' into the corresponding
; non-terminals in the grammar.  Such a parser could be created with
;
; (make-parser
;   '(
;      (s (x = e))
;      (e (e + e) (v))
;      (v (v y) ())
;    )
;   (lambda (str)
;     (map (lambda (char)
;            (list char ; user-info = the character itself
;                  (case char
;                    ((#\x) 'x)
;                    ((#\y) 'y)
;                    ((#\+) '+)
;                    ((#\=) '=)
;                    (else (fatal-error "lexer error")))))
;          (string->list str)))
; )
;
; An alternative definition (that does not check for lexical errors) is
;
; (make-parser
;   '(
;      (s (#\x #\= e))
;      (e (e #\+ e) (v))
;      (v (v #\y) ())
;    )
;   (lambda (str) (map (lambda (char) (list char char)) (string->list str)))
; )
;
; To help with the rest of the discussion, here are a few definitions:
;
; An input pointer (for an input of `n' tokens) is a value between 0 and `n'.
; It indicates a point between two input tokens (0 = beginning, `n' = end).
; For example, if `n' = 4, there are 5 input pointers:
;
;   input                   token1     token2     token3     token4
;   input pointers       0          1          2          3          4
;
; A configuration indicates the extent to which a given rule is parsed (this
; is the common `dot notation').  For simplicity, a configuration is
; represented as an integer, with successive configurations in the same
; rule associated with successive integers.  It is assumed that the grammar
; has been extended with rules to aid scanning.  These rules are of the
; form `nt ->', and there is one such rule for every non-terminal.  Note
; that these rules are special because they only apply when the corresponding
; non-terminal is returned by the lexer.
;
; A configuration set is a configuration grouped with the set of input pointers
; representing where the head non-terminal of the configuration was predicted.
;
; Here are the rules and configurations for the grammar given above:
;
;  S -> .         \
;       0          |
;  x -> .          |
;       1          |
;  = -> .          |
;       2          |
;  E -> .          |
;       3           > special rules (for scanning)
;  + -> .          |
;       4          |
;  V -> .          |
;       5          |
;  y -> .          |
;       6         /
;  S -> .  x  .  =  .  E  .
;       7     8     9     10
;  E -> .  E  .  +  .  E  .
;       11    12    13    14
;  E -> .  V  .
;       15    16
;  V -> .  V  .  y  .
;       17    18    19
;  V -> .
;       20
;
; Starters of the non-terminal `nt' are configurations that are leftmost
; in a non-special rule for `nt'.  Enders of the non-terminal `nt' are
; configurations that are rightmost in any rule for `nt'.  Predictors of the
; non-terminal `nt' are configurations that are directly to the left of `nt'
; in any rule.
;
; For the grammar given above,
;
;   Starters of V   = (17 20)
;   Enders of V     = (5 19 20)
;   Predictors of V = (15 17)

(library (rnrs-benchmarks earley)
  (export main)
  (import (rnrs) (rnrs-benchmarks))
  
  (define (make-parser grammar lexer)
  
    (define (non-terminals grammar) ; return vector of non-terminals in grammar
  
      (define (add-nt nt nts)
        (if (member nt nts) nts (cons nt nts))) ; use equal? for equality tests
  
      (let def-loop ((defs grammar) (nts '()))
        (if (pair? defs)
          (let* ((def (car defs))
                 (head (car def)))
            (let rule-loop ((rules (cdr def))
                            (nts (add-nt head nts)))
              (if (pair? rules)
                (let ((rule (car rules)))
                  (let loop ((l rule) (nts nts))
                    (if (pair? l)
                      (let ((nt (car l)))
                        (loop (cdr l) (add-nt nt nts)))
                      (rule-loop (cdr rules) nts))))
                (def-loop (cdr defs) nts))))
          (list->vector (reverse nts))))) ; goal non-terminal must be at index 0
  
    (define (ind nt nts) ; return index of non-terminal `nt' in `nts'
      (let loop ((i (- (vector-length nts) 1)))
        (if (>= i 0)
          (if (equal? (vector-ref nts i) nt) i (loop (- i 1)))
          #f)))
  
    (define (nb-configurations grammar) ; return nb of configurations in grammar
      (let def-loop ((defs grammar) (nb-confs 0))
        (if (pair? defs)
          (let ((def (car defs)))
            (let rule-loop ((rules (cdr def)) (nb-confs nb-confs))
              (if (pair? rules)
                (let ((rule (car rules)))
                  (let loop ((l rule) (nb-confs nb-confs))
                    (if (pair? l)
                      (loop (cdr l) (+ nb-confs 1))
                      (rule-loop (cdr rules) (+ nb-confs 1)))))
                (def-loop (cdr defs) nb-confs))))
        nb-confs)))
  
  ; First, associate a numeric identifier to every non-terminal in the
  ; grammar (with the goal non-terminal associated with 0).
  ;
  ; So, for the grammar given above we get:
  ;
  ; s -> 0   x -> 1   = -> 4   e ->3    + -> 4   v -> 5   y -> 6
  
    (let* ((nts (non-terminals grammar))          ; id map = list of non-terms
           (nb-nts (vector-length nts))           ; the number of non-terms
           (nb-confs (+ (nb-configurations grammar) nb-nts)) ; the nb of confs
           (starters (make-vector nb-nts '()))    ; starters for every non-term
           (enders (make-vector nb-nts '()))      ; enders for every non-term
           (predictors (make-vector nb-nts '()))  ; predictors for every non-term
           (steps (make-vector nb-confs #f))      ; what to do in a given conf
           (names (make-vector nb-confs #f)))     ; name of rules
  
      (define (setup-tables grammar nts starters enders predictors steps names)
  
        (define (add-conf conf nt nts class)
          (let ((i (ind nt nts)))
            (vector-set! class i (cons conf (vector-ref class i)))))
  
        (let ((nb-nts (vector-length nts)))
  
          (let nt-loop ((i (- nb-nts 1)))
            (if (>= i 0)
              (begin
                (vector-set! steps i (- i nb-nts))
                (vector-set! names i (list (vector-ref nts i) 0))
                (vector-set! enders i (list i))
                (nt-loop (- i 1)))))
  
          (let def-loop ((defs grammar) (conf (vector-length nts)))
            (if (pair? defs)
              (let* ((def (car defs))
                     (head (car def)))
                (let rule-loop ((rules (cdr def)) (conf conf) (rule-num 1))
                  (if (pair? rules)
                    (let ((rule (car rules)))
                      (vector-set! names conf (list head rule-num))
                      (add-conf conf head nts starters)
                      (let loop ((l rule) (conf conf))
                        (if (pair? l)
                          (let ((nt (car l)))
                            (vector-set! steps conf (ind nt nts))
                            (add-conf conf nt nts predictors)
                            (loop (cdr l) (+ conf 1)))
                          (begin
                            (vector-set! steps conf (- (ind head nts) nb-nts))
                            (add-conf conf head nts enders)
                            (rule-loop (cdr rules) (+ conf 1) (+ rule-num 1))))))
                    (def-loop (cdr defs) conf))))))))
  
  ; Now, for each non-terminal, compute the starters, enders and predictors and
  ; the names and steps tables.
  
      (setup-tables grammar nts starters enders predictors steps names)
  
  ; Build the parser description
  
      (let ((parser-descr (vector lexer
                                  nts
                                  starters
                                  enders
                                  predictors
                                  steps
                                  names)))
        (lambda (input)
  
          (define (ind nt nts) ; return index of non-terminal `nt' in `nts'
            (let loop ((i (- (vector-length nts) 1)))
              (if (>= i 0)
                (if (equal? (vector-ref nts i) nt) i (loop (- i 1)))
                #f)))
  
          (define (comp-tok tok nts) ; transform token to parsing format
            (let loop ((l1 (cdr tok)) (l2 '()))
              (if (pair? l1)
                (let ((i (ind (car l1) nts)))
                  (if i
                    (loop (cdr l1) (cons i l2))
                    (loop (cdr l1) l2)))
                (cons (car tok) (reverse l2)))))
  
          (define (input->tokens input lexer nts)
            (list->vector (map (lambda (tok) (comp-tok tok nts)) (lexer input))))
  
          (define (make-states nb-toks nb-confs)
            (let ((states (make-vector (+ nb-toks 1) #f)))
              (let loop ((i nb-toks))
                (if (>= i 0)
                  (let ((v (make-vector (+ nb-confs 1) #f)))
                    (vector-set! v 0 -1)
                    (vector-set! states i v)
                    (loop (- i 1)))
                  states))))
  
          (define (conf-set-get state conf)
            (vector-ref state (+ conf 1)))
  
          (define (conf-set-get* state state-num conf)
            (let ((conf-set (conf-set-get state conf)))
              (if conf-set
                conf-set
                (let ((conf-set (make-vector (+ state-num 6) #f)))
                  (vector-set! conf-set 1 -3) ; old elems tail (points to head)
                  (vector-set! conf-set 2 -1) ; old elems head
                  (vector-set! conf-set 3 -1) ; new elems tail (points to head)
                  (vector-set! conf-set 4 -1) ; new elems head
                  (vector-set! state (+ conf 1) conf-set)
                  conf-set))))
  
          (define (conf-set-merge-new! conf-set)
            (vector-set! conf-set
              (+ (vector-ref conf-set 1) 5)
              (vector-ref conf-set 4))
            (vector-set! conf-set 1 (vector-ref conf-set 3))
            (vector-set! conf-set 3 -1)
            (vector-set! conf-set 4 -1))
  
          (define (conf-set-head conf-set)
            (vector-ref conf-set 2))
  
          (define (conf-set-next conf-set i)
            (vector-ref conf-set (+ i 5)))
  
          (define (conf-set-member? state conf i)
            (let ((conf-set (vector-ref state (+ conf 1))))
              (if conf-set
                (conf-set-next conf-set i)
                #f)))
  
          (define (conf-set-adjoin state conf-set conf i)
            (let ((tail (vector-ref conf-set 3))) ; put new element at tail
              (vector-set! conf-set (+ i 5) -1)
              (vector-set! conf-set (+ tail 5) i)
              (vector-set! conf-set 3 i)
              (if (< tail 0)
                (begin
                  (vector-set! conf-set 0 (vector-ref state 0))
                  (vector-set! state 0 conf)))))
  
          (define (conf-set-adjoin* states state-num l i)
            (let ((state (vector-ref states state-num)))
              (let loop ((l1 l))
                (if (pair? l1)
                  (let* ((conf (car l1))
                         (conf-set (conf-set-get* state state-num conf)))
                    (if (not (conf-set-next conf-set i))
                      (begin
                        (conf-set-adjoin state conf-set conf i)
                        (loop (cdr l1)))
                      (loop (cdr l1))))))))
  
          (define (conf-set-adjoin** states states* state-num conf i)
            (let ((state (vector-ref states state-num)))
              (if (conf-set-member? state conf i)
                (let* ((state* (vector-ref states* state-num))
                       (conf-set* (conf-set-get* state* state-num conf)))
                  (if (not (conf-set-next conf-set* i))
                    (conf-set-adjoin state* conf-set* conf i))
                  #t)
                #f)))
  
          (define (conf-set-union state conf-set conf other-set)
            (let loop ((i (conf-set-head other-set)))
              (if (>= i 0)
                (if (not (conf-set-next conf-set i))
                  (begin
                    (conf-set-adjoin state conf-set conf i)
                    (loop (conf-set-next other-set i)))
                  (loop (conf-set-next other-set i))))))
  
          (define (forw states state-num starters enders predictors steps nts)
  
            (define (predict state state-num conf-set conf nt starters enders)
  
              ; add configurations which start the non-terminal `nt' to the
              ; right of the dot
  
              (let loop1 ((l (vector-ref starters nt)))
                (if (pair? l)
                  (let* ((starter (car l))
                         (starter-set (conf-set-get* state state-num starter)))
                    (if (not (conf-set-next starter-set state-num))
                      (begin
                        (conf-set-adjoin state starter-set starter state-num)
                        (loop1 (cdr l)))
                      (loop1 (cdr l))))))
  
              ; check for possible completion of the non-terminal `nt' to the
              ; right of the dot
  
              (let loop2 ((l (vector-ref enders nt)))
                (if (pair? l)
                  (let ((ender (car l)))
                    (if (conf-set-member? state ender state-num)
                      (let* ((next (+ conf 1))
                             (next-set (conf-set-get* state state-num next)))
                        (conf-set-union state next-set next conf-set)
                        (loop2 (cdr l)))
                      (loop2 (cdr l)))))))
  
            (define (reduce states state state-num conf-set head preds)
  
              ; a non-terminal is now completed so check for reductions that
              ; are now possible at the configurations `preds'
  
              (let loop1 ((l preds))
                (if (pair? l)
                  (let ((pred (car l)))
                    (let loop2 ((i head))
                      (if (>= i 0)
                        (let ((pred-set (conf-set-get (vector-ref states i) pred)))
                          (if pred-set
                            (let* ((next (+ pred 1))
                                   (next-set (conf-set-get* state state-num next)))
                              (conf-set-union state next-set next pred-set)))
                          (loop2 (conf-set-next conf-set i)))
                        (loop1 (cdr l))))))))
  
            (let ((state (vector-ref states state-num))
                  (nb-nts (vector-length nts)))
              (let loop ()
                (let ((conf (vector-ref state 0)))
                  (if (>= conf 0)
                    (let* ((step (vector-ref steps conf))
                           (conf-set (vector-ref state (+ conf 1)))
                           (head (vector-ref conf-set 4)))
                      (vector-set! state 0 (vector-ref conf-set 0))
                      (conf-set-merge-new! conf-set)
                      (if (>= step 0)
                        (predict state state-num conf-set conf step starters enders)
                        (let ((preds (vector-ref predictors (+ step nb-nts))))
                          (reduce states state state-num conf-set head preds)))
                      (loop)))))))
  
          (define (forward starters enders predictors steps nts toks)
            (let* ((nb-toks (vector-length toks))
                   (nb-confs (vector-length steps))
                   (states (make-states nb-toks nb-confs))
                   (goal-starters (vector-ref starters 0)))
              (conf-set-adjoin* states 0 goal-starters 0) ; predict goal
              (forw states 0 starters enders predictors steps nts)
              (let loop ((i 0))
                (if (< i nb-toks)
                  (let ((tok-nts (cdr (vector-ref toks i))))
                    (conf-set-adjoin* states (+ i 1) tok-nts i) ; scan token
                    (forw states (+ i 1) starters enders predictors steps nts)
                    (loop (+ i 1)))))
              states))
  
          (define (produce conf i j enders steps toks states states* nb-nts)
            (let ((prev (- conf 1)))
              (if (and (>= conf nb-nts) (>= (vector-ref steps prev) 0))
                (let loop1 ((l (vector-ref enders (vector-ref steps prev))))
                  (if (pair? l)
                    (let* ((ender (car l))
                           (ender-set (conf-set-get (vector-ref states j)
                                                    ender)))
                      (if ender-set
                        (let loop2 ((k (conf-set-head ender-set)))
                          (if (>= k 0)
                            (begin
                              (and (>= k i)
                                   (conf-set-adjoin** states states* k prev i)
                                   (conf-set-adjoin** states states* j ender k))
                              (loop2 (conf-set-next ender-set k)))
                            (loop1 (cdr l))))
                        (loop1 (cdr l)))))))))
  
          (define (back states states* state-num enders steps nb-nts toks)
            (let ((state* (vector-ref states* state-num)))
              (let loop1 ()
                (let ((conf (vector-ref state* 0)))
                  (if (>= conf 0)
                    (let* ((conf-set (vector-ref state* (+ conf 1)))
                           (head (vector-ref conf-set 4)))
                      (vector-set! state* 0 (vector-ref conf-set 0))
                      (conf-set-merge-new! conf-set)
                      (let loop2 ((i head))
                        (if (>= i 0)
                          (begin
                            (produce conf i state-num enders steps
                                     toks states states* nb-nts)
                            (loop2 (conf-set-next conf-set i)))
                          (loop1)))))))))
  
          (define (backward states enders steps nts toks)
            (let* ((nb-toks (vector-length toks))
                   (nb-confs (vector-length steps))
                   (nb-nts (vector-length nts))
                   (states* (make-states nb-toks nb-confs))
                   (goal-enders (vector-ref enders 0)))
              (let loop1 ((l goal-enders))
                (if (pair? l)
                  (let ((conf (car l)))
                    (conf-set-adjoin** states states* nb-toks conf 0)
                    (loop1 (cdr l)))))
              (let loop2 ((i nb-toks))
                (if (>= i 0)
                  (begin
                    (back states states* i enders steps nb-nts toks)
                    (loop2 (- i 1)))))
              states*))
  
          (define (parsed? nt i j nts enders states)
            (let ((nt* (ind nt nts)))
              (if nt*
                (let ((nb-nts (vector-length nts)))
                  (let loop ((l (vector-ref enders nt*)))
                    (if (pair? l)
                      (let ((conf (car l)))
                        (if (conf-set-member? (vector-ref states j) conf i)
                          #t
                          (loop (cdr l))))
                      #f)))
                #f)))
  
          (define (deriv-trees conf i j enders steps names toks states nb-nts)
            (let ((name (vector-ref names conf)))
  
              (if name ; `conf' is at the start of a rule (either special or not)
                (if (< conf nb-nts)
                  (list (list name (car (vector-ref toks i))))
                  (list (list name)))
  
                (let ((prev (- conf 1)))
                  (let loop1 ((l1 (vector-ref enders (vector-ref steps prev)))
                              (l2 '()))
                    (if (pair? l1)
                      (let* ((ender (car l1))
                             (ender-set (conf-set-get (vector-ref states j)
                                                      ender)))
                        (if ender-set
                          (let loop2 ((k (conf-set-head ender-set)) (l2 l2))
                            (if (>= k 0)
                              (if (and (>= k i)
                                       (conf-set-member? (vector-ref states k)
                                                         prev i))
                                (let ((prev-trees
                                        (deriv-trees prev i k enders steps names
                                                     toks states nb-nts))
                                      (ender-trees
                                        (deriv-trees ender k j enders steps names
                                                     toks states nb-nts)))
                                  (let loop3 ((l3 ender-trees) (l2 l2))
                                    (if (pair? l3)
                                      (let ((ender-tree (list (car l3))))
                                        (let loop4 ((l4 prev-trees) (l2 l2))
                                          (if (pair? l4)
                                            (loop4 (cdr l4)
                                                   (cons (append (car l4)
                                                                 ender-tree)
                                                         l2))
                                            (loop3 (cdr l3) l2))))
                                      (loop2 (conf-set-next ender-set k) l2))))
                                (loop2 (conf-set-next ender-set k) l2))
                              (loop1 (cdr l1) l2)))
                          (loop1 (cdr l1) l2)))
                      l2))))))
  
          (define (deriv-trees* nt i j nts enders steps names toks states)
            (let ((nt* (ind nt nts)))
              (if nt*
                (let ((nb-nts (vector-length nts)))
                  (let loop ((l (vector-ref enders nt*)) (trees '()))
                    (if (pair? l)
                      (let ((conf (car l)))
                        (if (conf-set-member? (vector-ref states j) conf i)
                          (loop (cdr l)
                                (append (deriv-trees conf i j enders steps names
                                                     toks states nb-nts)
                                        trees))
                          (loop (cdr l) trees)))
                      trees)))
                #f)))
  
          (define (nb-deriv-trees conf i j enders steps toks states nb-nts)
            (let ((prev (- conf 1)))
              (if (or (< conf nb-nts) (< (vector-ref steps prev) 0))
                1
                (let loop1 ((l (vector-ref enders (vector-ref steps prev)))
                            (n 0))
                  (if (pair? l)
                    (let* ((ender (car l))
                           (ender-set (conf-set-get (vector-ref states j)
                                                    ender)))
                      (if ender-set
                        (let loop2 ((k (conf-set-head ender-set)) (n n))
                          (if (>= k 0)
                            (if (and (>= k i)
                                     (conf-set-member? (vector-ref states k)
                                                       prev i))
                              (let ((nb-prev-trees
                                      (nb-deriv-trees prev i k enders steps
                                                      toks states nb-nts))
                                    (nb-ender-trees
                                      (nb-deriv-trees ender k j enders steps
                                                      toks states nb-nts)))
                                (loop2 (conf-set-next ender-set k)
                                       (+ n (* nb-prev-trees nb-ender-trees))))
                              (loop2 (conf-set-next ender-set k) n))
                            (loop1 (cdr l) n)))
                        (loop1 (cdr l) n)))
                    n)))))
  
          (define (nb-deriv-trees* nt i j nts enders steps toks states)
            (let ((nt* (ind nt nts)))
              (if nt*
                (let ((nb-nts (vector-length nts)))
                  (let loop ((l (vector-ref enders nt*)) (nb-trees 0))
                    (if (pair? l)
                      (let ((conf (car l)))
                        (if (conf-set-member? (vector-ref states j) conf i)
                          (loop (cdr l)
                                (+ (nb-deriv-trees conf i j enders steps
                                                   toks states nb-nts)
                                   nb-trees))
                          (loop (cdr l) nb-trees)))
                      nb-trees)))
                #f)))
  
          (let* ((lexer      (vector-ref parser-descr 0))
                 (nts        (vector-ref parser-descr 1))
                 (starters   (vector-ref parser-descr 2))
                 (enders     (vector-ref parser-descr 3))
                 (predictors (vector-ref parser-descr 4))
                 (steps      (vector-ref parser-descr 5))
                 (names      (vector-ref parser-descr 6))
                 (toks       (input->tokens input lexer nts)))
  
            (vector nts
                    starters
                    enders
                    predictors
                    steps
                    names
                    toks
                    (backward (forward starters enders predictors steps nts toks)
                              enders steps nts toks)
                    parsed?
                    deriv-trees*
                    nb-deriv-trees*))))))
  
  (define (parse->parsed? parse nt i j)
    (let* ((nts     (vector-ref parse 0))
           (enders  (vector-ref parse 2))
           (states  (vector-ref parse 7))
           (parsed? (vector-ref parse 8)))
      (parsed? nt i j nts enders states)))
  
  (define (parse->trees parse nt i j)
    (let* ((nts          (vector-ref parse 0))
           (enders       (vector-ref parse 2))
           (steps        (vector-ref parse 4))
           (names        (vector-ref parse 5))
           (toks         (vector-ref parse 6))
           (states       (vector-ref parse 7))
           (deriv-trees* (vector-ref parse 9)))
      (deriv-trees* nt i j nts enders steps names toks states)))
  
  (define (parse->nb-trees parse nt i j)
    (let* ((nts             (vector-ref parse 0))
           (enders          (vector-ref parse 2))
           (steps           (vector-ref parse 4))
           (toks            (vector-ref parse 6))
           (states          (vector-ref parse 7))
           (nb-deriv-trees* (vector-ref parse 10)))
      (nb-deriv-trees* nt i j nts enders steps toks states)))
  
  (define (test)
    (let ((p (make-parser '( (s (a) (s s)) )
                          (lambda (l) (map (lambda (x) (list x x)) l)))))
      (let ((x (p '(a a a a a a a a a))))
        (length (parse->trees x 's 0 9)))))
  
  (define (main . args)
    (run-benchmark
      "earley"
      earley-iters
      (lambda (result) (equal? result 1430))
      (lambda () (lambda () (test))))))