;;; EARLEY -- Earley's parser, written by Marc Feeley.

; (make-parser grammar lexer) is used to create a parser from the grammar
; description `grammar' and the lexer function `lexer'.
;
; A grammar is a list of definitions.  Each definition defines a non-terminal
; by a set of rules.  Thus a definition has the form: (nt rule1 rule2...).
; A given non-terminal can only be defined once.  The first non-terminal
; defined is the grammar's goal.  Each rule is a possibly empty list of
; non-terminals.  Thus a rule has the form: (nt1 nt2...).  A non-terminal
; can be any scheme value.  Note that all grammar symbols are treated as
; non-terminals.  This is fine though because the lexer will be outputing
; non-terminals.
;
; The lexer defines what a token is and the mapping between tokens and
; the grammar's non-terminals.  It is a function of one argument, the input,
; that returns the list of tokens corresponding to the input.  Each token is
; represented by a list.  The first element is some `user-defined' information
; associated with the token and the rest represents the token's class(es) (as a
; list of non-terminals that this token corresponds to).
;
; The result of `make-parser' is a function that parses the single input it
; is given into the grammar's goal.  The result is a `parse' which can be
; manipulated with the procedures: `parse->parsed?', `parse->trees'
; and `parse->nb-trees' (see below).
;
; Let's assume that we want a parser for the grammar
;
;  S -> x = E
;  E -> E + E | V
;  V -> V y |
;
; and that the input to the parser is a string of characters.  Also, assume we
; would like to map the characters `x', `y', `+' and `=' into the corresponding
; non-terminals in the grammar.  Such a parser could be created with
;
; (make-parser
;   '(
;      (s (x = e))
;      (e (e + e) (v))
;      (v (v y) ())
;    )
;   (lambda (str)
;     (map (lambda (char)
;            (list char ; user-info = the character itself
;                  (case char
;                    ((#\x) 'x)
;                    ((#\y) 'y)
;                    ((#\+) '+)
;                    ((#\=) '=)
;                    (else (fatal-error "lexer error")))))
;          (string->list str)))
; )
;
; An alternative definition (that does not check for lexical errors) is
;
; (make-parser
;   '(
;      (s (#\x #\= e))
;      (e (e #\+ e) (v))
;      (v (v #\y) ())
;    )
;   (lambda (str) (map (lambda (char) (list char char)) (string->list str)))
; )
;
; To help with the rest of the discussion, here are a few definitions:
;
; An input pointer (for an input of `n' tokens) is a value between 0 and `n'.
; It indicates a point between two input tokens (0 = beginning, `n' = end).
; For example, if `n' = 4, there are 5 input pointers:
;
;   input                   token1     token2     token3     token4
;   input pointers       0          1          2          3          4
;
; A configuration indicates the extent to which a given rule is parsed (this
; is the common `dot notation').  For simplicity, a configuration is
; represented as an integer, with successive configurations in the same
; rule associated with successive integers.  It is assumed that the grammar
; has been extended with rules to aid scanning.  These rules are of the
; form `nt ->', and there is one such rule for every non-terminal.  Note
; that these rules are special because they only apply when the corresponding
; non-terminal is returned by the lexer.
;
; A configuration set is a configuration grouped with the set of input pointers
; representing where the head non-terminal of the configuration was predicted.
;
; Here are the rules and configurations for the grammar given above:
;
;  S -> .         \
;       0          |
;  x -> .          |
;       1          |
;  = -> .          |
;       2          |
;  E -> .          |
;       3           > special rules (for scanning)
;  + -> .          |
;       4          |
;  V -> .          |
;       5          |
;  y -> .          |
;       6         /
;  S -> .  x  .  =  .  E  .
;       7     8     9     10
;  E -> .  E  .  +  .  E  .
;       11    12    13    14
;  E -> .  V  .
;       15    16
;  V -> .  V  .  y  .
;       17    18    19
;  V -> .
;       20
;
; Starters of the non-terminal `nt' are configurations that are leftmost
; in a non-special rule for `nt'.  Enders of the non-terminal `nt' are
; configurations that are rightmost in any rule for `nt'.  Predictors of the
; non-terminal `nt' are configurations that are directly to the left of `nt'
; in any rule.
;
; For the grammar given above,
;
;   Starters of V   = (17 20)
;   Enders of V     = (5 19 20)
;   Predictors of V = (15 17)

(define (make-parser grammar lexer)

  (define (non-terminals grammar) ; return vector of non-terminals in grammar

    (define (add-nt nt nts)
      (if (member nt nts) nts (cons nt nts))) ; use equal? for equality tests

    (let def-loop ((defs grammar) (nts '()))
      (if (pair? defs)
        (let* ((def (car defs))
               (head (car def)))
          (let rule-loop ((rules (cdr def))
                          (nts (add-nt head nts)))
            (if (pair? rules)
              (let ((rule (car rules)))
                (let loop ((l rule) (nts nts))
                  (if (pair? l)
                    (let ((nt (car l)))
                      (loop (cdr l) (add-nt nt nts)))
                    (rule-loop (cdr rules) nts))))
              (def-loop (cdr defs) nts))))
        (list->vector (reverse nts))))) ; goal non-terminal must be at index 0

  (define (ind nt nts) ; return index of non-terminal `nt' in `nts'
    (let loop ((i (- (vector-length nts) 1)))
      (if (>= i 0)
        (if (equal? (vector-ref nts i) nt) i (loop (- i 1)))
        #f)))

  (define (nb-configurations grammar) ; return nb of configurations in grammar
    (let def-loop ((defs grammar) (nb-confs 0))
      (if (pair? defs)
        (let ((def (car defs)))
          (let rule-loop ((rules (cdr def)) (nb-confs nb-confs))
            (if (pair? rules)
              (let ((rule (car rules)))
                (let loop ((l rule) (nb-confs nb-confs))
                  (if (pair? l)
                    (loop (cdr l) (+ nb-confs 1))
                    (rule-loop (cdr rules) (+ nb-confs 1)))))
              (def-loop (cdr defs) nb-confs))))
      nb-confs)))

; First, associate a numeric identifier to every non-terminal in the
; grammar (with the goal non-terminal associated with 0).
;
; So, for the grammar given above we get:
;
; s -> 0   x -> 1   = -> 4   e ->3    + -> 4   v -> 5   y -> 6

  (let* ((nts (non-terminals grammar))          ; id map = list of non-terms
         (nb-nts (vector-length nts))           ; the number of non-terms
         (nb-confs (+ (nb-configurations grammar) nb-nts)) ; the nb of confs
         (starters (make-vector nb-nts '()))    ; starters for every non-term
         (enders (make-vector nb-nts '()))      ; enders for every non-term
         (predictors (make-vector nb-nts '()))  ; predictors for every non-term
         (steps (make-vector nb-confs #f))      ; what to do in a given conf
         (names (make-vector nb-confs #f)))     ; name of rules

    (define (setup-tables grammar nts starters enders predictors steps names)

      (define (add-conf conf nt nts class)
        (let ((i (ind nt nts)))
          (vector-set! class i (cons conf (vector-ref class i)))))

      (let ((nb-nts (vector-length nts)))

        (let nt-loop ((i (- nb-nts 1)))
          (if (>= i 0)
            (begin
              (vector-set! steps i (- i nb-nts))
              (vector-set! names i (list (vector-ref nts i) 0))
              (vector-set! enders i (list i))
              (nt-loop (- i 1)))))

        (let def-loop ((defs grammar) (conf (vector-length nts)))
          (if (pair? defs)
            (let* ((def (car defs))
                   (head (car def)))
              (let rule-loop ((rules (cdr def)) (conf conf) (rule-num 1))
                (if (pair? rules)
                  (let ((rule (car rules)))
                    (vector-set! names conf (list head rule-num))
                    (add-conf conf head nts starters)
                    (let loop ((l rule) (conf conf))
                      (if (pair? l)
                        (let ((nt (car l)))
                          (vector-set! steps conf (ind nt nts))
                          (add-conf conf nt nts predictors)
                          (loop (cdr l) (+ conf 1)))
                        (begin
                          (vector-set! steps conf (- (ind head nts) nb-nts))
                          (add-conf conf head nts enders)
                          (rule-loop (cdr rules) (+ conf 1) (+ rule-num 1))))))
                  (def-loop (cdr defs) conf))))))))

; Now, for each non-terminal, compute the starters, enders and predictors and
; the names and steps tables.

    (setup-tables grammar nts starters enders predictors steps names)

; Build the parser description

    (let ((parser-descr (vector lexer
                                nts
                                starters
                                enders
                                predictors
                                steps
                                names)))
      (lambda (input)

        (define (ind nt nts) ; return index of non-terminal `nt' in `nts'
          (let loop ((i (- (vector-length nts) 1)))
            (if (>= i 0)
              (if (equal? (vector-ref nts i) nt) i (loop (- i 1)))
              #f)))

        (define (comp-tok tok nts) ; transform token to parsing format
          (let loop ((l1 (cdr tok)) (l2 '()))
            (if (pair? l1)
              (let ((i (ind (car l1) nts)))
                (if i
                  (loop (cdr l1) (cons i l2))
                  (loop (cdr l1) l2)))
              (cons (car tok) (reverse l2)))))

        (define (input->tokens input lexer nts)
          (list->vector (map (lambda (tok) (comp-tok tok nts)) (lexer input))))

        (define (make-states nb-toks nb-confs)
          (let ((states (make-vector (+ nb-toks 1) #f)))
            (let loop ((i nb-toks))
              (if (>= i 0)
                (let ((v (make-vector (+ nb-confs 1) #f)))
                  (vector-set! v 0 -1)
                  (vector-set! states i v)
                  (loop (- i 1)))
                states))))

        (define (conf-set-get state conf)
          (vector-ref state (+ conf 1)))

        (define (conf-set-get* state state-num conf)
          (let ((conf-set (conf-set-get state conf)))
            (if conf-set
              conf-set
              (let ((conf-set (make-vector (+ state-num 6) #f)))
                (vector-set! conf-set 1 -3) ; old elems tail (points to head)
                (vector-set! conf-set 2 -1) ; old elems head
                (vector-set! conf-set 3 -1) ; new elems tail (points to head)
                (vector-set! conf-set 4 -1) ; new elems head
                (vector-set! state (+ conf 1) conf-set)
                conf-set))))

        (define (conf-set-merge-new! conf-set)
          (vector-set! conf-set
            (+ (vector-ref conf-set 1) 5)
            (vector-ref conf-set 4))
          (vector-set! conf-set 1 (vector-ref conf-set 3))
          (vector-set! conf-set 3 -1)
          (vector-set! conf-set 4 -1))

        (define (conf-set-head conf-set)
          (vector-ref conf-set 2))

        (define (conf-set-next conf-set i)
          (vector-ref conf-set (+ i 5)))

        (define (conf-set-member? state conf i)
          (let ((conf-set (vector-ref state (+ conf 1))))
            (if conf-set
              (conf-set-next conf-set i)
              #f)))

        (define (conf-set-adjoin state conf-set conf i)
          (let ((tail (vector-ref conf-set 3))) ; put new element at tail
            (vector-set! conf-set (+ i 5) -1)
            (vector-set! conf-set (+ tail 5) i)
            (vector-set! conf-set 3 i)
            (if (< tail 0)
              (begin
                (vector-set! conf-set 0 (vector-ref state 0))
                (vector-set! state 0 conf)))))

        (define (conf-set-adjoin* states state-num l i)
          (let ((state (vector-ref states state-num)))
            (let loop ((l1 l))
              (if (pair? l1)
                (let* ((conf (car l1))
                       (conf-set (conf-set-get* state state-num conf)))
                  (if (not (conf-set-next conf-set i))
                    (begin
                      (conf-set-adjoin state conf-set conf i)
                      (loop (cdr l1)))
                    (loop (cdr l1))))))))

        (define (conf-set-adjoin** states states* state-num conf i)
          (let ((state (vector-ref states state-num)))
            (if (conf-set-member? state conf i)
              (let* ((state* (vector-ref states* state-num))
                     (conf-set* (conf-set-get* state* state-num conf)))
                (if (not (conf-set-next conf-set* i))
                  (conf-set-adjoin state* conf-set* conf i))
                #t)
              #f)))

        (define (conf-set-union state conf-set conf other-set)
          (let loop ((i (conf-set-head other-set)))
            (if (>= i 0)
              (if (not (conf-set-next conf-set i))
                (begin
                  (conf-set-adjoin state conf-set conf i)
                  (loop (conf-set-next other-set i)))
                (loop (conf-set-next other-set i))))))

        (define (forw states state-num starters enders predictors steps nts)

          (define (predict state state-num conf-set conf nt starters enders)

            ; add configurations which start the non-terminal `nt' to the
            ; right of the dot

            (let loop1 ((l (vector-ref starters nt)))
              (if (pair? l)
                (let* ((starter (car l))
                       (starter-set (conf-set-get* state state-num starter)))
                  (if (not (conf-set-next starter-set state-num))
                    (begin
                      (conf-set-adjoin state starter-set starter state-num)
                      (loop1 (cdr l)))
                    (loop1 (cdr l))))))

            ; check for possible completion of the non-terminal `nt' to the
            ; right of the dot

            (let loop2 ((l (vector-ref enders nt)))
              (if (pair? l)
                (let ((ender (car l)))
                  (if (conf-set-member? state ender state-num)
                    (let* ((next (+ conf 1))
                           (next-set (conf-set-get* state state-num next)))
                      (conf-set-union state next-set next conf-set)
                      (loop2 (cdr l)))
                    (loop2 (cdr l)))))))

          (define (reduce states state state-num conf-set head preds)

            ; a non-terminal is now completed so check for reductions that
            ; are now possible at the configurations `preds'

            (let loop1 ((l preds))
              (if (pair? l)
                (let ((pred (car l)))
                  (let loop2 ((i head))
                    (if (>= i 0)
                      (let ((pred-set (conf-set-get (vector-ref states i) pred)))
                        (if pred-set
                          (let* ((next (+ pred 1))
                                 (next-set (conf-set-get* state state-num next)))
                            (conf-set-union state next-set next pred-set)))
                        (loop2 (conf-set-next conf-set i)))
                      (loop1 (cdr l))))))))

          (let ((state (vector-ref states state-num))
                (nb-nts (vector-length nts)))
            (let loop ()
              (let ((conf (vector-ref state 0)))
                (if (>= conf 0)
                  (let* ((step (vector-ref steps conf))
                         (conf-set (vector-ref state (+ conf 1)))
                         (head (vector-ref conf-set 4)))
                    (vector-set! state 0 (vector-ref conf-set 0))
                    (conf-set-merge-new! conf-set)
                    (if (>= step 0)
                      (predict state state-num conf-set conf step starters enders)
                      (let ((preds (vector-ref predictors (+ step nb-nts))))
                        (reduce states state state-num conf-set head preds)))
                    (loop)))))))

        (define (forward starters enders predictors steps nts toks)
          (let* ((nb-toks (vector-length toks))
                 (nb-confs (vector-length steps))
                 (states (make-states nb-toks nb-confs))
                 (goal-starters (vector-ref starters 0)))
            (conf-set-adjoin* states 0 goal-starters 0) ; predict goal
            (forw states 0 starters enders predictors steps nts)
            (let loop ((i 0))
              (if (< i nb-toks)
                (let ((tok-nts (cdr (vector-ref toks i))))
                  (conf-set-adjoin* states (+ i 1) tok-nts i) ; scan token
                  (forw states (+ i 1) starters enders predictors steps nts)
                  (loop (+ i 1)))))
            states))

        (define (produce conf i j enders steps toks states states* nb-nts)
          (let ((prev (- conf 1)))
            (if (and (>= conf nb-nts) (>= (vector-ref steps prev) 0))
              (let loop1 ((l (vector-ref enders (vector-ref steps prev))))
                (if (pair? l)
                  (let* ((ender (car l))
                         (ender-set (conf-set-get (vector-ref states j)
                                                  ender)))
                    (if ender-set
                      (let loop2 ((k (conf-set-head ender-set)))
                        (if (>= k 0)
                          (begin
                            (and (>= k i)
                                 (conf-set-adjoin** states states* k prev i)
                                 (conf-set-adjoin** states states* j ender k))
                            (loop2 (conf-set-next ender-set k)))
                          (loop1 (cdr l))))
                      (loop1 (cdr l)))))))))

        (define (back states states* state-num enders steps nb-nts toks)
          (let ((state* (vector-ref states* state-num)))
            (let loop1 ()
              (let ((conf (vector-ref state* 0)))
                (if (>= conf 0)
                  (let* ((conf-set (vector-ref state* (+ conf 1)))
                         (head (vector-ref conf-set 4)))
                    (vector-set! state* 0 (vector-ref conf-set 0))
                    (conf-set-merge-new! conf-set)
                    (let loop2 ((i head))
                      (if (>= i 0)
                        (begin
                          (produce conf i state-num enders steps
                                   toks states states* nb-nts)
                          (loop2 (conf-set-next conf-set i)))
                        (loop1)))))))))

        (define (backward states enders steps nts toks)
          (let* ((nb-toks (vector-length toks))
                 (nb-confs (vector-length steps))
                 (nb-nts (vector-length nts))
                 (states* (make-states nb-toks nb-confs))
                 (goal-enders (vector-ref enders 0)))
            (let loop1 ((l goal-enders))
              (if (pair? l)
                (let ((conf (car l)))
                  (conf-set-adjoin** states states* nb-toks conf 0)
                  (loop1 (cdr l)))))
            (let loop2 ((i nb-toks))
              (if (>= i 0)
                (begin
                  (back states states* i enders steps nb-nts toks)
                  (loop2 (- i 1)))))
            states*))

        (define (parsed? nt i j nts enders states)
          (let ((nt* (ind nt nts)))
            (if nt*
              (let ((nb-nts (vector-length nts)))
                (let loop ((l (vector-ref enders nt*)))
                  (if (pair? l)
                    (let ((conf (car l)))
                      (if (conf-set-member? (vector-ref states j) conf i)
                        #t
                        (loop (cdr l))))
                    #f)))
              #f)))

        (define (deriv-trees conf i j enders steps names toks states nb-nts)
          (let ((name (vector-ref names conf)))

            (if name ; `conf' is at the start of a rule (either special or not)
              (if (< conf nb-nts)
                (list (list name (car (vector-ref toks i))))
                (list (list name)))

              (let ((prev (- conf 1)))
                (let loop1 ((l1 (vector-ref enders (vector-ref steps prev)))
                            (l2 '()))
                  (if (pair? l1)
                    (let* ((ender (car l1))
                           (ender-set (conf-set-get (vector-ref states j)
                                                    ender)))
                      (if ender-set
                        (let loop2 ((k (conf-set-head ender-set)) (l2 l2))
                          (if (>= k 0)
                            (if (and (>= k i)
                                     (conf-set-member? (vector-ref states k)
                                                       prev i))
                              (let ((prev-trees
                                      (deriv-trees prev i k enders steps names
                                                   toks states nb-nts))
                                    (ender-trees
                                      (deriv-trees ender k j enders steps names
                                                   toks states nb-nts)))
                                (let loop3 ((l3 ender-trees) (l2 l2))
                                  (if (pair? l3)
                                    (let ((ender-tree (list (car l3))))
                                      (let loop4 ((l4 prev-trees) (l2 l2))
                                        (if (pair? l4)
                                          (loop4 (cdr l4)
                                                 (cons (append (car l4)
                                                               ender-tree)
                                                       l2))
                                          (loop3 (cdr l3) l2))))
                                    (loop2 (conf-set-next ender-set k) l2))))
                              (loop2 (conf-set-next ender-set k) l2))
                            (loop1 (cdr l1) l2)))
                        (loop1 (cdr l1) l2)))
                    l2))))))

        (define (deriv-trees* nt i j nts enders steps names toks states)
          (let ((nt* (ind nt nts)))
            (if nt*
              (let ((nb-nts (vector-length nts)))
                (let loop ((l (vector-ref enders nt*)) (trees '()))
                  (if (pair? l)
                    (let ((conf (car l)))
                      (if (conf-set-member? (vector-ref states j) conf i)
                        (loop (cdr l)
                              (append (deriv-trees conf i j enders steps names
                                                   toks states nb-nts)
                                      trees))
                        (loop (cdr l) trees)))
                    trees)))
              #f)))

        (define (nb-deriv-trees conf i j enders steps toks states nb-nts)
          (let ((prev (- conf 1)))
            (if (or (< conf nb-nts) (< (vector-ref steps prev) 0))
              1
              (let loop1 ((l (vector-ref enders (vector-ref steps prev)))
                          (n 0))
                (if (pair? l)
                  (let* ((ender (car l))
                         (ender-set (conf-set-get (vector-ref states j)
                                                  ender)))
                    (if ender-set
                      (let loop2 ((k (conf-set-head ender-set)) (n n))
                        (if (>= k 0)
                          (if (and (>= k i)
                                   (conf-set-member? (vector-ref states k)
                                                     prev i))
                            (let ((nb-prev-trees
                                    (nb-deriv-trees prev i k enders steps
                                                    toks states nb-nts))
                                  (nb-ender-trees
                                    (nb-deriv-trees ender k j enders steps
                                                    toks states nb-nts)))
                              (loop2 (conf-set-next ender-set k)
                                     (+ n (* nb-prev-trees nb-ender-trees))))
                            (loop2 (conf-set-next ender-set k) n))
                          (loop1 (cdr l) n)))
                      (loop1 (cdr l) n)))
                  n)))))

        (define (nb-deriv-trees* nt i j nts enders steps toks states)
          (let ((nt* (ind nt nts)))
            (if nt*
              (let ((nb-nts (vector-length nts)))
                (let loop ((l (vector-ref enders nt*)) (nb-trees 0))
                  (if (pair? l)
                    (let ((conf (car l)))
                      (if (conf-set-member? (vector-ref states j) conf i)
                        (loop (cdr l)
                              (+ (nb-deriv-trees conf i j enders steps
                                                 toks states nb-nts)
                                 nb-trees))
                        (loop (cdr l) nb-trees)))
                    nb-trees)))
              #f)))

        (let* ((lexer      (vector-ref parser-descr 0))
               (nts        (vector-ref parser-descr 1))
               (starters   (vector-ref parser-descr 2))
               (enders     (vector-ref parser-descr 3))
               (predictors (vector-ref parser-descr 4))
               (steps      (vector-ref parser-descr 5))
               (names      (vector-ref parser-descr 6))
               (toks       (input->tokens input lexer nts)))

          (vector nts
                  starters
                  enders
                  predictors
                  steps
                  names
                  toks
                  (backward (forward starters enders predictors steps nts toks)
                            enders steps nts toks)
                  parsed?
                  deriv-trees*
                  nb-deriv-trees*))))))

(define (parse->parsed? parse nt i j)
  (let* ((nts     (vector-ref parse 0))
         (enders  (vector-ref parse 2))
         (states  (vector-ref parse 7))
         (parsed? (vector-ref parse 8)))
    (parsed? nt i j nts enders states)))

(define (parse->trees parse nt i j)
  (let* ((nts          (vector-ref parse 0))
         (enders       (vector-ref parse 2))
         (steps        (vector-ref parse 4))
         (names        (vector-ref parse 5))
         (toks         (vector-ref parse 6))
         (states       (vector-ref parse 7))
         (deriv-trees* (vector-ref parse 9)))
    (deriv-trees* nt i j nts enders steps names toks states)))

(define (parse->nb-trees parse nt i j)
  (let* ((nts             (vector-ref parse 0))
         (enders          (vector-ref parse 2))
         (steps           (vector-ref parse 4))
         (toks            (vector-ref parse 6))
         (states          (vector-ref parse 7))
         (nb-deriv-trees* (vector-ref parse 10)))
    (nb-deriv-trees* nt i j nts enders steps toks states)))

(define (test)
  (let ((p (make-parser '( (s (a) (s s)) )
                        (lambda (l) (map (lambda (x) (list x x)) l)))))
    (let ((x (p '(a a a a a a a a a))))
      (length (parse->trees x 's 0 9)))))

(display (test))
(newline)