;;; GNU info -> HTML gateway for the SU web server. -*- Scheme -*-

;;; This file is part of the Scheme Untergrund Networking package.

;;; Copyright (c) 1996 by Mike Sperber.
;;; based on code with the same purpose by Gaebe Engelhart
;;; For copyright information, see the file COPYING which comes with
;;; the distribution.


;;; (info-handler parse-info reference find-icon address) -> handler
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This function creates a request handler that converts GNU info pages
;;; on-the-fly.  It is highly parameterizable to accomodate a wide
;;; range of environments.  The parameters specify how to find the
;;; source code for the info pages, and how to generate certain
;;; elements of the generated HTML output.
;;;
;;; PARSE-INFO specifies how to parse the URLs that end up in the
;;;   handler. 
;;;   It can be:
;;;
;;;      * a procedure which is called with the URL as its parameters.
;;;      It is expected to return with two values, FIND-ENTRY and
;;;      NODE-NAME.  FIND-ENTRY, in turn, can be either a procedure
;;;      which gets passed the file name of an info node and is
;;;      supposed to return with an absolute name of same.  If it is a
;;;      list, that list is taken as a list of directories in which to
;;;      search for the info files.  NODE-NAME is supposed to be the
;;;      name of an info node of the form (<file>)<node>, extracted
;;;      from the URL.
;;;
;;;      * a list, in which case that is taken as a list of
;;;      directories in which to search for the info files.  The node
;;;      name of a node is extracted from the URL by just taking the
;;;      search component of the URL.
;;;
;;;      * #f in which case the info path is taken from the environment
;;;      variable INFOPATH, and the node name extraction works as
;;;      above.
;;;
;;; REFERENCE specifies how to generate cross-references to other info
;;; nodes.  It can be:
;;;
;;;      * a procedure which gets called with the URL of the info page
;;;      which contains the reference, and the node name of the node
;;;      to be referenced.  The procedure is expected to return the
;;;      text for a link.
;;;
;;;      * a string, in which case that is to be a prefix to which the
;;;      node name is simply appended to yield the new link.
;;;
;;;      * #f in which case all references have the form
;;;      info?<node-name>.
;;;
;;; FIND-ICON specifies to to find the various icons used to decorate
;;; info pages.  It can be:
;;;
;;;      * a procedure which gets passed one of the tags in
;;;      DEFAULT-ICON-ALIST and is supposed to return a link for the
;;;      appropriate icon (or #f if no icon is to be used)
;;;
;;;      * a string which is taken as a prefix to which one of the
;;;      appropriate icon name from DEFAULT-ICON-ALIST is appended.
;;;      (Note that these icon names were stolen from the
;;;      cern-httpd-3.0 distribution at
;;;      http://www.w3.org/pub/WWW/Daemon/.)
;;;
;;;      * a list which is taken as an alist of the same format as
;;;      DEFAULT-ICON-ALIST.
;;;
;;;      * #f in which case no icons are used.
;;;
;;; ADDRESS a string to be appended at the bottom of all info pages
;;;
;;; To install a vanilla info handler for a prefix "info?" that looks
;;; in the environment variable INFOPATH, just use something like
;;; (info-handler #f #f #f "Generated by info-gateway")

;;; TODO: write a CGI version of this

(define-condition-type 'info-gateway-error '(error))

(define info-gateway-error? (condition-predicate 'info-gateway-error))

(define (info-gateway-error msg . irritants)
  (apply signal 'info-gateway-error msg irritants))

(define default-icon-alist
  '((info . "infodoc.gif")
    (up . "up.gif")
    (next . "next.gif")
    (previous . "prev.gif")
    (menu . "menu.gif")))

(define (info-handler parse-info reference find-icon address)
  (let ((icon-name
	 (cond
	  ((procedure? find-icon) find-icon)
	  ((string? find-icon)
	   (let ((alist
		  (map (lambda (entry)
			 (cons (car entry)
			       (string-append find-icon (cdr entry))))
		       default-icon-alist)))
	     (lambda (tag)
	       (cond ((assq tag alist) => cdr)
		     (else #f)))))
	  ((list? find-icon)
	   (lambda (tag)
	     (cond ((assq tag find-icon) => cdr)
		   (else #f))))
	  (else (lambda (tag) #f))))
	(parse-info-url
	 (cond
	  ((procedure? parse-info) parse-info)
	  ((list? parse-info)		; it's an info path
	   (lambda (url)
	     (values parse-info
		     (unescape-uri (http-url-search url)))))
	  (else
	   (let ((info-path 
		  ((infix-splitter ":") 
		   (or (getenv "INFOPATH")
		       (begin 
			 (format (current-error-port)
				 "~%Warning: environment variable INFOPATH is unset.~%")
			 "")))))
	     (lambda (url)
	       (values info-path
		       (unescape-uri (http-url-search url))))))))
	(make-reference
	 (cond
	  ((procedure? reference) reference)
	  ((string? reference)
	   (lambda (url node-name)
	     (string-append reference node-name)))
	  (else
	   (lambda (url node-name)
	     (string-append "info?" node-name))))))

    (lambda (path req)
      (let ((request-method (request-method req)))
	(cond 
	 ((string=? request-method "GET")
	  (with-fatal-error-handler
	   (lambda (c decline)
	     (cond
	      ((info-gateway-error? c)
	       (apply http-error (status-code bad-gateway) req
		      (condition-stuff c)))
	      ((http-error? c)
	       (apply http-error (car (condition-stuff c)) req
		      (cddr (condition-stuff c))))
	      (else
	       (decline))))

	   (make-response
	    (status-code ok)
	    #f
	    (time)
	    "text/html"
	    '()
	    (make-writer-body
	     (lambda (out options)
	   
	       (receive (find-entry node-name) (parse-info-url (request-url req))
		 (display-node node-name
			       (file-finder find-entry)
			       (referencer make-reference (request-url req) out)
			       icon-name
			       out))
	       (with-tag out address ()
		 (write-string address out)))))))

	 (else 
	  (make-error-response (status-code method-not-allowed) req 
				    request-method)))))))

(define split-header-line
  (let ((split (infix-splitter (make-regexp "(, *)|(  +)|( *\t *)")))
	(split-field (infix-splitter (make-regexp ": *"))))
    (lambda (l)
      (let ((fields (map split-field (split l))))
	  
	(define (search-field regexp)
	  (cond
	   ((find (lambda (field)
		   (string-match regexp (car field)))
		 fields)
	    => cadr)
	   (else #f)))

	(values (search-field "[F|f]ile")
		(search-field "[N|n]ode")
		(search-field "[U|u]p")
		(search-field "[P|p]rev(ious)?")
		(search-field "[N|n]ext"))))))

(define (replace-if-empty-string s v)
  (if (zero? (string-length s))
      v
      s))

(define (string-newline->space s)
  (string-map (lambda (c)
		(if (char=? c #\newline)
		    #\space
		    c))
	      s))

(define (parse-node-name node-name)
  (cond
   ((string-match "^\\((.*)\\)(.*)$" (string-newline->space node-name))
    => (lambda (match)
	 (values
	  (replace-if-empty-string (match:substring match 1) #f)
	  (replace-if-empty-string (match:substring match 2) "Top"))))
   (else (values #f (string-newline->space node-name)))))
    

(define (unparse-node-name file node)
  (let* ((ext (file-name-extension file))
	 (file (if (string=? ext ".info")
		   (file-name-sans-extension file)
		   file)))
    (receive (file node) (if (and (string=? "dir" file)
				  (not (string=? "" node))
				  (not (string=? "Top" node)))
			     (values node "Top")
			     (values file node))
      (string-append "(" file ")" node))))

(define (display-icon file alt out)
  (emit-tag out 'img
    (cons 'src file)
    (cons 'alt alt)
    (cons 'align "bottom")))
	 
(define (referencer make-reference old-entry out)
  (lambda (file node-name label . maybe-icon)
    (receive (node-file node) (parse-node-name node-name)
      (let ((file (or node-file file)))
	(with-tag out a ((href (make-reference
				old-entry
				(escape-uri (unparse-node-name file node)))))
	  (if (and (not (null? maybe-icon))
		   (car maybe-icon))
	      (display-icon (car maybe-icon) (cadr maybe-icon) out))
	  (emit-text label out))))))

(define node-prologue (ascii->char 31))
(define node-epilogue-regexp
  (make-regexp
   (string-append (regexp-quote (string node-prologue))
		  "|"
		  (regexp-quote (string (ascii->char 12))))))

(define (string-starts-with-char? s c)
  (and (not (zero? (string-length s)))
       (char=? c (string-ref s 0))))

(define (node-prologue? s)
  (string-starts-with-char? s node-prologue))
(define (node-epilogue? s)
  (regexp-exec node-epilogue-regexp s))

;; Document title

(define (display-title file node up previous next
		       display-reference icon-name out)

  (define (maybe-display-header header icon alt)
    (if header
	(begin
	  (newline out)
	  (with-tag out b ()
	    (display-reference file header header icon alt)))))

  (emit-title out (string-append "Info Node: "
				(unparse-node-name file node)))
  (with-tag out h1 ()
	    (emit-tag out 'img
		      (cons 'src (icon-name 'info))
		      (cons 'alt "Info Node")
		      (cons 'align 'bottom))
	    (write-string (unparse-node-name file node) out))
  (emit-tag out 'hr)
  (maybe-display-header next (icon-name 'next) "[Next]")
  (maybe-display-header previous (icon-name 'previous) "[Previous]")
  (maybe-display-header up (icon-name 'up) "[Up]")
	 
  (if (or next previous up)
      (emit-tag out 'hr)))

;; Text


;; Dealing with cross references
;; info sucks

(define xref-marker-regexp (make-regexp "\\*[Nn]ote([ \n]|$)"))
(define xref-regexp (make-regexp "\\*[Nn]ote *([^:]*): *([^\t\n,.;:?!]*)"))

(define max-xref-lines 3)

(define complete-line
  (let ((split-xref-markers (field-splitter xref-marker-regexp))
	(split-xrefs (field-splitter xref-regexp))
	(cr (string #\newline)))
    (lambda (line port)
      (let loop ((line line) (count max-xref-lines))
	(let ((xref-markers (split-xref-markers line))
	      (xrefs (split-xrefs line)))
	  (if (= (length xref-markers) (length xrefs))
	      line
	      (if (zero? count)
		  (info-gateway-error "invalid cross reference")
		  (let ((new-line (read-line port)))
		    (if (eof-object? new-line)
			(info-gateway-error
			 "unexpected end of info file inside cross reference"))
		    (loop (string-append line cr new-line) (- count 1))))))))))
		    

(define (display-xref xref file display-reference out)
  (let* ((match (regexp-exec xref-regexp xref))
	 (note (match:substring match 1))
	 (node-name (match:substring match 2))
	 (node-name (if (string=? "" node-name) note node-name))
	 (node-name (substring node-name
			       (string-skip node-name char-set:whitespace)
			       (string-length node-name))))
    (emit-text "See " out)
    (display-reference file node-name note)))

(define display-text
  (let ((split-xrefs (infix-splitter xref-regexp #f 'split)))
    (lambda (line port file display-reference out)
      (let* ((line (complete-line line port))
	     (components (split-xrefs line)))
	;; in components, every 2nd element is a cross reference
	;; also, it always has odd length or length zero
	(if (not (null? components))
	    (let loop ((components components))
	      (emit-text (car components) out)
	      (if (not (null? (cdr components)))
		  (begin
		    (display-xref (cadr components) file display-reference out)
		    (loop (cddr components))))))
	(newline out)))))

;; Menus

(define menu-regexp (make-regexp "^\\* +Menu:"))
(define menu-item-regexp (make-regexp "^\\* +"))

(define (char-splitter c)
  (lambda (s)
    (cond ((string-index s c)
	   => (lambda (i)
		(values (substring s 0 i)
			(substring s (+ 1 i) (string-length s)))))
	  (else (values s "")))))

(define colon-split (char-splitter #\:))

(define (display-menu-item-header line port file display-reference icon-name out)
  (let ((menu-line-split (infix-splitter menu-item-regexp)))
    (receive (note rest) (colon-split (cadr (menu-line-split line)))
      (receive (node-name text)
	(cond
	 ((string-match ": *(.*)" rest)
	  => (lambda (match)
		 (values note (match:substring match 1))))
	   ((string-match "^ *([^.]*)\\.? *(.*)" rest)
	    => (lambda (match)
		 (values (match:substring match 1)
			 (match:substring match 2))))
	   (else
	    (info-gateway-error  "invalid menu item")))
	(emit-tag out 'dt)
	(display-reference file node-name note (icon-name 'menu) "*")
	(newline out)
	(if (and (not (string=? "" text))
		 (not (string=? "." text)))
	    (begin
	      (emit-tag out 'dd)
	      (display-text text port file display-reference out)))))))

(define (display-menu line port file display-reference icon-name out)
  (emit-close-tag out 'pre)
    
  (with-tag out dl ()
    (let loop ((line line))
      (if (eof-object? line)
	  (info-gateway-error "unexpected end of info file"))

      (display-menu-item-header line port file display-reference icon-name out)

      (let finish-item-loop ()
	(if (eof-object? line)
	    (info-gateway-error "unexpected end of info file"))
	
	(let ((line (read-line port)))
	  (cond
	   ((or (eof-object? line)
		(node-epilogue? line)
		(string=? "" line))
	    (emit-tag out 'pre)
	    (dispatch-line line port file display-reference icon-name out))
	   ((regexp-exec menu-item-regexp line)
	    (loop line))
	   (else
	    (display-text line port file display-reference out)
	    (finish-item-loop))))))))

;; Central dispatch

(define (dispatch-line line port file display-reference icon-name out)
  (cond
   ((or (eof-object? line) (node-epilogue? line)) #f)
   ((string=? "" line) (emit-p out) #t)
   ((regexp-exec menu-regexp line) #t) ;; this should probably be expanded
   ((regexp-exec menu-item-regexp line)
    (display-menu line port file display-reference icon-name out))
   (else
    (display-text line port file display-reference out) #t)))

(define (display-body port file display-reference icon-name out)
  (let loop ()
    (let ((line (read-line port)))
      (if (dispatch-line line port file display-reference icon-name out)
	  (loop)))))
	    
(define (display-node node-name find-file display-reference icon-name out)
  (receive (file node) (parse-node-name node-name)
    (receive (port file-header node-header up-header prev-header next-header)
	(find-node file node find-file)
    
      (with-tag out html ()
	(with-tag out head ()
	  (display-title file node-header up-header
			 prev-header next-header
			 display-reference icon-name
			 out))
	(with-tag out body ()
	  (with-tag out pre ()
	    (display-body port file display-reference icon-name out))))
      
      (close-input-port port))))

;; Finding nodes

(define (ensure-node-prologue port msg)
  (let ((line (read-line port)))
    (if (or (eof-object? line)
	    (not (node-prologue line)))
	(info-gateway-error "invalid info file" msg))))

(define (ensure-regexp-line port regexp msg)
  (let ((line (read-line port)))
    (if (or (eof-object? line)
	    (not (string-match regexp line)))
	(info-gateway-error "invalid info file" msg))))

(define (ensure-tag-table-node port)
  (ensure-regexp-line port "^Tag Table:" "no tag table"))
(define (ensure-indirect-tag-table-header port)
  (ensure-regexp-line port "^\\(Indirect\\)" "no indirect tag"))

(define split-indirection (infix-splitter (make-regexp " *: *")))
(define (parse-indirection line)
  (let ((l (split-indirection line)))
    (if (null? (cdr l))
	(info-gateway-error "invalid indirection entry in info file")
	(let ((file (car l))
	      (seek-pos (string->number (cadr l))))
	  (if (not seek-pos)
	      (info-gateway-error "invalid indirection entry in info file"))
	  (cons file seek-pos)))))

(define (read-indirection-table port)
  (let loop ((table '()))
    (let ((line (read-line port)))
      (if (eof-object? line)
	  (info-gateway-error "invalid info file"))
      (if (node-epilogue? line)
	  (reverse table)
	  (loop (cons (parse-indirection line) table))))))

(define tag-seek-separator (ascii->char 127))

(define parse-tag
  (let ((split (infix-splitter (make-regexp ", *")))
	(split-field (infix-splitter ": "))
	(split-node-info
	 (infix-splitter (string tag-seek-separator))))

    (define (barf)
      (info-gateway-error "invalid tag entry in info file"))

    (lambda (line)
      (let* ((fields (map split-field (split line)))
	     (file (cond
		    ((assoc "File" fields)
		     => (lambda (p)
			  (if (null? (cdr p)) (barf))
			  (cadr p)))
		    (else #f))))
	(cond
	 ((assoc "Node" fields)
	  => (lambda (p)
	       (if (null? (cdr p)) (barf))
	       (let ((s (split-node-info (cadr p))))
		 (if (null? (cdr p)) (barf))
		 (let* ((node (car s))
			(seek (string->number (cadr s))))
		   (if (not seek) (barf))
		   (values node file seek)))))
	 (else (barf)))))))

(define (find-tag node port)
  (let loop ()
    (let ((line (read-line port)))
      (if (eof-object? line)
	  (info-gateway-error "invalid info file"))
      (if (regexp-exec node-epilogue-regexp line)
	  (http-error (status-code not-found) #f "node not found"))
      (receive (entry-node file seek) (parse-tag line)
        (if (string=? node entry-node)
	    (cons file seek)
	    (loop))))))

(define (find-indirection-entry seek-pos indirection-table)
  (let loop ((table indirection-table))
    (if (null? table)
	(http-error (status-code not-found) #f "node not found"))
    (let* ((entry (car table))
	   (pos (cdr entry)))
      (if (and (>= seek-pos pos)
	       (or (null? (cdr table))
		   (let* ((next-entry (cadr table))
			  (next-pos (cdr next-entry)))
		     (< seek-pos next-pos))))
	  entry
	  (loop (cdr table))))))

(define (file-finder with)
  (cond ((procedure? with) with)
	((list? with)
	 (lambda (file)
	   (find-info-file file with)))))

(define (find-node-port-with-tag-entry node tag-entry ? find-file)
  (let* ((port (if (input-port? ?) ? #f))
	 (indirection-table (if port #f ?))
	 (seek-pos (cdr tag-entry))
	 (indirection-entry
	  (and indirection-table
	       (find-indirection-entry seek-pos indirection-table)))
	 (seek-pos (if indirection-entry
		       (- seek-pos (cdr indirection-entry))
		       seek-pos))
	 ;; that's what the documentation says ...
	 (seek-pos (if (>= seek-pos 1000)
		       (- seek-pos 1000)
		       0))
	 (file (or (car tag-entry)
		    (and indirection-entry
			 (car indirection-entry))))
	 (port (if file
		   (begin
		     (if port (close-input-port port))
		     (open-input-file (find-file file)))
		   port)))
    (seek port seek-pos)
    port))

(define (find-node file node find-file)
  (if (not file)
      (http-error (status-code not-found) #f
		  "no file in info node specification"))

  (let* ((fname (find-file file))
	 (port (open-input-file fname)))
    (let loop ((port port))
      (let ((line (read-line port)))
	(if (eof-object? line)
	    (http-error (status-code not-found) #f "info node not found"))
	(if (node-prologue? line)
	    (let ((header (read-line port)))
	      (if (eof-object? header)
		  (info-gateway-error "invalid info file"))
	      (cond

	       ((string-match "^Indirect:" header)
		(let ((indirection-table
		       (read-indirection-table port)))
		  (ensure-tag-table-node port)
		  (ensure-indirect-tag-table-header port)
		  (let ((tag-entry (find-tag node port)))
		    (close-input-port port)
		    (loop (find-node-port-with-tag-entry
			   node tag-entry indirection-table find-file)))))

	       ((string-match "^Tag Table:" header)
		(let ((tag-entry (find-tag node port)))
		  (loop (find-node-port-with-tag-entry
			 node tag-entry port find-file))))

	       ((string-match "^File:" header)
		(receive
		    (file-header node-header up-header prev-header next-header)
		    (split-header-line header)
		  (if (string=? node-header node)
		      (values port
			      file-header node-header
			      up-header prev-header next-header)
		      (loop port))))
	       (else (loop port))))
	    (loop port))))))

;; Finding files

(define (info-file-alternative-names file)
  (receive (dir base ext) (parse-file-name file)
    (let* ((base
	    (cond ((string-match "(.*)-info$" base)
		   => (lambda (match)
			(match:substring match 1)))
		  (else base)))
	   (base-ci (string-map char-downcase base))
	   (alts-1 (if (string=? base base-ci)
		       (list base)
		       (list base base-ci)))
	   (alts (append alts-1
			 (map (lambda (base)
				(string-append base ".info"))
			      alts-1)))
	   (alts (append alts
			 (map (lambda (base)
				(string-append base "-info"))
			      alts-1)))
	   (alts (map (lambda (f) (string-append dir f)) alts))
	   (alts (cons file alts)))
      alts)))

(define (find-info-file file info-path)
  (let ((alts (info-file-alternative-names file)))
    (let path-loop ((path info-path))
      (if (null? path)
	  (http-error (status-code not-found) #f "info file not found"))
      (let alt-loop ((alts alts))
	(if (null? alts)
	    (path-loop (cdr path))
	    (let ((try (string-append (file-name-as-directory (car path))
				      (car alts))))
	      (if (file-exists? try)
		  try
		  (alt-loop (cdr alts)))))))))