sunet/scheme/httpd/cgi-server.scm

299 lines
11 KiB
Scheme

;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*-
;;; This file is part of the Scheme Untergrund Networking package.
;;; Copyright (c) 1995 by Olin Shivers.
;;; For copyright information, see the file COPYING which comes with
;;; the distribution.
;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec".
;;; PROBLEMS:
;;; - The handlers could be made -- closed over their parameters
;;; (e.g., root vars, etc.)
;;; This code provides a request handler for the HTTP server that implements
;;; a CGI interface to external programs for doing HTTP transactions.
;;; About HTML forms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This info is in fact independent of CGI, but important to know about,
;;; as many CGI scripts are written for responding to forms-entry in
;;; HTML browsers.
;;;
;;; The form's field data are turned into a single string, of the form
;;; name=val&name=val
;;; where the <name> and <val> parts are URI encoded to hide their
;;; &, =, and + chars, among other things. After URI encoding, the
;;; space chars are converted to + chars, just for fun. It is important
;;; to encode the spaces this way, because the perfectly general %xx escape
;;; mechanism might be insufficiently confusing. This variant encoding is
;;; called "form-url encoding."
;;;
;;; If the form's method is POST,
;;; Browser sends the form's field data in the entity block, e.g.,
;;; "button=on&ans=yes". The request's Content-type: is application/
;;; x-www-form-urlencoded, and the request's Content-length: is the
;;; number of bytes in the form data.
;;;
;;; If the form's method is GET,
;;; Browser sends the form's field data in the URL's <search> part.
;;; (So the server will pass to the CGI script as $QUERY_STRING,
;;; and perhaps also on in argv[]).
;;;
;;; In either case, the data is "form-url encoded" (as described above).
;;; ISINDEX queries:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (Likewise for ISINDEX URL queries from browsers.)
;;; Browser url-form encodes the query (see above), which then becomes the
;;; ?<search> part of the URI. (Hence the CGI script will split the individual
;;; fields into argv[].)
;;; CGI interface:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - The URL's <search> part is assigned to env var $QUERY_STRING, undecoded.
;;; - If it contains no raw "=" chars, it is split at "+" chars. The
;;; substrings are URI decoded, and become the elts of argv[].
;;; - The CGI script is run with stdin hooked up to the socket. If it's going
;;; to read the entity, it should read $CONTENT_LENGTH bytes worth.
;;; - A bunch of env vars are set; see below.
;;; - If the script begins with "nph-" its output is the entire response.
;;; Otherwise, it replies to the server, we peel off a little header
;;; that is used to construct the real header for the response.
;;; See the "spec" for further details. (URL above).
;;;
;;; The "spec" also talks about PUT, but when I tried this on a dummy script,
;;; the NSCA httpd server generated buggy output. So I am only implementing
;;; the POST and GET ops; any other op generates a "405 Method not allowed"
;;; response.
;;; Parameters
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; path for scripts
(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin")
;;; The request handler for CGI scripts. (car path) is the script to run.
;;; cgi-bin-path is used, if PATH-variable isn't defined
(define (cgi-handler bin-dir . maybe-cgi-bin-path)
(let-optionals
maybe-cgi-bin-path
((cgi-bin-path cgi-default-bin-path))
(let ((request-invariant-cgi-env ; environment variables that never change
`(("PATH" . ,(and (getenv "PATH") cgi-bin-path))
("SERVER_SOFTWARE" . ,sunet-version-identifier)
("SERVER_NAME" . ,(host-info:name (host-info (system-name))))
("GATEWAY_INTERFACE" . "CGI/1.1"))))
(lambda (path req)
(if (pair? path) ; Got to have at least one elt.
(compute-cgi path req bin-dir request-invariant-cgi-env)
(make-error-response (status-code bad-request) req "Empty CGI script"))))))
(define (compute-cgi path req bin-dir request-invariant-cgi-env)
(let* ((prog (car path))
(filename (or (dotdot-check bin-dir (list prog))
(http-error (status-code bad-request) req
"CGI scripts may not contain \"..\" elements.")))
(nph? (string-prefix? "nph-" prog)) ; PROG starts with "nph-" ?
; why did we had (string-suffix? "-nph" prog) here?
(search (http-url-search (request-url req))) ; Compute the
(argv (if (and search (not (string-index search #\=))) ; argv list.
(split-and-decode-search-spec search)
'()))
(env (cgi-env req bin-dir (cdr path) request-invariant-cgi-env))
(doit (lambda ()
(dup->inport (socket:inport (request-socket req)) 0)
(dup->outport (current-output-port) 1)
(dup 1 2)
(apply exec/env filename env argv))))
(http-syslog (syslog-level debug) "[cgi-server] search: ~s, argv: ~s~%" search argv)
(let ((request-method (request-method req)))
(cond
((or (string=? request-method "GET")
(string=? request-method "POST")) ; Could do others also.
(if nph?
(let ((stat (wait (fork doit))))
(if (not (zero? stat))
(make-error-response
(status-code bad-request) req
(format #f "Could not execute CGI script ~a."
filename))
stat)) ;; FIXME! must return http-response object!
(case (file-not-executable? filename)
((search-denied permission)
(make-error-response (status-code forbidden) req
"Permission denied."))
((no-directory nonexistent)
(make-error-response (status-code not-found) req
"File or directory doesn't exist."))
(else
(cgi-make-response (run/port* doit) path req)))))
(else
(make-error-response (status-code method-not-allowed) req request-method))))))
(define (split-and-decode-search-spec s)
(let recur ((i 0))
(cond
((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j)
(recur (+ j 1)))))
(else (list (unescape-uri s i (string-length s)))))))
;;; Compute the CGI scripts' process environment by adding the standard CGI
;;; environment var bindings to the current process env -- return result
;;; as an alist.
;;;
;;; You are also supposed to add the headers as env vars in a particular
;;; format, but are allowed to bag it if the environment var storage
;;; requirements might overload the OS. I don't know what you can rely upon
;;; in Unix, so I am just bagging it, period.
;;;
;;; Suppose the URL is
;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c
;;; then:
;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar"
;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/"
;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script"
;;; QUERY_STRING -- not decoded "quux%20a+b=c"
;;; The first three of these vars are *not* encoded, so information is lost
;;; if the URL's path elements contain encoded /'s (%2F). CGI loses.
(define (cgi-env req bin-dir path-suffix request-invariant-cgi-env)
(let* ((sock (request-socket req))
(raddr (socket-remote-address sock))
(headers (request-headers req))
;; Compute the $PATH_INFO and $PATH_TRANSLATED strings.
(path-info (uri-path-list->path path-suffix)) ; No encode or .. check.
(path-translated (path-list->file-name path-info bin-dir))
;; Compute the $SCRIPT_PATH string.
(url-path (http-url-path (request-url req)))
(script-path (take (- (length url-path) (length path-suffix))
url-path))
(script-name (uri-path-list->path script-path)))
(receive (rhost rport)
(socket-address->internet-address raddr)
(receive (lhost lport)
(socket-address->internet-address (socket-local-address sock))
`(("SERVER_PROTOCOL" . ,(version->string (request-version req)))
("SERVER_PORT" . ,(number->string lport))
("REQUEST_METHOD" . ,(request-method req))
("PATH_INFO" . ,path-info)
("PATH_TRANSLATED" . ,path-translated)
("SCRIPT_NAME" . ,script-name)
("REMOTE_ADDR" . ,(format-internet-host-address rhost))
;; ("AUTH_TYPE" . xx) ; Random authentication
;; ("REMOTE_USER" . xx) ; features I don't understand.
;; ("REMOTE_IDENT" . xx)
,@request-invariant-cgi-env ; Stuff that never changes (see cgi-handler).
,@(cond ((http-url-search (request-url req)) =>
(lambda (srch) `(("QUERY_STRING" . ,srch))))
(else '()))
,@(cond ((get-header headers 'content-type) =>
(lambda (ct) `(("CONTENT_TYPE" . ,ct))))
(else '()))
,@(cond ((get-header headers 'content-length) =>
(lambda (cl) ; Skip initial whitespace (& other non-digits).
(let ((first-digit (string-index cl char-set:digit))
(cl-len (string-length cl)))
(if first-digit
`(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len)))
(http-error (status-code bad-request) req
"Illegal `Content-length:' header.")))))
(else '()))
. ,(env->alist))))))
(define (take n lis)
(if (zero? n) '()
(cons (car lis) (take (- n 1) (cdr lis)))))
(define (drop n lis)
(if (zero? n) lis
(drop (- n 1) (cdr lis))))
;;; Script's output for request REQ is available on SCRIPT-PORT.
;;; The script isn't an "nph-" script, so we read the response, and mutate
;;; it into a real HTTP response, which we then send back to the HTTP client.
(define (cgi-make-response script-port path req)
(let* ((headers (read-rfc822-headers script-port))
(ctype (get-header headers 'content-type)) ; The script headers
(loc (get-header headers 'location))
(stat (extract-status-code-and-text
(get-header-lines headers 'status) req))
(extra-headers (delete-headers (delete-headers (delete-headers headers
'content-type)
'location)
'status)))
(http-syslog (syslog-level debug) "[cgi-server] headers: ~s~%" headers)
(http-syslog (syslog-level debug) "[cgi-server] request-method=~a~%"
(request-method req))
(if loc
(if (uri-has-protocol? (string-trim loc))
(make-error-response (status-code moved-perm) req
loc loc)
(make-redirect-response (string-trim loc)))
;; Send the response header back to the client
(make-response ;code message seconds mime extras body
(car stat) ; code
(cdr stat) ; text
(time)
ctype
extra-headers
(make-writer-body
(lambda (out options)
(copy-inport->outport script-port out)
(close-input-port script-port)))))))
(define (uri-has-protocol? loc)
(receive (proto path search frag)
(parse-uri loc)
(if proto #t #f)))
(define (extract-status-code-and-text stat-lines req)
(cond
((not (pair? stat-lines)) ; No status header.
(cons (status-code ok) "The idiot CGI script left out the status line."))
((null? (cdr stat-lines)) ; One line status header.
(with-fatal-error-handler*
(lambda (c d)
(http-error (status-code bad-gateway) req
"CGI script generated an invalid status header."
(car stat-lines) c))
(lambda ()
(let ((status (string-trim (car stat-lines))))
(cons (string->number (substring status 0 3)) ; number
(substring/shared status 4)))))) ; text
(else ; Vas ist das?
(http-error (status-code bad-gateway) req
"CGI script generated multi-line status header."))))