sunet/scheme/httpd/cgi-server.scm

264 lines
10 KiB
Scheme

;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*-
;;; Copyright (c) 1995 by Olin Shivers.
;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec".
;;; PROBLEMS:
;;; - The handlers could be made -- closed over their parameters
;;; (e.g., root vars, etc.)
;;; This code provides a path-handler for the HTTP server that implements
;;; a CGI interface to external programs for doing HTTP transactions.
;;; About HTML forms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This info is in fact independent of CGI, but important to know about,
;;; as many CGI scripts are written for responding to forms-entry in
;;; HTML browsers.
;;;
;;; The form's field data are turned into a single string, of the form
;;; name=val&name=val
;;; where the <name> and <val> parts are URI encoded to hide their
;;; &, =, and + chars, among other things. After URI encoding, the
;;; space chars are converted to + chars, just for fun. It is important
;;; to encode the spaces this way, because the perfectly general %xx escape
;;; mechanism might be insufficiently confusing. This variant encoding is
;;; called "form-url encoding."
;;;
;;; If the form's method is POST,
;;; Browser sends the form's field data in the entity block, e.g.,
;;; "button=on&ans=yes". The request's Content-type: is application/
;;; x-www-form-urlencoded, and the request's Content-length: is the
;;; number of bytes in the form data.
;;;
;;; If the form's method is GET,
;;; Browser sends the form's field data in the URL's <search> part.
;;; (So the server will pass to the CGI script as $QUERY_STRING,
;;; and perhaps also on in argv[]).
;;;
;;; In either case, the data is "form-url encoded" (as described above).
;;; ISINDEX queries:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (Likewise for ISINDEX URL queries from browsers.)
;;; Browser url-form encodes the query (see above), which then becomes the
;;; ?<search> part of the URI. (Hence the CGI script will split the individual
;;; fields into argv[].)
;;; CGI interface:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - The URL's <search> part is assigned to env var $QUERY_STRING, undecoded.
;;; - If it contains no raw "=" chars, it is split at "+" chars. The
;;; substrings are URI decoded, and become the elts of argv[].
;;; - The CGI script is run with stdin hooked up to the socket. If it's going
;;; to read the entity, it should read $CONTENT_LENGTH bytes worth.
;;; - A bunch of env vars are set; see below.
;;; - If the script begins with "nph-" its output is the entire response.
;;; Otherwise, it replies to the server, we peel off a little header
;;; that is used to construct the real header for the response.
;;; See the "spec" for further details. (URL above).
;;;
;;; The "spec" also talks about PUT, but when I tried this on a dummy script,
;;; the NSCA httpd server generated buggy output. So I am only implementing
;;; the POST and GET ops; any other op generates a "405 Method not allowed"
;;; response.
;;; Parameters
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; path for scripts
(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin")
;;; The path handler for CGI scripts. (car path) is the script to run.
;;; cgi-bin-path is used, if no PATH-variable isn't defined
(define (cgi-handler bin-dir . maybe-cgi-bin-path)
(let-optionals
maybe-cgi-bin-path
((cgi-bin-path cgi-default-bin-path))
(let ((request-invariant-cgi-env ; environment variables that never change
`(("PATH" . ,(and (getenv "PATH") cgi-bin-path))
("SERVER_SOFTWARE" . ,server/version)
("SERVER_NAME" . ,(host-info:name (host-info (system-name))))
("GATEWAY_INTERFACE" . "CGI/1.1"))))
(lambda (path req)
(if (pair? path) ; Got to have at least one elt.
(let* ((prog (car path))
(filename (or (dotdot-check bin-dir (list prog))
(http-error http-status/bad-request req
(format #f "CGI scripts may not contain \"..\" elements."))))
(nph? (string-prefix? "nph-" prog)) ; PROG starts with "nph-" ?
; why did we had (string-suffix? "-nph" prog) here?
(search (http-url:search (request:url req))) ; Compute the
(argv (if (and search (not (string-index search #\=))) ; argv list.
(split-and-decode-search-spec search)
'()))
(env (cgi-env req bin-dir (cdr path) request-invariant-cgi-env))
(doit (lambda ()
(dup->inport (current-input-port) 0)
(dup->outport (current-output-port) 1)
(apply exec/env filename env argv))))
(http-syslog (syslog-level debug) "[cgi-server] search: ~s, argv: ~s~%" search argv)
(let ((request-method (request:method req)))
(cond
((or (string=? request-method "GET")
(string=? request-method "POST")) ; Could do others also.
(if nph?
(let ((stat (wait (fork doit))))
(if (not (zero? stat))
(http-error http-status/bad-request req
(format #f "Could not execute CGI script ~a."
filename))
stat))
(cgi-send-response (run/port* doit) req)))
(else (http-error http-status/method-not-allowed req)))))
(http-error http-status/bad-request req "Empty CGI script"))))))
(define (split-and-decode-search-spec s)
(let recur ((i 0))
(cond
((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j)
(recur (+ j 1)))))
(else (list (unescape-uri s i (string-length s)))))))
;;; Compute the CGI scripts' process environment by adding the standard CGI
;;; environment var bindings to the current process env -- return result
;;; as an alist.
;;;
;;; You are also supposed to add the headers as env vars in a particular
;;; format, but are allowed to bag it if the environment var storage
;;; requirements might overload the OS. I don't know what you can rely upon
;;; in Unix, so I am just bagging it, period.
;;;
;;; Suppose the URL is
;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c
;;; then:
;;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar"
;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/"
;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script"
;;; QUERY_STRING -- not decoded "quux%20a+b=c"
;;; The first three of these vars are *not* encoded, so information is lost
;;; if the URL's path elements contain encoded /'s (%2F). CGI loses.
(define (cgi-env req bin-dir path-suffix request-invariant-cgi-env)
(let* ((sock (request:socket req))
(raddr (socket-remote-address sock))
(headers (request:headers req))
;; Compute the $PATH_INFO and $PATH_TRANSLATED strings.
(path-info (uri-path-list->path path-suffix)) ; No encode or .. check.
(path-translated (path-list->file-name path-info bin-dir))
;; Compute the $SCRIPT_PATH string.
(url-path (http-url:path (request:url req)))
(script-path (take (- (length url-path) (length path-suffix))
url-path))
(script-name (uri-path-list->path script-path)))
(receive (rhost rport)
(socket-address->internet-address raddr)
(receive (lhost lport)
(socket-address->internet-address (socket-local-address sock))
`(("SERVER_PROTOCOL" . ,(version->string (request:version req)))
("SERVER_PORT" . ,(number->string lport))
("REQUEST_METHOD" . ,(request:method req))
("PATH_INFO" . ,path-info)
("PATH_TRANSLATED" . ,path-translated)
("SCRIPT_NAME" . ,script-name)
("REMOTE_HOST" . ,(host-name-or-ip raddr))
("REMOTE_ADDR" . ,(format-internet-host-address rhost))
;; ("AUTH_TYPE" . xx) ; Random authentication
;; ("REMOTE_USER" . xx) ; features I don't understand.
;; ("REMOTE_IDENT" . xx)
,@request-invariant-cgi-env ; Stuff that never changes (see cgi-handler).
,@(cond ((http-url:search (request:url req)) =>
(lambda (srch) `(("QUERY_STRING" . ,srch))))
(else '()))
,@(cond ((get-header headers 'content-type) =>
(lambda (ct) `(("CONTENT_TYPE" . ,ct))))
(else '()))
,@(cond ((get-header headers 'content-length) =>
(lambda (cl) ; Skip initial whitespace (& other non-digits).
(let ((first-digit (string-index cl char-set:digit))
(cl-len (string-length cl)))
(if first-digit
`(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len)))
(http-error http-status/bad-request
req
"Illegal Content-length: header.")))))
(else '()))
. ,(env->alist))))))
(define (take n lis)
(if (zero? n) '()
(cons (car lis) (take (- n 1) (cdr lis)))))
(define (drop n lis)
(if (zero? n) lis
(drop (- n 1) (cdr lis))))
;;; Script's output for request REQ is available on SCRIPT-PORT.
;;; The script isn't an "nph-" script, so we read the response, and mutate
;;; it into a real HTTP response, which we then send back to the HTTP client.
(define (cgi-send-response script-port req)
(let* ((headers (read-rfc822-headers script-port))
(ctype (get-header headers 'content-type)) ; The script headers
(loc (get-header headers 'location))
(stat (let ((stat-lines (get-header-lines headers 'status)))
(cond
((not (pair? stat-lines)) ; No status header.
"200 The idiot CGI script left out the status line.")
((null? (cdr stat-lines)) ; One line status header.
(car stat-lines))
(else ; Vas ist das?
(http-error http-status/internal-error req
"CGI script generated multi-line status header")))))
(out (current-output-port)))
(http-syslog (syslog-level debug) "[cgi-server] headers: ~s~%" headers)
;; Send the response header back to the client
;; (unless it's a headerless HTTP 0.9 response).
(if (not (v0.9-request? req))
(begin
(format out "HTTP/1.0 ~a\r~%" stat)
(if ctype (format out "Content-type: ~a\r~%" ctype))
(if loc (format out "Location: ~a\r~%" loc))
(write-crlf out)))
(http-syslog (syslog-level debug) "[cgi-server] request:method=~a~%" (request:method req))
;; Copy the response body back to the client and close the script port
;; (unless it's a bodiless HEAD transaction).
(if (not (string=? (request:method req) "HEAD"))
(begin
(copy-inport->outport script-port out)
(close-input-port script-port)))))