;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*- ;;; Copyright (c) 1995 by Olin Shivers. ;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec". ;;; Imports and non-R4RS'isms ;;; "\r" in string for carriage-return. ;;; format ;;; string hacks ;;; URI, URL record structs, parsers, and unparsers ;;; write-crlf ;;; scsh syscalls ;;; ? for COND ;;; SWITCH conditional ;;; RFC822 header parsing ;;; HTTP request record structure ;;; HTTP-ERROR & reply codes ;;; Basic path handler support (for ncsa-handler) ;;; PROBLEMS: ;;; - The handlers could be made -- closed over their parameters ;;; (e.g., root vars, etc.) ;;; This code provides a path-handler for the HTTP server that implements ;;; a CGI interface to external programs for doing HTTP transactions. ;;; About HTML forms ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This info is in fact independent of CGI, but important to know about, ;;; as many CGI scripts are written for responding to forms-entry in ;;; HTML browsers. ;;; ;;; The form's field data are turned into a single string, of the form ;;; name=val&name=val ;;; where the and parts are URI encoded to hide their ;;; &, =, and + chars, among other things. After URI encoding, the ;;; space chars are converted to + chars, just for fun. It is important ;;; to encode the spaces this way, because the perfectly general %xx escape ;;; mechanism might be insufficiently confusing. This variant encoding is ;;; called "form-url encoding." ;;; ;;; If the form's method is POST, ;;; Browser sends the form's field data in the entity block, e.g., ;;; "button=on&ans=yes". The request's Content-type: is application/ ;;; x-www-form-urlencoded, and the request's Content-length: is the ;;; number of bytes in the form data. ;;; ;;; If the form's method is GET, ;;; Browser sends the form's field data in the URL's part. ;;; (So the server will pass to the CGI script as $QUERY_STRING, ;;; and perhaps also on in argv[]). ;;; ;;; In either case, the data is "form-url encoded" (as described above). ;;; ISINDEX queries: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; (Likewise for ISINDEX URL queries from browsers.) ;;; Browser url-form encodes the query (see above), which then becomes the ;;; ? part of the URI. (Hence the CGI script will split the individual ;;; fields into argv[].) ;;; CGI interface: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; - The URL's part is assigned to env var $QUERY_STRING, undecoded. ;;; - If it contains no raw "=" chars, it is split at "+" chars. The ;;; substrings are URI decoded, and become the elts of argv[]. ;;; - The CGI script is run with stdin hooked up to the socket. If it's going ;;; to read the entity, it should read $CONTENT_LENGTH bytes worth. ;;; - A bunch of env vars are set; see below. ;;; - If the script begins with "nph-" its output is the entire reply. ;;; Otherwise, it replies to the server, we peel off a little header ;;; that is used to construct the real header for the reply. ;;; See the "spec" for further details. ;;; ;;; The "spec" also talks about PUT, but when I tried this on a dummy script, ;;; the NSCA httpd server generated buggy output. So I am only implementing ;;; the POST and GET ops; any other op generates a "405 Method not allowed" ;;; reply. ;;; Parameters ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; path for scripts (define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin") ;;; The path handler for CGI scripts. (car path) is the script to run. (define (cgi-handler bin-dir) (lambda (path req) (if (pair? path) ; Got to have at least one elt. (let* ((prog (car path)) (filename (or (dotdot-check bin-dir (list prog)) (http-error http-reply/bad-request req (format #f "CGI scripts may not contain \"..\" elements.")))) (nph? (string-suffix? "-nph" prog)) ; PROG end in "-nph" ? (search (http-url:search (request:url req))) ; Compute the (argv (if (and search (not (string-index search #\=))) ; argv list. (split-and-decode-search-spec search) '())) (env (cgi-env req bin-dir (cdr path))) (doit (lambda () (dup->inport (current-input-port) 0) (dup->outport (current-output-port) 1) (apply exec/env filename env argv)))) (http-log "search: ~s, argv: ~s~%" search argv) (let ((request-method (request:method req))) (cond ((or (string=? request-method "GET") (string=? request-method "POST")) ; Could do others also. (if nph? (let ((stat (wait (fork doit)))) (if (not (zero? stat)) (http-error http-reply/bad-request req (format #f "Could not execute CGI script ~a." filename)) stat)) (cgi-send-reply (run/port* doit) req))) (else (http-error http-reply/method-not-allowed req))))) (http-error http-reply/bad-request req "Empty CGI script")))) (define (split-and-decode-search-spec s) (let recur ((i 0)) (cond ((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j) (recur (+ j 1))))) (else (list (unescape-uri s i (string-length s))))))) ;;; Compute the CGI scripts' process environment by adding the standard CGI ;;; environment var bindings to the current process env -- return result ;;; as an alist. ;;; ;;; You are also supposed to add the headers as env vars in a particular ;;; format, but are allowed to bag it if the environment var storage ;;; requirements might overload the OS. I don't know what you can rely upon ;;; in Unix, so I am just bagging it, period. ;;; ;;; Suppose the URL is ;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c ;;; then: ;;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar" ;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/" ;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script" ;;; QUERY_STRING -- not decoded "quux%20a+b=c" ;;; The first three of these vars are *not* encoded, so information is lost ;;; if the URL's path elements contain encoded /'s (%2F). CGI loses. (define (cgi-env req bin-dir path-suffix) (let* ((sock (request:socket req)) (raddr (socket-remote-address sock)) (headers (request:headers req)) ;; Compute the $PATH_INFO and $PATH_TRANSLATED strings. (path-info (uri-path-list->path path-suffix)) ; No encode or .. check. (path-translated (path-list->file-name path-info bin-dir)) ;; Compute the $SCRIPT_PATH string. (url-path (http-url:path (request:url req))) (script-path (take (- (length url-path) (length path-suffix)) url-path)) (script-name (uri-path-list->path script-path))) (if (not request-invariant-cgi-env) (initialise-request-invariant-cgi-env)) (receive (rhost rport) (socket-address->internet-address raddr) (receive (lhost lport) (socket-address->internet-address (socket-local-address sock)) `(("SERVER_PROTOCOL" . ,(version->string (request:version req))) ("SERVER_PORT" . ,(number->string lport)) ("REQUEST_METHOD" . ,(request:method req)) ("PATH_INFO" . ,path-info) ("PATH_TRANSLATED" . ,path-translated) ("SCRIPT_NAME" . ,script-name) ("REMOTE_HOST" . ,(host-info:name (host-info raddr))) ("REMOTE_ADDR" . ,(format-internet-host-address rhost)) ;; ("AUTH_TYPE" . xx) ; Random authentication ;; ("REMOTE_USER" . xx) ; features I don't understand. ;; ("REMOTE_IDENT" . xx) ,@request-invariant-cgi-env ; Stuff that never changes (see below). ,@(cond ((http-url:search (request:url req)) => (lambda (srch) `(("QUERY_STRING" . ,srch)))) (else '())) ,@(cond ((get-header headers 'content-type) => (lambda (ct) `(("CONTENT_TYPE" . ,ct)))) (else '())) ,@(cond ((get-header headers 'content-length) => (lambda (cl) ; Skip initial whitespace (& other non-digits). (let ((first-digit (char-set-index cl char-set:numeric)) (cl-len (string-length cl))) (if first-digit `(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len))) (http-error http-reply/bad-request req "Illegal Content-length: header."))))) (else '())) . ,(env->alist)))))) (define request-invariant-cgi-env #f) (define (initialise-request-invariant-cgi-env) (set! request-invariant-cgi-env `(("PATH" . ,(and (getenv "PATH") cgi-default-bin-path)) ("SERVER_SOFTWARE" . ,server/version) ("SERVER_NAME" . ,(host-info:name (host-info (system-name)))) ("GATEWAY_INTERFACE" . "CGI/1.1")))) (define (take n lis) (if (zero? n) '() (cons (car lis) (take (- n 1) (cdr lis))))) (define (drop n lis) (if (zero? n) lis (drop (- n 1) (cdr lis)))) ;;; Script's output for request REQ is available on SCRIPT-PORT. ;;; The script isn't an "nph-" script, so we read the reply, and mutate ;;; it into a real HTTP reply, which we then send back to the HTTP client. (define (cgi-send-reply script-port req) (let* ((headers (read-rfc822-headers script-port)) (ctype (get-header headers 'content-type)) ; The script headers (loc (get-header headers 'location)) (stat (let ((stat-lines (get-header-lines headers 'status))) (cond ((not (pair? stat-lines)) ; No status header. "200 The idiot CGI script left out the status line.") ((null? (cdr stat-lines)) ; One line status header. (car stat-lines)) (else ; Vas ist das? (http-error http-reply/internal-error req "CGI script generated multi-line status header"))))) (out (current-output-port))) (http-log "headers: ~s~%" headers) ;; Send the reply header back to the client ;; (unless it's a headerless HTTP 0.9 reply). (if (not (v0.9-request? req)) (begin (format out "HTTP/1.0 ~a\r~%" stat) (if ctype (format out "Content-type: ~a\r~%" ctype)) (if loc (format out "Location: ~a\r~%" loc)) (write-crlf out))) (http-log "request:method=~a~%" (request:method req)) ;; Copy the reply body back to the client and close the script port ;; (unless it's a bodiless HEAD transaction). (if (not (string=? (request:method req) "HEAD")) (begin (copy-inport->outport script-port out) (close-input-port script-port)))))