;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*- ;;; This file is part of the Scheme Untergrund Networking package. ;;; Copyright (c) 1995 by Olin Shivers. ;;; For copyright information, see the file COPYING which comes with ;;; the distribution. ;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec". ;;; PROBLEMS: ;;; - The handlers could be made -- closed over their parameters ;;; (e.g., root vars, etc.) ;;; This code provides a request handler for the HTTP server that implements ;;; a CGI interface to external programs for doing HTTP transactions. ;;; About HTML forms ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This info is in fact independent of CGI, but important to know about, ;;; as many CGI scripts are written for responding to forms-entry in ;;; HTML browsers. ;;; ;;; The form's field data are turned into a single string, of the form ;;; name=val&name=val ;;; where the and parts are URI encoded to hide their ;;; &, =, and + chars, among other things. After URI encoding, the ;;; space chars are converted to + chars, just for fun. It is important ;;; to encode the spaces this way, because the perfectly general %xx escape ;;; mechanism might be insufficiently confusing. This variant encoding is ;;; called "form-url encoding." ;;; ;;; If the form's method is POST, ;;; Browser sends the form's field data in the entity block, e.g., ;;; "button=on&ans=yes". The request's Content-type: is application/ ;;; x-www-form-urlencoded, and the request's Content-length: is the ;;; number of bytes in the form data. ;;; ;;; If the form's method is GET, ;;; Browser sends the form's field data in the URL's part. ;;; (So the server will pass to the CGI script as $QUERY_STRING, ;;; and perhaps also on in argv[]). ;;; ;;; In either case, the data is "form-url encoded" (as described above). ;;; ISINDEX queries: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; (Likewise for ISINDEX URL queries from browsers.) ;;; Browser url-form encodes the query (see above), which then becomes the ;;; ? part of the URI. (Hence the CGI script will split the individual ;;; fields into argv[].) ;;; CGI interface: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; - The URL's part is assigned to env var $QUERY_STRING, undecoded. ;;; - If it contains no raw "=" chars, it is split at "+" chars. The ;;; substrings are URI decoded, and become the elts of argv[]. ;;; - The CGI script is run with stdin hooked up to the socket. If it's going ;;; to read the entity, it should read $CONTENT_LENGTH bytes worth. ;;; - A bunch of env vars are set; see below. ;;; - If the script begins with "nph-" its output is the entire response. ;;; Otherwise, it replies to the server, we peel off a little header ;;; that is used to construct the real header for the response. ;;; See the "spec" for further details. (URL above). ;;; ;;; The "spec" also talks about PUT, but when I tried this on a dummy script, ;;; the NSCA httpd server generated buggy output. So I am only implementing ;;; the POST and GET ops; any other op generates a "405 Method not allowed" ;;; response. ;;; Parameters ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; path for scripts (define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin") ;;; The request handler for CGI scripts. (car path) is the script to run. ;;; cgi-bin-path is used, if PATH-variable isn't defined (define (cgi-handler bin-dir . maybe-cgi-bin-path) (let-optionals maybe-cgi-bin-path ((cgi-bin-path cgi-default-bin-path)) (let ((request-invariant-cgi-env ; environment variables that never change `(("PATH" . ,cgi-bin-path) ("SERVER_SOFTWARE" . ,sunet-version-identifier) ("SERVER_NAME" . ,(host-info:name (host-info (system-name)))) ("GATEWAY_INTERFACE" . "CGI/1.1")))) (lambda (path req) (if (pair? path) ; Got to have at least one elt. (compute-cgi path req bin-dir request-invariant-cgi-env) (make-error-response (status-code bad-request) req "Empty CGI script")))))) (define (compute-cgi path req bin-dir request-invariant-cgi-env) (let* ((prog (car path)) (filename (or (dotdot-check bin-dir (list prog)) (http-error (status-code bad-request) req "CGI scripts may not contain \"..\" elements."))) (nph? (string-prefix? "nph-" prog)) ; PROG starts with "nph-" ? ; why did we had (string-suffix? "-nph" prog) here? (search (http-url-search (request-url req))) ; Compute the (argv (if (and search (not (string-index search #\=))) ; argv list. (split-and-decode-search-spec search) '())) (env (cgi-env req bin-dir (cdr path) request-invariant-cgi-env)) (doit (lambda () (dup->inport (socket:inport (request-socket req)) 0) (dup->outport (current-output-port) 1) (dup 1 2) (apply exec/env filename env argv)))) (http-syslog (syslog-level debug) "[cgi-server] search: ~s, argv: ~s~%" search argv) (let ((request-method (request-method req))) (cond ((or (string=? request-method "GET") (string=? request-method "POST")) ; Could do others also. (case (file-not-executable? filename) ((search-denied permission) (make-error-response (status-code forbidden) req "Permission denied.")) ((no-directory nonexistent) (make-error-response (status-code not-found) req "File or directory doesn't exist.")) (else (if nph? (cgi-make-nph-response (run/port* doit)) (cgi-make-response (run/port* doit) path req))))) (else (make-error-response (status-code method-not-allowed) req request-method)))))) (define (split-and-decode-search-spec s) (let recur ((i 0)) (cond ((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j) (recur (+ j 1))))) (else (list (unescape-uri s i (string-length s))))))) ;;; Compute the CGI scripts' process environment by adding the standard CGI ;;; environment var bindings to the current process env -- return result ;;; as an alist. ;;; ;;; You are also supposed to add the headers as env vars in a particular ;;; format, but are allowed to bag it if the environment var storage ;;; requirements might overload the OS. I don't know what you can rely upon ;;; in Unix, so I am just bagging it, period. ;;; ;;; Suppose the URL is ;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c ;;; then: ;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar" ;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/" ;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script" ;;; QUERY_STRING -- not decoded "quux%20a+b=c" ;;; The first three of these vars are *not* encoded, so information is lost ;;; if the URL's path elements contain encoded /'s (%2F). CGI loses. (define (cgi-env req bin-dir path-suffix request-invariant-cgi-env) (let* ((sock (request-socket req)) (raddr (socket-remote-address sock)) (headers (request-headers req)) ;; Compute the $PATH_INFO and $PATH_TRANSLATED strings. (path-info (uri-path->uri path-suffix)) ; No encode or .. check. (path-translated (path-list->file-name path-info bin-dir)) ;; Compute the $SCRIPT_PATH string. (url-path (http-url-path (request-url req))) (script-path (take (- (length url-path) (length path-suffix)) url-path)) (script-name (uri-path->uri script-path))) (receive (rhost rport) (socket-address->internet-address raddr) (receive (lhost lport) (socket-address->internet-address (socket-local-address sock)) `(("SERVER_PROTOCOL" . ,(version->string (request-version req))) ("SERVER_PORT" . ,(number->string lport)) ("REQUEST_METHOD" . ,(request-method req)) ("PATH_INFO" . ,path-info) ("PATH_TRANSLATED" . ,path-translated) ("SCRIPT_NAME" . ,script-name) ("REMOTE_ADDR" . ,(format-internet-host-address rhost)) ;; ("AUTH_TYPE" . xx) ; Random authentication ;; ("REMOTE_USER" . xx) ; features I don't understand. ;; ("REMOTE_IDENT" . xx) ,@request-invariant-cgi-env ; Stuff that never changes (see cgi-handler). ,@(cond ((http-url-search (request-url req)) => (lambda (srch) `(("QUERY_STRING" . ,srch)))) (else '())) ,@(cond ((get-header headers 'content-type) => (lambda (ct) `(("CONTENT_TYPE" . ,ct)))) (else '())) ,@(cond ((get-header headers 'content-length) => (lambda (cl) ; Skip initial whitespace (& other non-digits). (let ((first-digit (string-index cl char-set:digit)) (cl-len (string-length cl))) (if first-digit `(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len))) (http-error (status-code bad-request) req "Illegal `Content-length:' header."))))) (else '())) . ,(env->alist)))))) (define (take n lis) (if (zero? n) '() (cons (car lis) (take (- n 1) (cdr lis))))) (define (drop n lis) (if (zero? n) lis (drop (- n 1) (cdr lis)))) ;;; Script's output for request REQ is available on SCRIPT-PORT. ;;; The script isn't an "nph-" script, so we read the response, and mutate ;;; it into a real HTTP response, which we then send back to the HTTP client. (define (cgi-make-response script-port path req) (set-port-buffering script-port bufpol/block 4096) (let* ((headers (read-rfc822-headers script-port)) (ctype (get-header headers 'content-type)) (loc (get-header headers 'location)) (stat (cond ((get-header headers 'status) => (lambda (code.text) (extract-status-code-and-text code.text req))) (else (http-syslog (syslog-level notice) "CGI script didn't generate status header.") (cons 200 "OK")))) (extra-headers (delete-headers (delete-headers (delete-headers headers 'content-type) 'location) 'status))) (http-syslog (syslog-level debug) "[cgi-server] headers: ~s~%" headers) (http-syslog (syslog-level debug) "[cgi-server] request-method=~a~%" (request-method req)) (if loc (if (uri-has-protocol? (string-trim loc)) (make-error-response (status-code moved-perm) req loc) (make-redirect-response (string-trim loc))) ;; Send the response header back to the client (make-response ;code message seconds mime extras body (number->status-code (car stat)) (cdr stat) ; text (time) (or ctype "text/html") extra-headers (make-writer-body (lambda (out options) (copy-inport->outport script-port out 4096) (close-input-port script-port))))))) (define (delete-headers headers tag) (alist-delete tag headers)) (define (cgi-make-nph-response script-port) (make-nph-response (make-writer-body (lambda (out options) (copy-inport->outport script-port out))))) (define (uri-has-protocol? loc) (receive (proto path search frag) (parse-uri loc) (if proto #t #f))) (define (extract-status-code-and-text status req) (with-fatal-error-handler* (lambda (c d) (http-error (status-code bad-gateway) req "CGI script generated an invalid status header." status c)) (lambda () (let ((status (string-trim status))) (cons (string->number (substring status 0 3)) ; number (substring/shared status 4)))))) ; text