299 lines
11 KiB
Scheme
299 lines
11 KiB
Scheme
;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*-
|
|
|
|
;;; This file is part of the Scheme Untergrund Networking package.
|
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
|
;;; For copyright information, see the file COPYING which comes with
|
|
;;; the distribution.
|
|
|
|
;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec".
|
|
|
|
;;; PROBLEMS:
|
|
;;; - The handlers could be made -- closed over their parameters
|
|
;;; (e.g., root vars, etc.)
|
|
|
|
;;; This code provides a request handler for the HTTP server that implements
|
|
;;; a CGI interface to external programs for doing HTTP transactions.
|
|
|
|
;;; About HTML forms
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; This info is in fact independent of CGI, but important to know about,
|
|
;;; as many CGI scripts are written for responding to forms-entry in
|
|
;;; HTML browsers.
|
|
;;;
|
|
;;; The form's field data are turned into a single string, of the form
|
|
;;; name=val&name=val
|
|
;;; where the <name> and <val> parts are URI encoded to hide their
|
|
;;; &, =, and + chars, among other things. After URI encoding, the
|
|
;;; space chars are converted to + chars, just for fun. It is important
|
|
;;; to encode the spaces this way, because the perfectly general %xx escape
|
|
;;; mechanism might be insufficiently confusing. This variant encoding is
|
|
;;; called "form-url encoding."
|
|
;;;
|
|
;;; If the form's method is POST,
|
|
;;; Browser sends the form's field data in the entity block, e.g.,
|
|
;;; "button=on&ans=yes". The request's Content-type: is application/
|
|
;;; x-www-form-urlencoded, and the request's Content-length: is the
|
|
;;; number of bytes in the form data.
|
|
;;;
|
|
;;; If the form's method is GET,
|
|
;;; Browser sends the form's field data in the URL's <search> part.
|
|
;;; (So the server will pass to the CGI script as $QUERY_STRING,
|
|
;;; and perhaps also on in argv[]).
|
|
;;;
|
|
;;; In either case, the data is "form-url encoded" (as described above).
|
|
|
|
;;; ISINDEX queries:
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; (Likewise for ISINDEX URL queries from browsers.)
|
|
;;; Browser url-form encodes the query (see above), which then becomes the
|
|
;;; ?<search> part of the URI. (Hence the CGI script will split the individual
|
|
;;; fields into argv[].)
|
|
|
|
|
|
;;; CGI interface:
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; - The URL's <search> part is assigned to env var $QUERY_STRING, undecoded.
|
|
;;; - If it contains no raw "=" chars, it is split at "+" chars. The
|
|
;;; substrings are URI decoded, and become the elts of argv[].
|
|
;;; - The CGI script is run with stdin hooked up to the socket. If it's going
|
|
;;; to read the entity, it should read $CONTENT_LENGTH bytes worth.
|
|
;;; - A bunch of env vars are set; see below.
|
|
;;; - If the script begins with "nph-" its output is the entire response.
|
|
;;; Otherwise, it replies to the server, we peel off a little header
|
|
;;; that is used to construct the real header for the response.
|
|
;;; See the "spec" for further details. (URL above).
|
|
;;;
|
|
;;; The "spec" also talks about PUT, but when I tried this on a dummy script,
|
|
;;; the NSCA httpd server generated buggy output. So I am only implementing
|
|
;;; the POST and GET ops; any other op generates a "405 Method not allowed"
|
|
;;; response.
|
|
|
|
;;; Parameters
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; path for scripts
|
|
(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin")
|
|
|
|
;;; The request handler for CGI scripts. (car path) is the script to run.
|
|
;;; cgi-bin-path is used, if PATH-variable isn't defined
|
|
|
|
(define (cgi-handler bin-dir . maybe-cgi-bin-path)
|
|
(let-optionals
|
|
maybe-cgi-bin-path
|
|
((cgi-bin-path cgi-default-bin-path))
|
|
|
|
(let ((request-invariant-cgi-env ; environment variables that never change
|
|
`(("PATH" . ,(and (getenv "PATH") cgi-bin-path))
|
|
("SERVER_SOFTWARE" . ,sunet-version-identifier)
|
|
("SERVER_NAME" . ,(host-info:name (host-info (system-name))))
|
|
("GATEWAY_INTERFACE" . "CGI/1.1"))))
|
|
(lambda (path req)
|
|
(if (pair? path) ; Got to have at least one elt.
|
|
(compute-cgi path req bin-dir request-invariant-cgi-env)
|
|
(make-error-response (status-code bad-request) req "Empty CGI script"))))))
|
|
|
|
(define (compute-cgi path req bin-dir request-invariant-cgi-env)
|
|
(let* ((prog (car path))
|
|
|
|
(filename (or (dotdot-check bin-dir (list prog))
|
|
(http-error (status-code bad-request) req
|
|
"CGI scripts may not contain \"..\" elements.")))
|
|
|
|
(nph? (string-prefix? "nph-" prog)) ; PROG starts with "nph-" ?
|
|
; why did we had (string-suffix? "-nph" prog) here?
|
|
|
|
(search (http-url-search (request-url req))) ; Compute the
|
|
(argv (if (and search (not (string-index search #\=))) ; argv list.
|
|
(split-and-decode-search-spec search)
|
|
'()))
|
|
|
|
(env (cgi-env req bin-dir (cdr path) request-invariant-cgi-env))
|
|
|
|
(doit (lambda ()
|
|
(dup->inport (socket:inport (request-socket req)) 0)
|
|
(dup->outport (current-output-port) 1)
|
|
(dup 1 2)
|
|
(apply exec/env filename env argv))))
|
|
|
|
(http-syslog (syslog-level debug) "[cgi-server] search: ~s, argv: ~s~%" search argv)
|
|
(let ((request-method (request-method req)))
|
|
(cond
|
|
((or (string=? request-method "GET")
|
|
(string=? request-method "POST")) ; Could do others also.
|
|
(if nph?
|
|
(let ((stat (wait (fork doit))))
|
|
(if (not (zero? stat))
|
|
(make-error-response
|
|
(status-code bad-request) req
|
|
(format #f "Could not execute CGI script ~a."
|
|
filename))
|
|
stat)) ;; FIXME! must return http-response object!
|
|
(case (file-not-executable? filename)
|
|
((search-denied permission)
|
|
(make-error-response (status-code forbidden) req
|
|
"Permission denied."))
|
|
((no-directory nonexistent)
|
|
(make-error-response (status-code not-found) req
|
|
"File or directory doesn't exist."))
|
|
(else
|
|
(cgi-make-response (run/port* doit) path req)))))
|
|
|
|
(else
|
|
(make-error-response (status-code method-not-allowed) req request-method))))))
|
|
|
|
|
|
(define (split-and-decode-search-spec s)
|
|
(let recur ((i 0))
|
|
(cond
|
|
((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j)
|
|
(recur (+ j 1)))))
|
|
(else (list (unescape-uri s i (string-length s)))))))
|
|
|
|
|
|
;;; Compute the CGI scripts' process environment by adding the standard CGI
|
|
;;; environment var bindings to the current process env -- return result
|
|
;;; as an alist.
|
|
;;;
|
|
;;; You are also supposed to add the headers as env vars in a particular
|
|
;;; format, but are allowed to bag it if the environment var storage
|
|
;;; requirements might overload the OS. I don't know what you can rely upon
|
|
;;; in Unix, so I am just bagging it, period.
|
|
;;;
|
|
;;; Suppose the URL is
|
|
;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c
|
|
;;; then:
|
|
;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar"
|
|
;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/"
|
|
;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script"
|
|
;;; QUERY_STRING -- not decoded "quux%20a+b=c"
|
|
;;; The first three of these vars are *not* encoded, so information is lost
|
|
;;; if the URL's path elements contain encoded /'s (%2F). CGI loses.
|
|
|
|
(define (cgi-env req bin-dir path-suffix request-invariant-cgi-env)
|
|
(let* ((sock (request-socket req))
|
|
(raddr (socket-remote-address sock))
|
|
|
|
(headers (request-headers req))
|
|
|
|
;; Compute the $PATH_INFO and $PATH_TRANSLATED strings.
|
|
(path-info (uri-path-list->path path-suffix)) ; No encode or .. check.
|
|
(path-translated (path-list->file-name path-info bin-dir))
|
|
|
|
;; Compute the $SCRIPT_PATH string.
|
|
(url-path (http-url-path (request-url req)))
|
|
(script-path (take (- (length url-path) (length path-suffix))
|
|
url-path))
|
|
(script-name (uri-path-list->path script-path)))
|
|
|
|
(receive (rhost rport)
|
|
(socket-address->internet-address raddr)
|
|
(receive (lhost lport)
|
|
(socket-address->internet-address (socket-local-address sock))
|
|
|
|
`(("SERVER_PROTOCOL" . ,(version->string (request-version req)))
|
|
("SERVER_PORT" . ,(number->string lport))
|
|
("REQUEST_METHOD" . ,(request-method req))
|
|
|
|
("PATH_INFO" . ,path-info)
|
|
("PATH_TRANSLATED" . ,path-translated)
|
|
("SCRIPT_NAME" . ,script-name)
|
|
|
|
("REMOTE_ADDR" . ,(format-internet-host-address rhost))
|
|
|
|
;; ("AUTH_TYPE" . xx) ; Random authentication
|
|
;; ("REMOTE_USER" . xx) ; features I don't understand.
|
|
;; ("REMOTE_IDENT" . xx)
|
|
|
|
,@request-invariant-cgi-env ; Stuff that never changes (see cgi-handler).
|
|
|
|
,@(cond ((http-url-search (request-url req)) =>
|
|
(lambda (srch) `(("QUERY_STRING" . ,srch))))
|
|
(else '()))
|
|
|
|
,@(cond ((get-header headers 'content-type) =>
|
|
(lambda (ct) `(("CONTENT_TYPE" . ,ct))))
|
|
(else '()))
|
|
|
|
,@(cond ((get-header headers 'content-length) =>
|
|
(lambda (cl) ; Skip initial whitespace (& other non-digits).
|
|
(let ((first-digit (string-index cl char-set:digit))
|
|
(cl-len (string-length cl)))
|
|
(if first-digit
|
|
`(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len)))
|
|
(http-error (status-code bad-request) req
|
|
"Illegal `Content-length:' header.")))))
|
|
|
|
(else '()))
|
|
|
|
. ,(env->alist))))))
|
|
|
|
|
|
(define (take n lis)
|
|
(if (zero? n) '()
|
|
(cons (car lis) (take (- n 1) (cdr lis)))))
|
|
|
|
(define (drop n lis)
|
|
(if (zero? n) lis
|
|
(drop (- n 1) (cdr lis))))
|
|
|
|
|
|
;;; Script's output for request REQ is available on SCRIPT-PORT.
|
|
;;; The script isn't an "nph-" script, so we read the response, and mutate
|
|
;;; it into a real HTTP response, which we then send back to the HTTP client.
|
|
|
|
(define (cgi-make-response script-port path req)
|
|
(let* ((headers (read-rfc822-headers script-port))
|
|
(ctype (get-header headers 'content-type)) ; The script headers
|
|
(loc (get-header headers 'location))
|
|
(stat (extract-status-code-and-text
|
|
(get-header-lines headers 'status) req))
|
|
(extra-headers (delete-headers (delete-headers (delete-headers headers
|
|
'content-type)
|
|
'location)
|
|
'status)))
|
|
|
|
(http-syslog (syslog-level debug) "[cgi-server] headers: ~s~%" headers)
|
|
(http-syslog (syslog-level debug) "[cgi-server] request-method=~a~%"
|
|
(request-method req))
|
|
|
|
(if loc
|
|
(if (uri-has-protocol? (string-trim loc))
|
|
(make-error-response (status-code moved-perm) req
|
|
loc loc)
|
|
(make-redirect-response (string-trim loc)))
|
|
;; Send the response header back to the client
|
|
(make-response ;code message seconds mime extras body
|
|
(car stat) ; code
|
|
(cdr stat) ; text
|
|
(time)
|
|
ctype
|
|
extra-headers
|
|
(make-writer-body
|
|
(lambda (out options)
|
|
(copy-inport->outport script-port out)
|
|
(close-input-port script-port)))))))
|
|
|
|
|
|
(define (uri-has-protocol? loc)
|
|
(receive (proto path search frag)
|
|
(parse-uri loc)
|
|
(if proto #t #f)))
|
|
|
|
(define (extract-status-code-and-text stat-lines req)
|
|
(cond
|
|
((not (pair? stat-lines)) ; No status header.
|
|
(cons (status-code ok) "The idiot CGI script left out the status line."))
|
|
((null? (cdr stat-lines)) ; One line status header.
|
|
(with-fatal-error-handler*
|
|
(lambda (c d)
|
|
(http-error (status-code bad-gateway) req
|
|
"CGI script generated an invalid status header."
|
|
(car stat-lines) c))
|
|
(lambda ()
|
|
(let ((status (string-trim (car stat-lines))))
|
|
(cons (string->number (substring status 0 3)) ; number
|
|
(substring/shared status 4)))))) ; text
|
|
(else ; Vas ist das?
|
|
(http-error (status-code bad-gateway) req
|
|
"CGI script generated multi-line status header.")))) |