2000-09-26 10:35:26 -04:00
|
|
|
;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*-
|
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
|
|
|
|
|
|
|
;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec".
|
|
|
|
|
|
|
|
;;; Imports and non-R4RS'isms
|
|
|
|
;;; "\r" in string for carriage-return.
|
|
|
|
;;; format
|
|
|
|
;;; string hacks
|
|
|
|
;;; URI, URL record structs, parsers, and unparsers
|
|
|
|
;;; write-crlf
|
|
|
|
;;; scsh syscalls
|
|
|
|
;;; ? for COND
|
|
|
|
;;; SWITCH conditional
|
|
|
|
;;; RFC822 header parsing
|
|
|
|
;;; HTTP request record structure
|
|
|
|
;;; HTTP-ERROR & reply codes
|
|
|
|
;;; Basic path handler support (for ncsa-handler)
|
|
|
|
|
|
|
|
;;; PROBLEMS:
|
|
|
|
;;; - The handlers could be made -- closed over their parameters
|
|
|
|
;;; (e.g., root vars, etc.)
|
|
|
|
|
|
|
|
;;; This code provides a path-handler for the HTTP server that implements
|
|
|
|
;;; a CGI interface to external programs for doing HTTP transactions.
|
|
|
|
|
|
|
|
;;; About HTML forms
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; This info is in fact independent of CGI, but important to know about,
|
|
|
|
;;; as many CGI scripts are written for responding to forms-entry in
|
|
|
|
;;; HTML browsers.
|
|
|
|
;;;
|
|
|
|
;;; The form's field data are turned into a single string, of the form
|
|
|
|
;;; name=val&name=val
|
|
|
|
;;; where the <name> and <val> parts are URI encoded to hide their
|
|
|
|
;;; &, =, and + chars, among other things. After URI encoding, the
|
|
|
|
;;; space chars are converted to + chars, just for fun. It is important
|
|
|
|
;;; to encode the spaces this way, because the perfectly general %xx escape
|
|
|
|
;;; mechanism might be insufficiently confusing. This variant encoding is
|
|
|
|
;;; called "form-url encoding."
|
|
|
|
;;;
|
|
|
|
;;; If the form's method is POST,
|
|
|
|
;;; Browser sends the form's field data in the entity block, e.g.,
|
|
|
|
;;; "button=on&ans=yes". The request's Content-type: is application/
|
|
|
|
;;; x-www-form-urlencoded, and the request's Content-length: is the
|
|
|
|
;;; number of bytes in the form data.
|
|
|
|
;;;
|
|
|
|
;;; If the form's method is GET,
|
|
|
|
;;; Browser sends the form's field data in the URL's <search> part.
|
|
|
|
;;; (So the server will pass to the CGI script as $QUERY_STRING,
|
|
|
|
;;; and perhaps also on in argv[]).
|
|
|
|
;;;
|
|
|
|
;;; In either case, the data is "form-url encoded" (as described above).
|
|
|
|
|
|
|
|
;;; ISINDEX queries:
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; (Likewise for ISINDEX URL queries from browsers.)
|
|
|
|
;;; Browser url-form encodes the query (see above), which then becomes the
|
|
|
|
;;; ?<search> part of the URI. (Hence the CGI script will split the individual
|
|
|
|
;;; fields into argv[].)
|
|
|
|
|
|
|
|
|
|
|
|
;;; CGI interface:
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; - The URL's <search> part is assigned to env var $QUERY_STRING, undecoded.
|
|
|
|
;;; - If it contains no raw "=" chars, it is split at "+" chars. The
|
|
|
|
;;; substrings are URI decoded, and become the elts of argv[].
|
|
|
|
;;; - The CGI script is run with stdin hooked up to the socket. If it's going
|
|
|
|
;;; to read the entity, it should read $CONTENT_LENGTH bytes worth.
|
|
|
|
;;; - A bunch of env vars are set; see below.
|
|
|
|
;;; - If the script begins with "nph-" its output is the entire reply.
|
|
|
|
;;; Otherwise, it replies to the server, we peel off a little header
|
|
|
|
;;; that is used to construct the real header for the reply.
|
2002-04-14 12:57:38 -04:00
|
|
|
;;; See the "spec" for further details. (URL above).
|
2000-09-26 10:35:26 -04:00
|
|
|
;;;
|
|
|
|
;;; The "spec" also talks about PUT, but when I tried this on a dummy script,
|
|
|
|
;;; the NSCA httpd server generated buggy output. So I am only implementing
|
|
|
|
;;; the POST and GET ops; any other op generates a "405 Method not allowed"
|
|
|
|
;;; reply.
|
|
|
|
|
|
|
|
;;; Parameters
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
;;; path for scripts
|
|
|
|
(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin")
|
|
|
|
|
|
|
|
|
|
|
|
;;; The path handler for CGI scripts. (car path) is the script to run.
|
|
|
|
|
|
|
|
(define (cgi-handler bin-dir)
|
|
|
|
(lambda (path req)
|
|
|
|
(if (pair? path) ; Got to have at least one elt.
|
|
|
|
(let* ((prog (car path))
|
|
|
|
|
|
|
|
(filename (or (dotdot-check bin-dir (list prog))
|
|
|
|
(http-error http-reply/bad-request req
|
|
|
|
(format #f "CGI scripts may not contain \"..\" elements."))))
|
|
|
|
|
2002-04-14 12:57:38 -04:00
|
|
|
(nph? (string-prefix? "nph-" prog)) ; PROG starts with "nph-" ?
|
|
|
|
; why did we had (string-suffix? "-nph" prog) here?
|
2000-09-26 10:35:26 -04:00
|
|
|
|
|
|
|
(search (http-url:search (request:url req))) ; Compute the
|
2001-04-27 12:19:34 -04:00
|
|
|
(argv (if (and search (not (string-index search #\=))) ; argv list.
|
2000-09-26 10:35:26 -04:00
|
|
|
(split-and-decode-search-spec search)
|
|
|
|
'()))
|
|
|
|
|
|
|
|
(env (cgi-env req bin-dir (cdr path)))
|
|
|
|
|
|
|
|
(doit (lambda ()
|
2001-10-24 07:02:46 -04:00
|
|
|
(dup->inport (current-input-port) 0)
|
|
|
|
(dup->outport (current-output-port) 1)
|
|
|
|
(apply exec/env filename env argv))))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
2002-04-14 12:57:38 -04:00
|
|
|
(http-syslog (syslog-level debug) "[cgi-server] search: ~s, argv: ~s~%" search argv)
|
2001-08-20 07:31:03 -04:00
|
|
|
(let ((request-method (request:method req)))
|
|
|
|
(cond
|
|
|
|
((or (string=? request-method "GET")
|
|
|
|
(string=? request-method "POST")) ; Could do others also.
|
|
|
|
(if nph?
|
2001-10-24 07:02:46 -04:00
|
|
|
(let ((stat (wait (fork doit))))
|
|
|
|
(if (not (zero? stat))
|
|
|
|
(http-error http-reply/bad-request req
|
|
|
|
(format #f "Could not execute CGI script ~a."
|
|
|
|
filename))
|
|
|
|
stat))
|
2001-08-20 07:31:03 -04:00
|
|
|
(cgi-send-reply (run/port* doit) req)))
|
|
|
|
|
|
|
|
(else (http-error http-reply/method-not-allowed req)))))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
|
|
|
(http-error http-reply/bad-request req "Empty CGI script"))))
|
|
|
|
|
|
|
|
|
|
|
|
(define (split-and-decode-search-spec s)
|
|
|
|
(let recur ((i 0))
|
2001-08-20 07:31:03 -04:00
|
|
|
(cond
|
|
|
|
((string-index s #\+ i) => (lambda (j) (cons (unescape-uri s i j)
|
2000-09-26 10:35:26 -04:00
|
|
|
(recur (+ j 1)))))
|
2001-08-20 07:31:03 -04:00
|
|
|
(else (list (unescape-uri s i (string-length s)))))))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
|
|
|
|
|
|
|
;;; Compute the CGI scripts' process environment by adding the standard CGI
|
|
|
|
;;; environment var bindings to the current process env -- return result
|
|
|
|
;;; as an alist.
|
|
|
|
;;;
|
|
|
|
;;; You are also supposed to add the headers as env vars in a particular
|
|
|
|
;;; format, but are allowed to bag it if the environment var storage
|
|
|
|
;;; requirements might overload the OS. I don't know what you can rely upon
|
|
|
|
;;; in Unix, so I am just bagging it, period.
|
|
|
|
;;;
|
|
|
|
;;; Suppose the URL is
|
|
|
|
;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c
|
|
|
|
;;; then:
|
|
|
|
;;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar"
|
|
|
|
;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/"
|
|
|
|
;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script"
|
|
|
|
;;; QUERY_STRING -- not decoded "quux%20a+b=c"
|
|
|
|
;;; The first three of these vars are *not* encoded, so information is lost
|
|
|
|
;;; if the URL's path elements contain encoded /'s (%2F). CGI loses.
|
|
|
|
|
|
|
|
(define (cgi-env req bin-dir path-suffix)
|
|
|
|
(let* ((sock (request:socket req))
|
|
|
|
(raddr (socket-remote-address sock))
|
|
|
|
|
|
|
|
(headers (request:headers req))
|
|
|
|
|
|
|
|
;; Compute the $PATH_INFO and $PATH_TRANSLATED strings.
|
|
|
|
(path-info (uri-path-list->path path-suffix)) ; No encode or .. check.
|
|
|
|
(path-translated (path-list->file-name path-info bin-dir))
|
|
|
|
|
|
|
|
;; Compute the $SCRIPT_PATH string.
|
|
|
|
(url-path (http-url:path (request:url req)))
|
|
|
|
(script-path (take (- (length url-path) (length path-suffix))
|
|
|
|
url-path))
|
|
|
|
(script-name (uri-path-list->path script-path)))
|
|
|
|
|
|
|
|
(if (not request-invariant-cgi-env)
|
|
|
|
(initialise-request-invariant-cgi-env))
|
|
|
|
|
|
|
|
(receive (rhost rport)
|
|
|
|
(socket-address->internet-address raddr)
|
|
|
|
(receive (lhost lport)
|
|
|
|
(socket-address->internet-address (socket-local-address sock))
|
|
|
|
|
|
|
|
`(("SERVER_PROTOCOL" . ,(version->string (request:version req)))
|
|
|
|
("SERVER_PORT" . ,(number->string lport))
|
|
|
|
("REQUEST_METHOD" . ,(request:method req))
|
|
|
|
|
|
|
|
("PATH_INFO" . ,path-info)
|
|
|
|
("PATH_TRANSLATED" . ,path-translated)
|
|
|
|
("SCRIPT_NAME" . ,script-name)
|
|
|
|
|
2002-01-08 09:02:39 -05:00
|
|
|
("REMOTE_HOST" . ,(host-name-or-ip raddr))
|
2001-09-12 14:53:50 -04:00
|
|
|
("REMOTE_ADDR" . ,(format-internet-host-address rhost))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
|
|
|
;; ("AUTH_TYPE" . xx) ; Random authentication
|
|
|
|
;; ("REMOTE_USER" . xx) ; features I don't understand.
|
|
|
|
;; ("REMOTE_IDENT" . xx)
|
|
|
|
|
|
|
|
,@request-invariant-cgi-env ; Stuff that never changes (see below).
|
|
|
|
|
2001-08-20 07:31:03 -04:00
|
|
|
,@(cond ((http-url:search (request:url req)) =>
|
|
|
|
(lambda (srch) `(("QUERY_STRING" . ,srch))))
|
|
|
|
(else '()))
|
|
|
|
|
|
|
|
,@(cond ((get-header headers 'content-type) =>
|
|
|
|
(lambda (ct) `(("CONTENT_TYPE" . ,ct))))
|
|
|
|
(else '()))
|
|
|
|
|
|
|
|
,@(cond ((get-header headers 'content-length) =>
|
|
|
|
(lambda (cl) ; Skip initial whitespace (& other non-digits).
|
2002-04-21 14:55:18 -04:00
|
|
|
(let ((first-digit (string-index cl char-set:numeric))
|
2001-08-20 07:31:03 -04:00
|
|
|
(cl-len (string-length cl)))
|
|
|
|
(if first-digit
|
|
|
|
`(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len)))
|
|
|
|
(http-error http-reply/bad-request
|
|
|
|
req
|
|
|
|
"Illegal Content-length: header.")))))
|
|
|
|
|
|
|
|
(else '()))
|
|
|
|
|
2000-09-26 10:35:26 -04:00
|
|
|
. ,(env->alist))))))
|
|
|
|
|
|
|
|
(define request-invariant-cgi-env #f)
|
|
|
|
(define (initialise-request-invariant-cgi-env)
|
|
|
|
(set! request-invariant-cgi-env
|
|
|
|
`(("PATH" . ,(and (getenv "PATH") cgi-default-bin-path))
|
|
|
|
("SERVER_SOFTWARE" . ,server/version)
|
|
|
|
("SERVER_NAME" . ,(host-info:name (host-info (system-name))))
|
|
|
|
("GATEWAY_INTERFACE" . "CGI/1.1"))))
|
|
|
|
|
|
|
|
|
|
|
|
(define (take n lis)
|
|
|
|
(if (zero? n) '()
|
|
|
|
(cons (car lis) (take (- n 1) (cdr lis)))))
|
|
|
|
|
|
|
|
(define (drop n lis)
|
|
|
|
(if (zero? n) lis
|
|
|
|
(drop (- n 1) (cdr lis))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Script's output for request REQ is available on SCRIPT-PORT.
|
|
|
|
;;; The script isn't an "nph-" script, so we read the reply, and mutate
|
|
|
|
;;; it into a real HTTP reply, which we then send back to the HTTP client.
|
|
|
|
|
|
|
|
(define (cgi-send-reply script-port req)
|
|
|
|
(let* ((headers (read-rfc822-headers script-port))
|
|
|
|
(ctype (get-header headers 'content-type)) ; The script headers
|
|
|
|
(loc (get-header headers 'location))
|
|
|
|
(stat (let ((stat-lines (get-header-lines headers 'status)))
|
2001-08-20 07:31:03 -04:00
|
|
|
(cond
|
|
|
|
((not (pair? stat-lines)) ; No status header.
|
|
|
|
"200 The idiot CGI script left out the status line.")
|
|
|
|
((null? (cdr stat-lines)) ; One line status header.
|
|
|
|
(car stat-lines))
|
|
|
|
(else ; Vas ist das?
|
|
|
|
(http-error http-reply/internal-error req
|
|
|
|
"CGI script generated multi-line status header")))))
|
2000-09-26 10:35:26 -04:00
|
|
|
(out (current-output-port)))
|
2001-08-20 07:31:03 -04:00
|
|
|
|
2002-04-14 12:57:38 -04:00
|
|
|
(http-syslog (syslog-level debug) "[cgi-server] headers: ~s~%" headers)
|
2000-09-26 10:35:26 -04:00
|
|
|
;; Send the reply header back to the client
|
|
|
|
;; (unless it's a headerless HTTP 0.9 reply).
|
2001-08-20 07:31:03 -04:00
|
|
|
(if (not (v0.9-request? req))
|
|
|
|
(begin
|
|
|
|
(format out "HTTP/1.0 ~a\r~%" stat)
|
|
|
|
(if ctype (format out "Content-type: ~a\r~%" ctype))
|
|
|
|
(if loc (format out "Location: ~a\r~%" loc))
|
|
|
|
(write-crlf out)))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
2002-04-14 12:57:38 -04:00
|
|
|
(http-syslog (syslog-level debug) "[cgi-server] request:method=~a~%" (request:method req))
|
2000-09-26 10:35:26 -04:00
|
|
|
;; Copy the reply body back to the client and close the script port
|
|
|
|
;; (unless it's a bodiless HEAD transaction).
|
2001-08-20 07:31:03 -04:00
|
|
|
(if (not (string=? (request:method req) "HEAD"))
|
|
|
|
(begin
|
|
|
|
(copy-inport->outport script-port out)
|
|
|
|
(close-input-port script-port)))))
|
2000-09-26 10:35:26 -04:00
|
|
|
|
|
|
|
|