sunet/cgi-server.scm

280 lines
11 KiB
Scheme
Raw Permalink Normal View History

2000-09-26 10:35:26 -04:00
;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*-
;;; Copyright (c) 1995 by Olin Shivers.
;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec".
;;; Imports and non-R4RS'isms
;;; "\r" in string for carriage-return.
;;; format
;;; string hacks
;;; URI, URL record structs, parsers, and unparsers
;;; write-crlf
;;; scsh syscalls
;;; ? for COND
;;; SWITCH conditional
;;; RFC822 header parsing
;;; HTTP request record structure
;;; HTTP-ERROR & reply codes
;;; Basic path handler support (for ncsa-handler)
;;; PROBLEMS:
;;; - The handlers could be made -- closed over their parameters
;;; (e.g., root vars, etc.)
;;; This code provides a path-handler for the HTTP server that implements
;;; a CGI interface to external programs for doing HTTP transactions.
;;; About HTML forms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This info is in fact independent of CGI, but important to know about,
;;; as many CGI scripts are written for responding to forms-entry in
;;; HTML browsers.
;;;
;;; The form's field data are turned into a single string, of the form
;;; name=val&name=val
;;; where the <name> and <val> parts are URI encoded to hide their
;;; &, =, and + chars, among other things. After URI encoding, the
;;; space chars are converted to + chars, just for fun. It is important
;;; to encode the spaces this way, because the perfectly general %xx escape
;;; mechanism might be insufficiently confusing. This variant encoding is
;;; called "form-url encoding."
;;;
;;; If the form's method is POST,
;;; Browser sends the form's field data in the entity block, e.g.,
;;; "button=on&ans=yes". The request's Content-type: is application/
;;; x-www-form-urlencoded, and the request's Content-length: is the
;;; number of bytes in the form data.
;;;
;;; If the form's method is GET,
;;; Browser sends the form's field data in the URL's <search> part.
;;; (So the server will pass to the CGI script as $QUERY_STRING,
;;; and perhaps also on in argv[]).
;;;
;;; In either case, the data is "form-url encoded" (as described above).
;;; ISINDEX queries:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (Likewise for ISINDEX URL queries from browsers.)
;;; Browser url-form encodes the query (see above), which then becomes the
;;; ?<search> part of the URI. (Hence the CGI script will split the individual
;;; fields into argv[].)
;;; CGI interface:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - The URL's <search> part is assigned to env var $QUERY_STRING, undecoded.
;;; - If it contains no raw "=" chars, it is split at "+" chars. The
;;; substrings are URI decoded, and become the elts of argv[].
;;; - The CGI script is run with stdin hooked up to the socket. If it's going
;;; to read the entity, it should read $CONTENT_LENGTH bytes worth.
;;; - A bunch of env vars are set; see below.
;;; - If the script begins with "nph-" its output is the entire reply.
;;; Otherwise, it replies to the server, we peel off a little header
;;; that is used to construct the real header for the reply.
;;; See the "spec" for further details.
;;;
;;; The "spec" also talks about PUT, but when I tried this on a dummy script,
;;; the NSCA httpd server generated buggy output. So I am only implementing
;;; the POST and GET ops; any other op generates a "405 Method not allowed"
;;; reply.
;;; Parameters
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; path for scripts
(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin")
;;; The path handler for CGI scripts. (car path) is the script to run.
(define (cgi-handler bin-dir)
(lambda (path req)
(if (pair? path) ; Got to have at least one elt.
(let* ((prog (car path))
(filename (or (dotdot-check bin-dir (list prog))
(http-error http-reply/bad-request req
(format #f "CGI scripts may not contain \"..\" elements."))))
(nph? (string-suffix? "-nph" prog)) ; PROG end in "-nph" ?
(search (http-url:search (request:url req))) ; Compute the
(argv (if (and search (not (index search #\=))) ; argv list.
(split-and-decode-search-spec search)
'()))
(env (cgi-env req bin-dir (cdr path)))
(doit (lambda ()
(apply exec/env filename env argv)
(http-error http-reply/bad-request req
(format #f "Could not execute CGI script ~a."
filename)))))
(http-log "search: ~s, argv: ~s~%" search argv)
(switch string=? (request:method req)
(("GET" "POST") ; Could do others also.
(if nph?
(wait (fork doit))
(cgi-send-reply (run/port* doit) req)))
(else (http-error http-reply/method-not-allowed req))))
(http-error http-reply/bad-request req "Empty CGI script"))))
(define (split-and-decode-search-spec s)
(let recur ((i 0))
(? ((index s #\+ i) => (lambda (j) (cons (unescape-uri s i j)
(recur (+ j 1)))))
(else (list (unescape-uri s i (string-length s)))))))
;;; Compute the CGI scripts' process environment by adding the standard CGI
;;; environment var bindings to the current process env -- return result
;;; as an alist.
;;;
;;; You are also supposed to add the headers as env vars in a particular
;;; format, but are allowed to bag it if the environment var storage
;;; requirements might overload the OS. I don't know what you can rely upon
;;; in Unix, so I am just bagging it, period.
;;;
;;; Suppose the URL is
;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c
;;; then:
;;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar"
;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/"
;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script"
;;; QUERY_STRING -- not decoded "quux%20a+b=c"
;;; The first three of these vars are *not* encoded, so information is lost
;;; if the URL's path elements contain encoded /'s (%2F). CGI loses.
(define (cgi-env req bin-dir path-suffix)
(let* ((sock (request:socket req))
(raddr (socket-remote-address sock))
(headers (request:headers req))
;; Compute the $PATH_INFO and $PATH_TRANSLATED strings.
(path-info (uri-path-list->path path-suffix)) ; No encode or .. check.
(path-translated (path-list->file-name path-info bin-dir))
;; Compute the $SCRIPT_PATH string.
(url-path (http-url:path (request:url req)))
(script-path (take (- (length url-path) (length path-suffix))
url-path))
(script-name (uri-path-list->path script-path)))
(if (not request-invariant-cgi-env)
(initialise-request-invariant-cgi-env))
(receive (rhost rport)
(socket-address->internet-address raddr)
(receive (lhost lport)
(socket-address->internet-address (socket-local-address sock))
`(("SERVER_PROTOCOL" . ,(version->string (request:version req)))
("SERVER_PORT" . ,(number->string lport))
("REQUEST_METHOD" . ,(request:method req))
("PATH_INFO" . ,path-info)
("PATH_TRANSLATED" . ,path-translated)
("SCRIPT_NAME" . ,script-name)
("REMOTE_HOST" . ,(host-info:name (host-info raddr)))
("REMOTE_ADDR" . ,(internet-address->dotted-string rhost))
;; ("AUTH_TYPE" . xx) ; Random authentication
;; ("REMOTE_USER" . xx) ; features I don't understand.
;; ("REMOTE_IDENT" . xx)
,@request-invariant-cgi-env ; Stuff that never changes (see below).
,@(? ((http-url:search (request:url req)) =>
(lambda (srch) `(("QUERY_STRING" . ,srch))))
(else '()))
,@(? ((get-header headers 'content-type) =>
(lambda (ct) `(("CONTENT_TYPE" . ,ct))))
(else '()))
,@(? ((get-header headers 'content-length) =>
(lambda (cl) ; Skip initial whitespace (& other non-digits).
(let ((first-digit (char-set-index cl char-set:numeric))
(cl-len (string-length cl)))
(if first-digit
`(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len)))
(http-error http-reply/bad-request
req
"Illegal Content-length: header.")))))
(else '()))
. ,(env->alist))))))
(define request-invariant-cgi-env #f)
(define (initialise-request-invariant-cgi-env)
(set! request-invariant-cgi-env
`(("PATH" . ,(and (getenv "PATH") cgi-default-bin-path))
("SERVER_SOFTWARE" . ,server/version)
("SERVER_NAME" . ,(host-info:name (host-info (system-name))))
("GATEWAY_INTERFACE" . "CGI/1.1"))))
(define (take n lis)
(if (zero? n) '()
(cons (car lis) (take (- n 1) (cdr lis)))))
(define (drop n lis)
(if (zero? n) lis
(drop (- n 1) (cdr lis))))
;;; Script's output for request REQ is available on SCRIPT-PORT.
;;; The script isn't an "nph-" script, so we read the reply, and mutate
;;; it into a real HTTP reply, which we then send back to the HTTP client.
(define (cgi-send-reply script-port req)
(let* ((headers (read-rfc822-headers script-port))
(ctype (get-header headers 'content-type)) ; The script headers
(loc (get-header headers 'location))
(stat (let ((stat-lines (get-header-lines headers 'status)))
(? ((not (pair? stat-lines)) ; No status header.
"200 The idiot CGI script left out the status line.")
((null? (cdr stat-lines)) ; One line status header.
(car stat-lines))
(else ; Vas ist das?
(http-error http-reply/internal-error req
"CGI script generated multi-line status header")))))
(out (current-output-port)))
(http-log "headers: ~s~%" headers)
;; Send the reply header back to the client
;; (unless it's a headerless HTTP 0.9 reply).
(unless (v0.9-request? req)
(format out "HTTP/1.0 ~a\r~%" stat)
(if ctype (format out "Content-type: ~a\r~%" ctype))
(if loc (format out "Location: ~a\r~%" loc))
(write-crlf out))
(http-log "request:method=~a~%" (request:method req))
;; Copy the reply body back to the client and close the script port
;; (unless it's a bodiless HEAD transaction).
(unless (string=? (request:method req) "HEAD")
(copy-inport->outport script-port out)
(close-input-port script-port))))
;;; This proc and its inverse should be in a general IP module.
(define (internet-address->dotted-string num32)
(let* ((num24 (arithmetic-shift num32 -8))
(num16 (arithmetic-shift num24 -8))
(num08 (arithmetic-shift num16 -8))
(byte0 (bitwise-and #b11111111 num08))
(byte1 (bitwise-and #b11111111 num16))
(byte2 (bitwise-and #b11111111 num24))
(byte3 (bitwise-and #b11111111 num32)))
(string-append (number->string byte0) "." (number->string byte1) "."
(number->string byte2) "." (number->string byte3))))