;;; http server in the Scheme Shell -*- Scheme -*- ;;; Olin Shivers ;;; Copyright (c) 1994 by Brian D. Carlstrom and Olin Shivers. ;;; Problems: ;;; Need to html-quote URI's when printing them out to HTML text. ;;; This file implements the core of an HTTP server: code to establish ;;; net connections, read and parse requests, and handler errors. ;;; It does not have the code to actually handle requests. That's up ;;; to other modules, and could vary from server to server. To build ;;; a complete server, you need to define path handlers (see below) -- ;;; they determine how requests are to be handled. ;;; ;;; A draft document detailing the HTTP 1.0 protocol can be found at ;;; http://www.w3.org/hypertext/WWW/Protocols/HTTP1.0/ ;;; draft-ietf-http-spec.html ;;; Imports and non-R4RS'isms ;;; \r \n in strings for cr and lf. ;;; let-optionals (let-opt) ;;; receive values (MV return) ;;; scsh system calls ;;; rfc822 header parsing ;;; crlf-io (read cr/lf terminated lines) ;;; when, unless, switch, ? (conditionals) ;;; uri, url packages ;;; defrec package (record structures) ;;; defenum (enumerated types) ;;; ignore-errors (HANDLE package) ;;; string hacking stuff ;;; char-set stuff ;;; format (Formatted output) ;;; httpd error stuff ;;; condition-stuff (S48 error conditions) ;;; Constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define server/version "Scheme-Underground/1.0") (define server/protocol "HTTP/1.0") ;;; Configurable Variables ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Address for config error reports (define server/admin "sperber@informatik.uni-tuebingen.de") (define (set-server/admin! s) (set! server/admin s)) (define *http-log?* #t) (define *http-log-port* (open-output-file "/tmp/bla")) (define (http-log fmt . args) (if *http-log?* (begin (apply format *http-log-port* fmt args) (force-output *http-log-port*) ))) ;;; (httpd path-handler [port server-root-dir]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; The server top-level. PATH-HANDLER is the top-level request path handler -- ;;; the procedure that actually deals with the request. PORT defaults to 80. ;;; SERVER-ROOT-DIR is the server's working directory; it defaults to ;;; /usr/local/etc/httpd (define (httpd path-handler . args) (let-optionals args ((port 80) (root-dir "/usr/local/etc/httpd")) (with-cwd root-dir (bind-listen-accept-loop protocol-family/internet ;; Why is the output socket unbuffered? So that if the client ;; closes the connection, we won't lose when we try to close the ;; socket by trying to flush the output buffer. (lambda (sock addr) ; Called once for every connection. (set-port-buffering (socket:outport sock) 'bufpol/none) ; No buffering (spawn (lambda () ; Kill this line to bag forking. ; Should propagate. ecch. (with-current-input-port (socket:inport sock) ; bind the (with-current-output-port (socket:outport sock) ; stdio ports, & (set-port-buffering (current-input-port) 'bufpol/none) (process-toplevel-request path-handler sock) (close-socket sock))) ; do it. ))) port)))) ;;; Top-level http request processor ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Read, parse, and handle a single http request. The only thing that makes ;;; this complicated is handling errors -- as a server, we can't just let the ;;; standard error handlers toss us into a breakpoint. We have to catch the ;;; error, send an error reply back to the client if we can, and then keep ;;; on trucking. This means using the S48's condition system to catch and ;;; handle the various errors, which introduces a major point of R4RS ;;; incompatibiliy -- R4RS has no exception system. So if you were to port ;;; this code to some other Scheme, you'd really have to sit down and think ;;; about this issue for a minute. (define (process-toplevel-request handler sock) ;; This top-level error-handler catches *all* uncaught errors and warnings. ;; If the error condition is a reportable HTTP error, we send a reply back ;; to the client. In any event, we abort the transaction, and return from ;; PROCESS-TOPLEVEL-REQUEST. ;; ;; We *oughta* map non-http-errors into replies anyway. (with-fatal-error-handler (lambda (c decline) ; No call to decline (http-log "Error! ~s~%" c) (if (http-error? c) ; -- we handle all. (apply send-http-error-reply (condition-stuff c)))) (let ((req (with-fatal-error-handler ; Map syntax errors (lambda (c decline) ; to http errors. (if (fatal-syntax-error? c) (apply http-error http-reply/bad-request #f ; No request yet. "Request parsing error -- report to client maintainer." (condition-stuff c)) (decline))) ; Actual work: (parse-http-request sock)))) ; (1) Parse request. (handler (http-url:path (request:url req)) req)))) ; (2) Deal with it. ;;;; HTTP request parsing ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; This code defines the http REQUEST data structure, and provides ;;;; code to read requests from an input port. (define-record request method ; A string such as "GET", "PUT", etc. uri ; The escaped URI string as read from request line. url ; An http URL record (see url.scm). version ; A (major . minor) integer pair. headers ; An rfc822 header alist (see rfc822.scm). socket) ; The socket connected to the client. (define-record-discloser type/request (lambda (req) (list 'request (request:method req) (request:uri req) (request:url req) (request:version req) (request:headers req) (request:socket req)))) ;;; A http protocol version is an integer pair: (major . minor). (define (version< v1 v2) (or (< (car v1) (car v2)) (and (= (car v1) (car v2)) (< (cdr v1) (cdr v2))))) (define (version<= v1 v2) (not (version< v2 v1))) (define (v0.9-request? req) (version<= (request:version req) '(0 . 9))) (define (version->string v) (string-append "HTTP/" (number->string (car v)) "." (number->string (cdr v)))) ;;; Read and parse an http request from INPORT. ;;; ;;; Note: this parser parses the URI into an http URL record. If the URI ;;; isn't an http URL, the parser fails. This may not be right. There's ;;; nothing in the http protocol to prevent you from passing a non-http ;;; URI -- what this would mean, however, is not clear. Like so much of ;;; the Web, the protocols are redundant, underconstrained, and ill-specified. (define (parse-http-request sock) (let ((line (read-crlf-line))) ;; Blat out some logging info. (if *http-log?* (let* ((addr (socket-remote-address sock)) (host (host-info:name (host-info addr)))) (http-log "~a: ~a~%" host line))) (if (eof-object? line) (fatal-syntax-error "EOF while parsing request.") (let* ((elts (string->words line)) ; Split at white-space. (version (switch = (length elts) ((2) '(0 . 9)) ((3) (parse-http-version (caddr elts))) (else (fatal-syntax-error "Bad HTTP version."))))) (let* ((meth (car elts)) (uri-string (cadr elts)) (url (parse-http-servers-url-fragment uri-string sock)) (headers (if (equal? version '(0 . 9)) '() (read-rfc822-headers)))) (make-request meth uri-string url version headers sock)))))) ;;; Parse the URL, but if it begins without the "http://host:port" prefix, ;;; interpolate one from SOCKET. It would sleazier but faster if we just ;;; computed the default host and port at server-startup time, instead of ;;; on every request. (define (parse-http-servers-url-fragment uri-string socket) (receive (scheme path search frag-id) (parse-uri uri-string) (if frag-id ; Can't have a #frag part. (fatal-syntax-error "HTTP URL contains illegal # suffix." uri-string) (if scheme (if (string-ci=? scheme "http") ; Better be an http url. (parse-http-url path search #f) (fatal-syntax-error "Non-HTTP URL" uri-string)) ;; Interpolate the userhost struct from our net connection. (if (and (pair? path) (string=? (car path) "")) (let* ((addr (socket-local-address socket)) (local-name (my-fqdn addr)) (portnum (my-port addr))) (make-http-url (make-userhost #f #f local-name (number->string portnum)) (map unescape-uri (cdr path)) ;Skip initial /. search #f)) (fatal-syntax-error "Path fragment must begin with slash" uri-string)))))) (define parse-http-version (let ((re (make-regexp "^HTTP/([0-9]+)\\.([0-9]+)$")) (lose (lambda (s) (fatal-syntax-error "Bad HTTP version" s)))) (lambda (vstring) (let ((m (regexp-exec re vstring))) (if m (cons (or (string->number (match:substring m 1) 10) (lose vstring)) (or (string->number (match:substring m 2) 10) (lose vstring))) (lose vstring)))))) ;;; Split string into a list of whitespace-separated strings. ;;; This could have been trivially defined in scsh as (field-splitter " \t\n") ;;; but I hand-coded it because it's short, and I didn't want invoke the ;;; regexp machinery for something so simple. (define non-whitespace (char-set-invert char-set:whitespace)) (define (string->words s) (let recur ((start 0)) (cond ((char-set-index s non-whitespace start) => (lambda (start) (cond ((char-set-index s char-set:whitespace start) => (lambda (end) (cons (substring s start end) (recur end)))) (else (list (substring s start (string-length s))))))) (else '())))) ;;;; Sending replies ;;;;;;;;;;;;;;;;;;;; ;;; Reply codes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; (define http-reply/ok 200), etc. ;;; Also, build an alist HTTP-REPLY-TEXT-TABLE mapping integer reply codes ;;; to their diagnostic text messages. (define-syntax define-http-reply-codes (syntax-rules () ((define-http-reply-codes table set (name val msg) ...) (begin (define table '((val . msg) ...)) (define-enum-constant set name val) ...)))) (define-http-reply-codes http-reply-text-table http-reply (ok 200 "OK") (created 201 "Created") (accepted 202 "Accepted") (prov-info 203 "Provisional Information") (no-content 204 "No Content") (mult-choice 300 "Multiple Choices") (moved-perm 301 "Moved Permanently") (moved-temp 302 "Moved Temporarily") (method 303 "Method (obsolete)") (not-mod 304 "Not Modified") (bad-request 400 "Bad Request") (unauthorized 401 "Unauthorized") (payment-req 402 "Payment Required") (forbidden 403 "Forbidden") (not-found 404 "Not Found") (method-not-allowed 405 "Method Not Allowed") (none-acceptable 406 "None Acceptable") (proxy-auth-required 407 "Proxy Authentication Required") (timeout 408 "Request Timeout") (conflict 409 "Conflict") (gone 410 "Gone") (internal-error 500 "Internal Server Error") (not-implemented 501 "Not Implemented") (bad-gateway 502 "Bad Gateway") (service-unavailable 503 "Service Unavailable") (gateway-timeout 504 "Gateway Timeout")) (define (reply-code->text code) (cdr (assv code http-reply-text-table))) ;;; Text generation utilities. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (time->http-date-string time) (format-date "~A, ~d-~b-~y ~H:~M:~S GMT" (date time 0))) ;;; Output the first chunk of a reply header. (define (begin-http-header out reply-code) (format out "~A ~d ~A\r~%" server/protocol reply-code (reply-code->text reply-code)) (format out "Date: ~A\r~%" (time->http-date-string (time))) (format out "Server: ~A\r~%" server/version)) (define (title-html out message new-protocol?) (if new-protocol? (write-crlf out)) ; Separate html from headers. (format out "~%~%~A~%~%~%~%" message) (format out "~%

~A

~%" message)) ;;; (send-http-error-reply reply-code req [message . extras]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Take an http-error condition, and format it into a reply to the client. ;;; ;;; As a special case, request REQ is allowed to be #f, meaning we haven't ;;; even had a chance to parse and construct the request. This is only used ;;; for 400 BAD-REQUEST error report, and we make minimal assumptions in this ;;; case (0.9 protocol for the reply, for example). I might be better off ;;; writing a special-case procedure for that case... ;;; SEND-HTTP-ERROR-REPLY is called from error handlers, so to avoid ;;; infinite looping, if an error occurs while it is running, we just ;;; silently return. (We no longer need to do this; I have changed ;;; WITH-FATAL-ERROR-HANDLER* so that this is not necessary, but I'll ;;; leave it in to play it safe.) (define (send-http-error-reply reply-code req . args) (ignore-errors (lambda () ; Ignore errors -- see note above. (apply really-send-http-error-reply reply-code req args)))) (define (really-send-http-error-reply reply-code req . args) (http-log "sending error-reply ~a ~%" reply-code) (let* ((message (if (pair? args) (car args))) (extras (if (pair? args) (cdr args) '())) (new-protocol? (and req (not (v0.9-request? req)))) ; 1.0 or better? ;; Is it OK to send back an HTML body explaining things? (html-ok? (or (not req) (not (string=? (request:method req) "HEAD")))) (out (current-output-port)) (generic-title (lambda () (title-html out (reply-code->text reply-code) new-protocol?))) (do-msg (lambda () (cond (message (display message out) (newline out)))))) (if new-protocol? (begin-http-header out reply-code)) ;; Don't output the blank line, as individual clauses might ;; want to add more headers. (if html-ok? (write-string "Content-type: text/html\r\n" out)) ;; If html-ok?, we must send back some html, with the tag unclosed. (switch = reply-code ;; This error reply requires two args: message is the new URI: field, ;; and the first EXTRA is the older Location: field. ((http-reply/moved-temp http-reply/moved-perm) (when new-protocol? (format out "URI: ~A\r~%" message) (format out "Location: ~A\r~%" (car extras))) (when html-ok? (title-html out "Document moved" new-protocol?) (format out "This document has ~A moved to a new location.~%" (if (= reply-code http-reply/moved-temp) "temporarily" "permanently") message))) ((http-reply/bad-request) (when html-ok? (generic-title) (write-string "

Client sent a query that this server could not understand.\n" out) (if message (format out "
~%Reason: ~A~%" message)))) ((http-reply/unauthorized) (if new-protocol? (format out "WWW-Authenticate: ~A\r~%\r~%" message)) ; Vas is das? (when html-ok? (title-html out "Authorization Required" new-protocol?) (write-string "

Browser not authentication-capable or\n" out) (write-string "authentication failed.\n" out) (if message (format out "~a~%" message)))) ((http-reply/forbidden) (unless html-ok? (title-html out "Request not allowed." new-protocol?) (format out "Your client does not have permission to perform a ~A~%" (request:method req)) (format out "operation on url ~a.~%" (request:uri req)) (if message (format out "

~%~a~%" message)))) ((http-reply/not-found) (when html-ok? (title-html out "URL not found" new-protocol?) (write-string "

The requested URL was not found on this server.\n" out) (if message (format out "

~%~a~%" message)))) ((http-reply/internal-error) (format (current-error-port) "ERROR: ~A~%" message) (when html-ok? (generic-title) (format out "The server encountered an internal error or misconfiguration and was unable to complete your request.

Please inform the server administrator, ~A, of the circumstances leading to the error, and time it occured.~%" server/admin) (if message (format out "

~%~a~%" message)))) ((http-reply/not-implemented) (when html-ok? (generic-title) (format out "This server does not currently implement the requested method (~A).~%" (request:method req)) (if message (format out "

~a~%" message)))) (else (if html-ok? (generic-title)))) (cond (html-ok? ;; Output extra stuff and close the tag. (for-each (lambda (x) (format out "
~s~%" x)) extras) (write-string "\n" out))) ; (force-output out) ;;; TODO check this ; (flush-all-ports) (force-output out) ; (if bkp? (breakpoint "http error")) )) ;;; Return my Internet host name (my fully-qualified domain name). ;;; This works only if an actual resolver is behind host-info. ;;; ;;; On systems that do DNS via NIS/Yellow Pages, you only get an ;;; unqualified hostname. Also, in case of aliased names, you just ;;; might get the wrong one. Furthermore, you may get screwed in the ;;; presence of a server accelerator such as Squid. ;;; ;;; In these cases, and on NeXTSTEP, you'll have to set it by hand. (define (my-fqdn addr) (or *my-fqdn* (host-info:name (host-info addr)))) (define *my-fqdn* #f) (define (set-my-fqdn! fqdn) (set! *my-fqdn* fqdn)) (define (my-port addr) (or *my-port* (receive (ip-addr portnum) (socket-address->internet-address addr) portnum))) (define *my-port* #f) (define (set-my-port! portnum) (set! *my-port* portnum))