;;; http server in the Scheme Shell   -*- Scheme -*-
;;; Olin Shivers

;;; Copyright (c) 1994 by Brian D. Carlstrom and Olin Shivers.

;;; This file implements the core of an HTTP server: code to establish
;;; net connections, read and parse requests, and handle errors.
;;; It does not have the code to actually handle requests. That's up
;;; to other modules, and could vary from server to server. To build
;;; a complete server, you need to define path handlers (see below) --
;;; they determine how requests are to be handled.
;;;
;;; The RFC detailing the HTTP 1.0 protocol, RFC 1945, can be found at
;;; http://www.w3.org/Protocols/rfc1945/rfc1945

;;; Imports and non-R4RS'isms
;;;     \r \n in strings for cr and lf.
;;;     receive values (MV return)
;;;     scsh system calls
;;;     rfc822 header parsing
;;;     crlf-io (read cr/lf terminated lines)
;;;     uri, url packages
;;;     ignore-errors (HANDLE package)
;;;     char-set stuff
;;;     format (Formatted output)
;;;     httpd error stuff
;;;     condition-stuff (S48 error conditions)

;;; (httpd options)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The server top-level.  OPTIONS is the httpd options record; this
;;; procedure reads the listening port, the root directory and the
;;; simultaneous-requests setting from it.  When that last setting is
;;; true it is handed to MAKE-RATE-LIMITER, and every accepted
;;; connection then blocks on / opens the limiter before being served
;;; and closes it afterwards.
;;;
;;; After initialising the http log, the server changes to the root
;;; directory and enters BIND-LISTEN-ACCEPT-LOOP.  Each accepted
;;; connection is served by PROCESS-TOPLEVEL-REQUEST in a thread of
;;; its own.

(define (httpd options)
  (let* ((listen-port (httpd-options-port options))
         (server-root (httpd-options-root-directory options))
         (max-requests (httpd-options-simultaneous-requests options))
         (limiter (and max-requests (make-rate-limiter max-requests))))

    ;; Give back this connection's rate-limiter slot, if limiting is on.
    (define (release-slot!)
      (if limiter (rate-limit-close limiter)))

    ;; Body of the per-connection thread: handle the request, close the
    ;; socket (logging, rather than propagating, any error from the
    ;; close), then release the rate-limiter slot.
    (define (serve-connection sock host-address)
      (set-port-buffering (current-input-port) bufpol/none)
      (process-toplevel-request sock host-address options)
      (if *http-syslog?*
          (http-syslog (syslog-level debug)
                       "<~a>~a [closing]~%"
                       (pid)
                       (format-internet-host-address host-address)))
      (with-fatal-error-handler
       (lambda (c decline)
         (if *http-syslog?*
             (http-syslog (syslog-level notice)
                          "<~a>~a [error closing (~a)]~%"
                          (pid)
                          (format-internet-host-address host-address)
                          c)))
       (close-socket sock))
      (release-slot!)
      (if *http-syslog?*
          (http-syslog (syslog-level info)
                       "<~a>~a [closed]~%"
                       (pid)
                       (format-internet-host-address host-address))))

    ;; Called by the accept loop for every new connection.  An error
    ;; raised before the worker thread is forked is logged and the
    ;; rate-limiter slot is released.
    (define (accept-connection sock addr)
      (if limiter
          (begin (rate-limit-block limiter)
                 (rate-limit-open limiter)))
      (with-fatal-error-handler
       (lambda (c decline)
         (http-syslog (syslog-level notice)
                      "error during connection negotiation~%")
         (release-slot!))
       (receive (host-address service-port)
           (socket-address->internet-address (socket-remote-address sock))
         (if (and limiter *http-syslog?*)
             (http-syslog (syslog-level info)
                          "<~a>~a: concurrent request #~a~%"
                          (pid)
                          (format-internet-host-address host-address)
                          (rate-limiter-current-requests limiter)))
         ;; Why is the output socket unbuffered? So that if the client
         ;; closes the connection, we won't lose when we try to close the
         ;; socket by trying to flush the output buffer.
         (set-port-buffering (socket:outport sock) bufpol/none) ; No buffering
         (fork-thread
          (lambda () (serve-connection sock host-address))))))

    (init-http-log! options)
    (with-syslog-destination
     "httpd" #f #f #f
     (lambda ()
       (with-cwd server-root
         (bind-listen-accept-loop protocol-family/internet
                                  accept-connection
                                  listen-port))))))

;;; Top-level http request processor
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Read, parse, and handle a single http request. The only thing that makes
;;; this complicated is handling errors -- as a server, we can't just let the
;;; standard error handlers toss us into a breakpoint. We have to catch the
;;; error, send an error reply back to the client if we can, and then keep
;;; on trucking. This means using the S48's condition system to catch and
;;; handle the various errors, which introduces a major point of R4RS
;;; incompatibility -- R4RS has no exception system.
;;; So if you were to port
;;; this code to some other Scheme, you'd really have to sit down and think
;;; about this issue for a minute.

(define (process-toplevel-request sock host-address options)

  ;; Report condition C for this client at syslog level NOTICE.
  (define (log-error c)
    (http-syslog (syslog-level notice)
                 "<~a>~a: error: ~s~%"
                 (pid)
                 (format-internet-host-address host-address)
                 c))

  ;; Outermost handler: it sees every condition that the inner handler
  ;; declined, plus anything raised while the reply is being sent.  It
  ;; logs the condition, shuts the socket down in both directions, and
  ;; then declines the condition itself.  An error raised by the
  ;; shutdown is only logged.
  ;;
  ;; We *oughta* map non-http-errors into replies anyway.
  (define (abort-transaction c decline)
    (log-error c)
    (with-fatal-error-handler*
     (lambda (shutdown-c shutdown-decline)
       (http-syslog (syslog-level notice)
                    "<~a>~a [error shutting down: ~s]~%"
                    (pid)
                    (format-internet-host-address host-address)
                    shutdown-c))
     (lambda ()
       (shutdown-socket sock shutdown/sends+receives)
       (http-syslog (syslog-level info)
                    "<~a>~a [shut down]~%"
                    (pid)
                    (format-internet-host-address host-address))
       (decline))))

  ;; The stuff of an http-error condition is (reply-code req . args);
  ;; turn it into the two values REQ and an error response.
  (define (http-error->reply reply-code req . args)
    (values req (apply make-http-error-response reply-code req args)))

  ;; Inner handler: converts reportable conditions into two values, the
  ;; request (or #f) and an error response.  Anything else is declined.
  (define (condition->reply c decline)
    (log-error c)
    (cond ((http-error? c)
           (apply http-error->reply (condition-stuff c)))
          ((fatal-syntax-error? c)
           (values #f                   ; No request yet.
                   (apply make-http-error-response
                          http-reply/bad-request
                          #f
                          "Request parsing error -- report to client maintainer."
                          (condition-stuff c))))
          (else (decline))))

  ;; The normal path: parse the request and hand it, together with the
  ;; path of its URL, to the path handler found in OPTIONS.
  (define (read-and-dispatch)
    (let* ((req (parse-http-request sock options))
           (handler (httpd-options-path-handler options))
           (response (handler (http-url:path (request:url req)) req)))
      (values req response)))

  (with-fatal-error-handler*
   abort-transaction
   (lambda ()
     (receive (req response)
         (with-fatal-error-handler* condition->reply read-and-dispatch)
       (send-http-response response (socket:outport sock) options)
       ;; NOTE(review): this logs http-reply/ok whatever the response's
       ;; own code is -- confirm that is intended.
       (http-log req http-reply/ok)))))


;;;; HTTP request parsing
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; This code provides procedures to read requests from an input
;;;; port.

;;; Read and parse an http request from the input port of SOCK.
;;;
;;; The request line is split at whitespace.  Two words mean an HTTP 0.9
;;; request (no headers are read); three words mean the third one is the
;;; HTTP version and rfc822 headers follow.  Any other word count, or
;;; EOF instead of a request line, signals a fatal syntax error.
;;;
;;; Note: this parser parses the URI into an http URL record. If the URI
;;; isn't an http URL, the parser fails. This may not be right. There's
;;; nothing in the http protocol to prevent you from passing a non-http
;;; URI -- what this would mean, however, is not clear. Like so much of
;;; the Web, the protocols are redundant, underconstrained, and ill-specified.

(define (parse-http-request sock options)
  (let ((request-line (read-crlf-line (socket:inport sock))))
    ;; Blat out some logging info.
    (if *http-syslog?*
        (receive (host-address service-port)
            (socket-address->internet-address (socket-remote-address sock))
          (http-syslog (syslog-level info)
                       "<~a>~a: ~a~%"
                       (pid)
                       (format-internet-host-address host-address)
                       request-line)))
    (cond
     ((eof-object? request-line)
      (fatal-syntax-error "EOF while parsing request."))
     (else
      (let* ((words (string->words request-line)) ; Split at white-space.
             (nwords (length words))
             (version (cond ((= nwords 2) '(0 . 9))
                            ((= nwords 3) (parse-http-version (caddr words)))
                            (else (fatal-syntax-error "Bad HTTP version."))))
             (method (car words))
             (uri-string (cadr words))
             (url (parse-http-servers-url-fragment uri-string sock options))
             (headers (if (equal? version '(0 . 9))
                          '()
                          (read-rfc822-headers (socket:inport sock)))))
        (make-request method uri-string url version headers sock))))))


;;; Parse the URL, but if it begins without the "http://host:port" prefix,
;;; interpolate one from SOCKET.
;;; It would be sleazier but faster if we just
;;; computed the default host and port at server-startup time, instead of
;;; on every request.
;;;
;;; Signals a fatal syntax error when the URI carries a #fragment, names
;;; a scheme other than http, or is a relative path that does not start
;;; with a slash.

(define (parse-http-servers-url-fragment uri-string socket options)
  (receive (scheme path search frag-id) (parse-uri uri-string)
    (cond
     ;; Can't have a #frag part.
     (frag-id
      (fatal-syntax-error "HTTP URL contains illegal # suffix." uri-string))
     ;; A scheme was given: it had better be an http url.
     ((and scheme (string-ci=? scheme "http"))
      (parse-http-url path search #f))
     (scheme
      (fatal-syntax-error "Non-HTTP URL" uri-string))
     ;; No scheme: interpolate the userhost struct from our net connection.
     ((and (pair? path) (string=? (car path) ""))
      (let* ((addr (socket-local-address socket))
             (local-name (my-reported-fqdn addr options))
             (portnum (my-reported-port addr options))
             (userhost (make-userhost #f #f local-name
                                      (number->string portnum)))
             (segments (map unescape-uri (cdr path)))) ; Skip initial /.
        (make-http-url userhost segments search #f)))
     (else
      (fatal-syntax-error "Path fragment must begin with slash" uri-string)))))


;;; Parse a version string of the form "HTTP/<major>.<minor>" into the
;;; pair (major . minor) of numbers.  Anything else signals a fatal
;;; syntax error.  The regexp is built once, when this definition is
;;; evaluated.

(define parse-http-version
  (let ((version-pattern (make-regexp "^HTTP/([0-9]+)\\.([0-9]+)$")))
    (lambda (vstring)
      (define (reject)
        (fatal-syntax-error "Bad HTTP version" vstring))
      ;; Submatch number INDEX of MATCH, read as a decimal number.
      (define (numeric-field match index)
        (or (string->number (match:substring match index) 10)
            (reject)))
      (cond ((regexp-exec version-pattern vstring)
             => (lambda (match)
                  (let* ((major (numeric-field match 1))
                         (minor (numeric-field match 2)))
                    (cons major minor))))
            (else (reject))))))


;;; Split string into a list of whitespace-separated strings.
;;; This could have been trivially defined in scsh as (field-splitter " \t\n")
;;; but I hand-coded it because it's short, and I didn't want to invoke the
;;; regexp machinery for something so simple.
;;; The set of all characters that are not whitespace.
(define non-whitespace (char-set-complement char-set:whitespace))

;;; Return the list of maximal runs of non-whitespace characters in S,
;;; in order of appearance.  A string with no such run yields '().
(define (string->words s)
  (let recur ((start 0))
    (cond ((string-index s non-whitespace start)
           => (lambda (start)           ; START is now the first char of a word.
                (cond ((string-index s char-set:whitespace start)
                       => (lambda (end)
                            (cons (substring s start end)
                                  (recur end))))
                      ;; No more whitespace: the word runs to the end of S.
                      (else (list (substring s start (string-length s)))))))
          (else '()))))


;;; Write RESPONSE to PORT: the status line (protocol, reply code and
;;; message, separated by spaces), the server / content-type / date
;;; headers, the response's extra headers, an empty line, and finally
;;; the body via DISPLAY-HTTP-BODY.
(define (send-http-response response port options)
  (display server/protocol port)
  (write-char #\space port)
  (display (response-code response) port)
  (write-char #\space port)
  (display (response-message response) port)
  (write-crlf port)
  (send-http-headers
   (list (cons 'server server/version)
         (cons 'content-type (response-mime response))
         (cons 'date (time->http-date-string (response-seconds response))))
   port)
  (send-http-headers (response-extras response) port)
  (write-crlf port)
  (display-http-body (response-body response) port options))

;;; HEADERS is a list of (name . value) pairs; each one is written to
;;; PORT as "name: value" followed by cr/lf.
(define (send-http-headers headers port)
  (for-each (lambda (pair)
              (display (car pair) port)
              (display ": " port)
              (display (cdr pair) port)
              (write-crlf port))
            headers))


;;; (make-http-error-response reply-code req [message . extras])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Take an http-error condition, and format it into a reply to the client.
;;;
;;; As a special case, request REQ is allowed to be #f, meaning we haven't
;;; even had a chance to parse and construct the request. This is only used
;;; for 400 BAD-REQUEST error report.

;;; MAKE-HTTP-ERROR-RESPONSE is called from error handlers, so to avoid
;;; infinite looping, if an error occurs while it is running, we just
;;; silently return. (We no longer need to do this; I have changed
;;; WITH-FATAL-ERROR-HANDLER* so that this is not necessary, but I'll
;;; leave it in to play it safe.)

(define (make-http-error-response reply-code req . args)
  (ignore-errors
   (lambda ()                           ; Ignore errors -- see note above.
     (apply really-make-http-error-response reply-code req args))))

;;; Does the actual work for MAKE-HTTP-ERROR-RESPONSE.  Logs the request
;;; with REPLY-CODE, then builds a "text/html" response whose body is
;;; produced lazily by a writer procedure of two arguments (port options).
;;; The first element of ARGS, if any, is the MESSAGE; the remaining
;;; elements are the EXTRAS, which CLOSE-HTML prints at the end of
;;; every body.
(define (really-make-http-error-response reply-code req . args)
  (http-log req reply-code)
  (let* ((message (and (pair? args) (car args)))
         (extras (if (pair? args) (cdr args) '()))
         (generic-title
          (lambda (port)
            (title-html port (reply-code->text reply-code))))
         (close-html
          (lambda (port)
            (for-each (lambda (x) (format port "
~s~%" x))
                      extras)
            (write-string "\n" port)))
         (create-response
          (lambda (headers writer-proc)
            (make-response reply-code
                           (reply-code->text reply-code)
                           (time)
                           "text/html"
                           headers
                           (make-writer-body writer-proc)))))
    (cond
     ;; This error reply requires two args: message is the new URI: field,
     ;; and the first EXTRA is the older Location: field.
     ((or (= reply-code http-reply/moved-temp)
          (= reply-code http-reply/moved-perm))
      (create-response
       (list (cons 'uri message)
             (cons 'location (car extras)))
       (lambda (port options)
         (title-html port "Document moved")
         ;; Two directives for two arguments: the second ~A links to the
         ;; new URI.  (The previous format string had a single ~A, so
         ;; MESSAGE was passed but never consumed.)
         (format port
                 "This document has ~A moved to a <A HREF=\"~A\">new location</A>.~%"
                 (if (= reply-code http-reply/moved-temp)
                     "temporarily"
                     "permanently")
                 message)
         (close-html port))))

     ((= reply-code http-reply/bad-request)
      (create-response
       '()
       (lambda (port options)
         (generic-title port)
         (write-string "

Client sent a query that this server could not understand.\n" port)
         (if message
             (format port "
~%Reason: ~A~%" message))
         (close-html port))))

     ((= reply-code http-reply/unauthorized)
      (create-response
       (list (cons 'WWW-Authenticate message)) ; Vas is das?
       (lambda (port options)
         (title-html port "Authorization Required")
         (write-string "

Browser not authentication-capable or\n" port)
         (write-string "authentication failed.\n" port)
         (if message (format port "~a~%" message))
         (close-html port))))

     ((= reply-code http-reply/forbidden)
      (create-response
       '()
       (lambda (port options)
         (title-html port "Request not allowed.")
         (format port "Your client does not have permission to perform a ~A~%"
                 (request:method req))
         (format port "operation on url ~a.~%" (request:uri req))
         (if message
             (format port "

~%~a~%" message))
         (close-html port))))

     ((= reply-code http-reply/not-found)
      (create-response
       '()
       (lambda (port options)
         (title-html port "URL not found")
         (write-string "

The requested URL was not found on this server.\n" port)
         (if message
             (format port "

~%~a~%" message))
         (close-html port))))

     ((= reply-code http-reply/internal-error)
      (http-syslog (syslog-level error) "internal-error: ~A" message)
      (create-response
       '()
       (lambda (port options)
         (generic-title port)
         (format port "The server encountered an internal error or misconfiguration and was unable to complete your request.

Please inform the server administrator, ~A, of the circumstances leading to the error, and time it occured.~%"
                 (httpd-options-server-admin options))
         (if message
             (format port "

~%~a~%" message))
         (close-html port))))

     ((= reply-code http-reply/not-implemented)
      (create-response
       '()
       (lambda (port options)
         (generic-title port)
         (format port "This server does not currently implement the requested method (~A).~%"
                 (request:method req))
         (if message
             (format port "

~a~%" message))
         (close-html port))))

     (else
      (http-syslog (syslog-level info)
                   "Skipping unhandled reply code ~A.~%"
                   reply-code)
      (create-response
       '()
       (lambda (port options)
         (generic-title port)
         (close-html port)))))))


;;; Return my Internet host name (my fully-qualified domain name).
;;; This works only if an actual resolver is behind host-info.
;;;
;;; In case of aliased names, you just might get the wrong one.
;;; Furthermore, you may get screwed in the presence of a server
;;; accelerator such as Squid.
;;;
;;; The name is taken from the first of these that yields a true value:
;;; the fqdn given in OPTIONS, a DNS lookup of ADDR, the host-info entry
;;; for ADDR.  The result is cached together with the ADDR and OPTIONS
;;; it was computed for; the cached name is returned only while both
;;; still match (per EQUAL?), otherwise it is recomputed.  Access to the
;;; cache is serialised with a lock.
;;;
;;; (The previous version returned #t, not the name, on a matching cache
;;; hit, and returned the stale name without recomputing on a mismatch.)

(define my-reported-fqdn
  (let ((fqdn-lock (make-lock))
        (fqdn-cache #f)
        (used-addr #f)
        (used-options #f))
    (lambda (addr options)
      (obtain-lock fqdn-lock)
      ;; NOTE(review): if the lookup below signals an error the lock is
      ;; never released -- confirm whether that can happen in practice.
      (let ((result
             (if (and fqdn-cache
                      (equal? used-addr addr)
                      (equal? used-options options))
                 fqdn-cache
                 (begin
                   (set! fqdn-cache
                         (or (httpd-options-fqdn options)
                             (dns-lookup-ip (socket-address->string addr))
                             (host-info:name (host-info addr))))
                   (set! used-addr addr)
                   (set! used-options options)
                   fqdn-cache))))
        (release-lock fqdn-lock)
        result))))

;;; The port number to report in generated URLs: the reported-port
;;; setting of OPTIONS if it is true, otherwise the port number of the
;;; socket address ADDR.
(define (my-reported-port addr options)
  (or (httpd-options-reported-port options)
      (receive (ip-addr portnum) (socket-address->internet-address addr)
        portnum)))