sunet/scheme/httpd/core.scm

431 lines
16 KiB
Scheme

;;; http server in the Scheme Shell -*- Scheme -*-
;;; Olin Shivers <shivers@lcs.mit.edu>
;;; Copyright (c) 1994 by Brian D. Carlstrom and Olin Shivers.
;;; This file implements the core of an HTTP server: code to establish
;;; net connections, read and parse requests, and handler errors.
;;; It does not have the code to actually handle requests. That's up
;;; to other modules, and could vary from server to server. To build
;;; a complete server, you need to define path handlers (see below) --
;;; they determine how requests are to be handled.
;;;
;;; The RFC detailing the HTTP 1.0 protocol, RFC 1945, can be found at
;;; http://www.w3.org/Protocols/rfc1945/rfc1945
;;; Imports and non-R4RS'isms
;;; \r \n in strings for cr and lf.
;;; receive values (MV return)
;;; scsh system calls
;;; rfc822 header parsing
;;; crlf-io (read cr/lf terminated lines)
;;; uri, url packages
;;; ignore-errors (HANDLE package)
;;; char-set stuff
;;; format (Formatted output)
;;; httpd error stuff
;;; condition-stuff (S48 error conditions)
;;; (httpd options)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The server top-level. PATH-HANDLER is the top-level request path handler --
;;; the procedure that actually deals with the request.
(define (httpd options)
(let ((port (httpd-options-port options))
(root-dir (httpd-options-root-directory options))
(rate-limiter
(cond
((httpd-options-simultaneous-requests options)
=> make-rate-limiter)
(else #f))))
(init-http-log! options)
(with-syslog-destination
"httpd" #f #f #f
(lambda ()
(with-cwd
root-dir
(bind-listen-accept-loop
protocol-family/internet
;; Why is the output socket unbuffered? So that if the client
;; closes the connection, we won't lose when we try to close the
;; socket by trying to flush the output buffer.
(lambda (sock addr)
(if rate-limiter
(begin
(rate-limit-block rate-limiter)
(rate-limit-open rate-limiter)))
(with-fatal-error-handler
(lambda (c decline)
(http-syslog (syslog-level notice) "error during connection negotiation~%")
(if rate-limiter
(rate-limit-close rate-limiter)))
(call-with-values
(lambda ()
(socket-address->internet-address (socket-remote-address sock)))
(lambda (host-address service-port)
(if (and rate-limiter *http-syslog?*)
(http-syslog (syslog-level info) "<~a>~a: concurrent request #~a~%"
(pid)
(format-internet-host-address host-address)
(rate-limiter-current-requests rate-limiter)))
(set-port-buffering (socket:outport sock) bufpol/none) ; No buffering
(fork-thread
(lambda ()
(with-current-input-port
(socket:inport sock)
(with-current-output-port
(socket:outport sock)
(set-port-buffering (current-input-port) bufpol/none)
(process-toplevel-request sock host-address options)))
(if *http-syslog?*
(http-syslog (syslog-level debug) "<~a>~a [closing]~%"
(pid)
(format-internet-host-address host-address)))
(with-fatal-error-handler
(lambda (c decline)
(if *http-syslog?*
(http-syslog (syslog-level notice) "<~a>~a [error closing (~a)]~%"
(pid)
(format-internet-host-address host-address)
c)))
(close-socket sock))
(if rate-limiter
(rate-limit-close rate-limiter))
(if *http-syslog?*
(http-syslog (syslog-level info) "<~a>~a [closed]~%"
(pid)
(format-internet-host-address host-address)))))))))
port))))))
;;; Top-level http request processor
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Read, parse, and handle a single http request. The only thing that makes
;;; this complicated is handling errors -- as a server, we can't just let the
;;; standard error handlers toss us into a breakpoint. We have to catch the
;;; error, send an error reply back to the client if we can, and then keep
;;; on trucking. This means using the S48's condition system to catch and
;;; handle the various errors, which introduces a major point of R4RS
;;; incompatibiliy -- R4RS has no exception system. So if you were to port
;;; this code to some other Scheme, you'd really have to sit down and think
;;; about this issue for a minute.
(define (process-toplevel-request sock host-address options)
;; This top-level error-handler catches *all* uncaught errors and warnings.
;; If the error condition is a reportable HTTP error, we send a reply back
;; to the client. In any event, we abort the transaction, and return from
;; PROCESS-TOPLEVEL-REQUEST.
;;
;; We *oughta* map non-http-errors into replies anyway.
(with-fatal-error-handler (lambda (c decline) ; No call to decline
(http-syslog (syslog-level notice) "<~a>~a: error: ~s~%"
(pid)
(format-internet-host-address host-address)
c)
(if (http-error? c) ; -- we handle all.
(apply (lambda (reply-code req . args)
(apply send-http-error-reply
reply-code req options
args))
(condition-stuff c))
(with-fatal-error-handler
(lambda (c decline)
(http-syslog (syslog-level notice) "<~a>~a [error shutting down: ~s]~%"
(pid)
(format-internet-host-address host-address)
c))
(shutdown-socket sock shutdown/sends+receives)
(http-syslog (syslog-level info) "<~a>~a [shut down]~%"
(pid)
(format-internet-host-address host-address)))))
(let ((req (with-fatal-error-handler ; Map syntax errors
(lambda (c decline) ; to http errors.
(if (fatal-syntax-error? c)
(apply http-error http-reply/bad-request
#f ; No request yet.
"Request parsing error -- report to client maintainer."
(condition-stuff c))
(decline))) ; Actual work:
(parse-http-request sock options))) ; (1) Parse request.
(handler
(httpd-options-path-handler options)))
(handler (http-url:path (request:url req)) req) ; (2) Deal with it.
(http-log req http-reply/ok))))
;;;; HTTP request parsing
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; This code provides procedures to read requests from an input
;;;; port.
;;; Read and parse an http request from INPORT.
;;;
;;; Note: this parser parses the URI into an http URL record. If the URI
;;; isn't an http URL, the parser fails. This may not be right. There's
;;; nothing in the http protocol to prevent you from passing a non-http
;;; URI -- what this would mean, however, is not clear. Like so much of
;;; the Web, the protocols are redundant, underconstrained, and ill-specified.
(define (parse-http-request sock options)
(let ((line (read-crlf-line)))
; (display line (current-error-port)) (newline (current-error-port))
;; Blat out some logging info.
(if *http-syslog?*
(call-with-values
(lambda ()
(socket-address->internet-address (socket-remote-address sock)))
(lambda (host-address service-port)
(http-syslog (syslog-level info) "<~a>~a: ~a~%"
(pid)
(format-internet-host-address host-address)
line))))
(if (eof-object? line)
(fatal-syntax-error "EOF while parsing request.")
(let* ((elts (string->words line)) ; Split at white-space.
(version (case (length elts)
((2) '(0 . 9))
((3) (parse-http-version (caddr elts)))
(else (fatal-syntax-error "Bad HTTP version.")))))
(let* ((meth (car elts))
(uri-string (cadr elts))
(url (parse-http-servers-url-fragment uri-string sock options))
(headers (if (equal? version '(0 . 9)) '()
(read-rfc822-headers))))
(make-request meth uri-string url version headers sock))))))
;;; Parse the URL, but if it begins without the "http://host:port" prefix,
;;; interpolate one from SOCKET. It would sleazier but faster if we just
;;; computed the default host and port at server-startup time, instead of
;;; on every request.
(define (parse-http-servers-url-fragment uri-string socket options)
(receive (scheme path search frag-id) (parse-uri uri-string)
(if frag-id ; Can't have a #frag part.
(fatal-syntax-error "HTTP URL contains illegal #<fragment> suffix."
uri-string)
(if scheme
(if (string-ci=? scheme "http") ; Better be an http url.
(parse-http-url path search #f)
(fatal-syntax-error "Non-HTTP URL" uri-string))
;; Interpolate the userhost struct from our net connection.
(if (and (pair? path) (string=? (car path) ""))
(let* ((addr (socket-local-address socket))
(local-name (my-reported-fqdn addr options))
(portnum (my-reported-port addr options)))
(make-http-url (make-userhost #f #f
local-name
(number->string portnum))
(map unescape-uri (cdr path)) ; Skip initial /.
search
#f))
(fatal-syntax-error "Path fragment must begin with slash"
uri-string))))))
(define parse-http-version
(let ((re (make-regexp "^HTTP/([0-9]+)\\.([0-9]+)$"))
(lose (lambda (s) (fatal-syntax-error "Bad HTTP version" s))))
(lambda (vstring)
(let ((m (regexp-exec re vstring)))
(if m
(cons (or (string->number (match:substring m 1) 10) (lose vstring))
(or (string->number (match:substring m 2) 10) (lose vstring)))
(lose vstring))))))
;;; Split string into a list of whitespace-separated strings.
;;; This could have been trivially defined in scsh as (field-splitter " \t\n")
;;; but I hand-coded it because it's short, and I didn't want invoke the
;;; regexp machinery for something so simple.
(define non-whitespace (char-set-complement char-set:whitespace))
(define (string->words s)
(let recur ((start 0))
(cond ((string-index s non-whitespace start) =>
(lambda (start)
(cond ((string-index s char-set:whitespace start) =>
(lambda (end)
(cons (substring s start end)
(recur end))))
(else (list (substring s start (string-length s)))))))
(else '()))))
;;; (send-http-error-reply reply-code req options [message . extras])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Take an http-error condition, and format it into a reply to the client.
;;;
;;; As a special case, request REQ is allowed to be #f, meaning we haven't
;;; even had a chance to parse and construct the request. This is only used
;;; for 400 BAD-REQUEST error report, and we make minimal assumptions in this
;;; case (0.9 protocol for the reply, for example). I might be better off
;;; writing a special-case procedure for that case...
;;; SEND-HTTP-ERROR-REPLY is called from error handlers, so to avoid
;;; infinite looping, if an error occurs while it is running, we just
;;; silently return. (We no longer need to do this; I have changed
;;; WITH-FATAL-ERROR-HANDLER* so that this is not necessary, but I'll
;;; leave it in to play it safe.)
(define (send-http-error-reply reply-code req options . args)
(ignore-errors (lambda () ; Ignore errors -- see note above.
(apply really-send-http-error-reply reply-code req options args))))
(define (really-send-http-error-reply reply-code req options . args)
(http-log req reply-code)
(let* ((message (if (pair? args) (car args)))
(extras (if (pair? args) (cdr args) '()))
(new-protocol? (and req (not (v0.9-request? req)))) ; 1.0 or better?
;; Is it OK to send back an HTML body explaining things?
(html-ok? (or (not req)
(not (string=? (request:method req) "HEAD"))))
(out (current-output-port))
(generic-title (lambda ()
(title-html out
(reply-code->text reply-code)
new-protocol?)))
(do-msg (lambda () (cond (message (display message out) (newline out))))))
(if new-protocol? (begin-http-header out reply-code))
;; Don't output the blank line, as individual clauses might
;; want to add more headers.
(if html-ok? (write-string "Content-type: text/html\r\n" out))
;; If html-ok?, we must send back some html, with the <body> tag unclosed.
(cond
;; This error reply requires two args: message is the new URI: field,
;; and the first EXTRA is the older Location: field.
((or (= reply-code http-reply/moved-temp)
(= reply-code http-reply/moved-perm))
(if new-protocol?
(begin
(format out "URI: ~A\r~%" message)
(format out "Location: ~A\r~%" (car extras))))
(if html-ok?
(begin
(title-html out "Document moved" new-protocol?)
(format out
"This document has ~A moved to a <A HREF=\"~A\">new location</A>.~%"
(if (= reply-code http-reply/moved-temp) "temporarily" "permanently")
message))))
((= reply-code http-reply/bad-request)
(if html-ok?
(begin
(generic-title)
(write-string "<P>Client sent a query that this server could not understand.\n"
out)
(if message (format out "<BR>~%Reason: ~A~%" message)))))
((= reply-code http-reply/unauthorized)
(if new-protocol?
(format out "WWW-Authenticate: ~A\r~%\r~%" message)) ; Vas is das?
(if html-ok?
(begin
(title-html out "Authorization Required" new-protocol?)
(write-string "<P>Browser not authentication-capable or\n" out)
(write-string "authentication failed.\n" out)
(if message (format out "~a~%" message)))))
((= reply-code http-reply/forbidden)
(if (not html-ok?)
(begin
(title-html out "Request not allowed." new-protocol?)
(format out
"Your client does not have permission to perform a ~A~%"
(request:method req))
(format out "operation on url ~a.~%" (request:uri req))
(if message (format out "<P>~%~a~%" message)))))
((= reply-code http-reply/not-found)
(if html-ok?
(begin
(title-html out "URL not found" new-protocol?)
(write-string
"<P>The requested URL was not found on this server.\n"
out)
(if message (format out "<P>~%~a~%" message)))))
((= reply-code http-reply/internal-error)
(http-syslog (syslog-level error) "internal-error: ~A" message)
(if html-ok?
(begin
(generic-title)
(format out "The server encountered an internal error or
misconfiguration and was unable to complete your request.
<P>
Please inform the server administrator, ~A, of the circumstances leading to
the error, and time it occured.~%"
(httpd-options-server-admin options))
(if message (format out "<P>~%~a~%" message)))))
((= reply-code http-reply/not-implemented)
(if html-ok?
(begin
(generic-title)
(format out "This server does not currently implement
the requested method (~A).~%"
(request:method req))
(if message (format out "<P>~a~%" message)))))
(else
(http-syslog (syslog-level info) "Skipping unhandled reply code ~A.~%" reply-code)
(if html-ok? (generic-title))))
(cond
(html-ok?
;; Output extra stuff and close the <body> tag.
(for-each (lambda (x) (format out "<BR>~s~%" x)) extras)
(write-string "</BODY>\n" out)))
; (force-output out) ;;; TODO check this
; (flush-all-ports)
(force-output out)
; (if bkp? (breakpoint "http error"))
))
;;; Return my Internet host name (my fully-qualified domain name).
;;; This works only if an actual resolver is behind host-info.
;;;
;;; On systems that do DNS via NIS/Yellow Pages, you only get an
;;; unqualified hostname. Also, in case of aliased names, you just
;;; might get the wrong one. Furthermore, you may get screwed in the
;;; presence of a server accelerator such as Squid.
(define *fqdn-cache* #f)
(define (my-reported-fqdn addr options)
(or *fqdn-cache*
(begin
(set! *fqdn-cache* (or (httpd-options-fqdn options)
(host-info:name (host-info addr))))
*fqdn-cache*)))
(define (my-reported-port addr options)
(or (httpd-options-reported-port options)
(receive (ip-addr portnum) (socket-address->internet-address addr)
portnum)))