sunet/scheme/httpd/core.scm

483 lines
16 KiB
Scheme

;;; http server in the Scheme Shell -*- Scheme -*-
;;; Olin Shivers <shivers@lcs.mit.edu>
;;; Copyright (c) 1994 by Brian D. Carlstrom and Olin Shivers.
;;; This file implements the core of an HTTP server: code to establish
;;; net connections, read and parse requests, and handle errors.
;;; It does not have the code to actually handle requests. That's up
;;; to other modules, and could vary from server to server. To build
;;; a complete server, you need to define path handlers (see below) --
;;; they determine how requests are to be handled.
;;;
;;; The RFC detailing the HTTP 1.0 protocol, RFC 1945, can be found at
;;; http://www.w3.org/Protocols/rfc1945/rfc1945
;;; Imports and non-R4RS'isms
;;; \r \n in strings for cr and lf.
;;; receive values (MV return)
;;; scsh system calls
;;; rfc822 header parsing
;;; crlf-io (read cr/lf terminated lines)
;;; uri, url packages
;;; ignore-errors (HANDLE package)
;;; char-set stuff
;;; format (Formatted output)
;;; httpd error stuff
;;; condition-stuff (S48 error conditions)
;;; (httpd options)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The server top-level. OPTIONS holds the server configuration (read through
;;; the httpd-options-* accessors); its path handler is the top-level request
;;; path handler -- the procedure that actually deals with the request.
;;; Start the server described by OPTIONS: initialise the HTTP log, move
;;; into the configured root directory, and pass every connection accepted
;;; on the configured port to PROCESS-TOPLEVEL-REQUEST in a forked thread.
(define (httpd options)
  (let ((port (httpd-options-port options))
        (root-dir (httpd-options-root-directory options))
        ;; #f unless OPTIONS supplies a simultaneous-requests value.
        (rate-limiter
         (let ((limit (httpd-options-simultaneous-requests options)))
           (and limit (make-rate-limiter limit)))))
    (init-http-log! options)
    (with-syslog-destination
     "httpd" #f #f #f
     (lambda ()
       (with-cwd
        root-dir
        (bind-listen-accept-loop
         protocol-family/internet
         (lambda (sock addr)
           ;; With a limiter configured: rate-limit-block, then
           ;; rate-limit-open (presumably: wait for a free slot, then
           ;; claim it -- confirm against the rate-limiter module).
           (cond (rate-limiter
                  (rate-limit-block rate-limiter)
                  (rate-limit-open rate-limiter)))
           (with-fatal-error-handler
            ;; Failure before the worker thread has been forked: log it
            ;; and undo the rate-limit-open above.
            (lambda (condition decline)
              (http-syslog (syslog-level notice) "error during connection negotiation~%")
              (cond (rate-limiter
                     (rate-limit-close rate-limiter))))
            (receive (host-address service-port)
                (socket-address->internet-address (socket-remote-address sock))
              (let ((peer (lambda ()
                            (format-internet-host-address host-address))))
                (cond ((and rate-limiter *http-syslog?*)
                       (http-syslog (syslog-level info) "<~a>~a: concurrent request #~a~%"
                                    (pid)
                                    (peer)
                                    (rate-limiter-current-requests rate-limiter))))
                ;; The output side is left unbuffered so that closing the
                ;; socket never has to flush pending output to a client
                ;; that has already gone away.
                (set-port-buffering (socket:outport sock) bufpol/none)
                (fork-thread
                 (lambda ()
                   (set-port-buffering (current-input-port) bufpol/none)
                   (process-toplevel-request sock host-address options)
                   (cond (*http-syslog?*
                          (http-syslog (syslog-level debug) "<~a>~a [closing]~%"
                                       (pid)
                                       (peer))))
                   ;; A failing close is only logged; the cleanup below
                   ;; still runs.
                   (with-fatal-error-handler
                    (lambda (condition decline)
                      (cond (*http-syslog?*
                             (http-syslog (syslog-level notice) "<~a>~a [error closing (~a)]~%"
                                          (pid)
                                          (peer)
                                          condition))))
                    (close-socket sock))
                   (cond (rate-limiter
                          (rate-limit-close rate-limiter)))
                   (cond (*http-syslog?*
                          (http-syslog (syslog-level info) "<~a>~a [closed]~%"
                                       (pid)
                                       (peer)))))))))
         port))))))
;;; Top-level http request processor
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Read, parse, and handle a single http request. The only thing that makes
;;; this complicated is handling errors -- as a server, we can't just let the
;;; standard error handlers toss us into a breakpoint. We have to catch the
;;; error, send an error response back to the client if we can, and then keep
;;; on trucking. This means using the S48's condition system to catch and
;;; handle the various errors, which introduces a major point of R4RS
;;; incompatibility -- R4RS has no exception system. So if you were to port
;;; this code to some other Scheme, you'd really have to sit down and think
;;; about this issue for a minute.
;;; Serve a single request arriving on SOCK.
;;;
;;;   SOCK          the connected socket
;;;   HOST-ADDRESS  the peer's internet address (used for log lines only)
;;;   OPTIONS       server options; supplies the path handler
;;;
;;; Two handlers are installed:
;;;  - the inner one turns http-errors and request syntax errors into an
;;;    error response, which is then sent like any other response;
;;;  - the outer one is the last resort for every other error (including
;;;    errors while sending): it logs, shuts the socket down, and then
;;;    RETURNS.  It never declines, so this procedure always returns
;;;    normally and the caller's cleanup after the call still runs.
(define (process-toplevel-request sock host-address options)
  (with-fatal-error-handler*
   (lambda (c decline)                  ; No call to DECLINE -- we handle all.
     (http-syslog (syslog-level notice) "<~a>~a: error: ~s~%"
                  (pid)
                  (format-internet-host-address host-address)
                  c)
     ;; Shutting down may itself fail (e.g. the peer is already gone);
     ;; that is only logged.
     (with-fatal-error-handler*
      (lambda (c decline)
        (http-syslog (syslog-level notice) "<~a>~a [error shutting down: ~s]~%"
                     (pid)
                     (format-internet-host-address host-address)
                     c))
      (lambda ()
        (shutdown-socket sock shutdown/sends+receives)
        (http-syslog (syslog-level info) "<~a>~a [shut down]~%"
                     (pid)
                     (format-internet-host-address host-address)))))
   (lambda ()
     (call-with-values
         (lambda ()
           ;; Produces two values: the request (or #f if it could not be
           ;; parsed) and the response to send.
           (with-fatal-error-handler*
            (lambda (c decline)
              (http-syslog (syslog-level notice) "<~a>~a: error: ~s~%"
                           (pid)
                           (format-internet-host-address host-address)
                           c)
              (cond
               ((http-error? c)
                ;; The condition's stuff is (status-code req . args).
                (apply (lambda (status-code req . args)
                         (values req
                                 (apply make-http-error-response
                                        status-code req
                                        args)))
                       (condition-stuff c)))
               ((fatal-syntax-error? c)
                (values #f
                        (apply make-http-error-response http-status/bad-request
                               #f       ; No request yet.
                               "Request parsing error -- report to client maintainer."
                               (condition-stuff c))))
               (else
                ;; Not ours: pass it on to the last-resort handler above.
                (decline))))
            (lambda ()
              (let* ((req (parse-http-request sock options))
                     (response ((httpd-options-path-handler options)
                                (http-url:path (request:url req))
                                req)))
                (values req response)))))
       (lambda (req response)
         (send-http-response req response (socket:outport sock) options)
         ;; NOTE(review): this logs http-status/ok even when RESPONSE is an
         ;; error response -- confirm that this is intended.
         (http-log req http-status/ok))))))
;;;; HTTP request parsing
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; This code provides procedures to read requests from an input
;;;; port.
;;; Read and parse an http request from SOCK's input port.
;;;
;;; Note: this parser parses the URI into an http URL record. If the URI
;;; isn't an http URL, the parser fails. This may not be right. There's
;;; nothing in the http protocol to prevent you from passing a non-http
;;; URI -- what this would mean, however, is not clear. Like so much of
;;; the Web, the protocols are redundant, underconstrained, and ill-specified.
;;; Read the request line from SOCK, split it into method, URI and version,
;;; and build a request via MAKE-REQUEST.  A two-word request line is
;;; taken to be version 0.9 and no headers are read for it; a three-word
;;; line has its third word parsed by PARSE-HTTP-VERSION and its headers
;;; read with READ-RFC822-HEADERS.  EOF or any other word count is a
;;; fatal syntax error.
(define (parse-http-request sock options)
  (let ((line (read-crlf-line (socket:inport sock))))
    ;; Log the raw request line together with the peer's address.
    (if *http-syslog?*
        (receive (host-address service-port)
            (socket-address->internet-address (socket-remote-address sock))
          (http-syslog (syslog-level info) "<~a>~a: ~a~%"
                       (pid)
                       (format-internet-host-address host-address)
                       line)))
    (cond
     ((eof-object? line)
      (fatal-syntax-error "EOF while parsing request."))
     (else
      ;; LET* keeps the order significant: the URL is parsed before any
      ;; header is read from the socket.
      (let* ((words (string->words line))
             (version (case (length words)
                        ((2) '(0 . 9))
                        ((3) (parse-http-version (caddr words)))
                        (else (fatal-syntax-error "Bad HTTP version."))))
             (method (car words))
             (uri-string (cadr words))
             (url (parse-http-servers-url-fragment uri-string sock options))
             (headers (if (equal? version '(0 . 9))
                          '()
                          (read-rfc822-headers (socket:inport sock)))))
        (make-request method uri-string url version headers sock))))))
;;; Parse the URL, but if it begins without the "http://host:port" prefix,
;;; interpolate one from SOCKET. It would be sleazier but faster if we just
;;; computed the default host and port at server-startup time, instead of
;;; on every request.
;;; Turn URI-STRING into an http URL.
;;;  - A #fragment part is rejected.
;;;  - With an explicit scheme, the scheme must be "http" (any case) and
;;;    the rest is handed to PARSE-HTTP-URL.
;;;  - Without a scheme, the path must start with a slash; host and port
;;;    are then filled in from SOCKET's local address via
;;;    MY-REPORTED-FQDN and MY-REPORTED-PORT.
;;; Every rejection is signalled with FATAL-SYNTAX-ERROR.
(define (parse-http-servers-url-fragment uri-string socket options)
  (receive (scheme path search frag-id) (parse-uri uri-string)
    (cond
     (frag-id
      (fatal-syntax-error "HTTP URL contains illegal #<fragment> suffix."
                          uri-string))
     (scheme
      (if (string-ci=? scheme "http")
          (parse-http-url path search #f)
          (fatal-syntax-error "Non-HTTP URL" uri-string)))
     ;; A leading "/" shows up as an empty first path component.
     ((and (pair? path) (string=? (car path) ""))
      (let* ((local-addr (socket-local-address socket))
             (host-name (my-reported-fqdn local-addr options))
             (port-number (my-reported-port local-addr options)))
        (make-http-url (make-userhost #f #f
                                      host-name
                                      (number->string port-number))
                       (map unescape-uri (cdr path)) ; drop the empty component
                       search
                       #f)))
     (else
      (fatal-syntax-error "Path fragment must begin with slash"
                          uri-string)))))
;;; Parse a version string of the form "HTTP/<major>.<minor>" into the
;;; pair (major . minor), both decimal numbers.  Anything else is a fatal
;;; syntax error.  The regexp is built once, when this definition is
;;; evaluated.
(define parse-http-version
  (let ((version-re (make-regexp "^HTTP/([0-9]+)\\.([0-9]+)$"))
        (reject (lambda (text) (fatal-syntax-error "Bad HTTP version" text))))
    (lambda (vstring)
      (let ((hit (regexp-exec version-re vstring)))
        (if (not hit)
            (reject vstring)
            (let ((component (lambda (index)
                               (or (string->number (match:substring hit index) 10)
                                   (reject vstring)))))
              (cons (component 1) (component 2))))))))
;;; Split string into a list of whitespace-separated strings.
;;; This could have been trivially defined in scsh as (field-splitter " \t\n")
;;; but I hand-coded it because it's short, and I didn't want to invoke the
;;; regexp machinery for something so simple.
;;; Every character that is not in CHAR-SET:WHITESPACE.
(define non-whitespace (char-set-complement char-set:whitespace))

;;; Return the maximal runs of non-whitespace characters of S, in order,
;;; as a list of fresh strings.  A string without any such character
;;; yields the empty list.
(define (string->words s)
  (let ((len (string-length s)))
    (let scan ((from 0))
      (let ((word-start (string-index s non-whitespace from)))
        (if (not word-start)
            '()
            (let ((word-end (string-index s char-set:whitespace word-start)))
              (if word-end
                  (cons (substring s word-start word-end)
                        (scan word-end))
                  ;; Last word runs to the end of the string.
                  (list (substring s word-start len)))))))))
;;; Write the status line and header section of RESPONSE to PORT:
;;; protocol, code and message on one line, then the Server, Content-type
;;; and Date fields, then the response's extra fields, then an empty line.
(define (send-http-headers response port)
  (let ((put (lambda (datum) (display datum port)))
        (gap (lambda () (write-char #\space port))))
    ;; Status line.
    (put server/protocol)
    (gap)
    (put (response-code response))
    (gap)
    (put (response-message response))
    (write-crlf port)
    ;; Fields sent with every response.
    (send-http-header-fields
     `((server . ,server/version)
       (content-type . ,(response-mime response))
       (date . ,(time->http-date-string (response-seconds response))))
     port)
    ;; Fields specific to this response.
    (send-http-header-fields (response-extras response) port)
    ;; The empty line terminates the header section.
    (write-crlf port)))
;;; Write RESPONSE to PORT in the form appropriate for REQUEST.
;;;
;;;   REQUEST   the parsed request, or #f when the request could not be
;;;             parsed (the 400 Bad Request case)
;;;   RESPONSE  the response to send
;;;   PORT      output port of the connection
;;;   OPTIONS   server options, passed through to DISPLAY-HTTP-BODY
;;;
;;; The two decisions are independent of each other:
;;;  - the status line and headers are sent unless the request is a 0.9
;;;    request;
;;;  - the body is sent unless the request method is "HEAD".
;;; With REQUEST = #f neither version nor method is known, so both the
;;; headers and the body are sent.
(define (send-http-response request response port options)
  (if (not (and request (v0.9-request? request)))
      (send-http-headers response port))
  (if (not (and request (string=? (request:method request) "HEAD")))
      (display-http-body (response-body response) port options)))
;;; Write each element of HEADERS, a list of (name . value) pairs, to PORT
;;; as "name: value" followed by CR/LF.  Names and values are printed with
;;; DISPLAY.
(define (send-http-header-fields headers port)
  (let ((send-field
         (lambda (field)
           (let ((name (car field))
                 (value (cdr field)))
             (display name port)
             (display ": " port)
             (display value port)
             (write-crlf port)))))
    (for-each send-field headers)))
;;; (make-http-error-response status-code req [message . extras])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Take an http-error condition, and format it into a response to the client.
;;;
;;; As a special case, request REQ is allowed to be #f, meaning we haven't
;;; even had a chance to parse and construct the request. This is only used
;;; for 400 BAD-REQUEST error report.
;;; MAKE-HTTP-ERROR-RESPONSE is called from error handlers, so to avoid
;;; infinite looping, if an error occurs while it is running, we just
;;; silently return. (We no longer need to do this; I have changed
;;; WITH-FATAL-ERROR-HANDLER* so that this is not necessary, but I'll
;;; leave it in to play it safe.)
;;; Build the error response for STATUS-CODE, REQ and the optional
;;; [message . extras] ARGS.  The real work is done by
;;; REALLY-MAKE-HTTP-ERROR-RESPONSE; it runs under IGNORE-ERRORS because
;;; this procedure is itself called from error handlers.
(define (make-http-error-response status-code req . args)
  (let ((build (lambda ()
                 (apply really-make-http-error-response status-code req args))))
    (ignore-errors build)))
;;; Log the error via HTTP-LOG and build a text/html response for
;;; STATUS-CODE.
;;;
;;;   STATUS-CODE  numeric status, compared against the http-status/*
;;;                values
;;;   REQ          the request; read in the forbidden and not-implemented
;;;                cases
;;;   ARGS         optional: a message, followed by extras.  Every extra
;;;                is printed (with ~s) at the end of the generated page.
;;;
;;; The body is not produced here: each response carries a writer
;;; procedure of (port options) that prints the page when it is called.
(define (really-make-http-error-response status-code req . args)
  (http-log req status-code)
  (let* ((message (if (pair? args) (car args) #f))
         (extras (if (pair? args) (cdr args) '()))
         ;; Page title taken from the status code's standard text.
         (status-title
          (lambda (port)
            (title-html port (status-code->text status-code))))
         ;; Print MESSAGE through format string FMT, if there is a message.
         (say-message
          (lambda (port fmt)
            (if message (format port fmt message))))
         ;; Print the extras and close the page.
         (finish-page
          (lambda (port)
            (for-each (lambda (extra) (format port "<BR>~s~%" extra)) extras)
            (write-string "</BODY>\n" port)))
         (respond
          (lambda (headers writer-proc)
            (make-response status-code
                           (status-code->text status-code)
                           (time)
                           "text/html"
                           headers
                           (make-writer-body writer-proc)))))
    (cond
     ;; Redirects need two args: MESSAGE is the new URI: field and the
     ;; first extra is the older Location: field.
     ((or (= status-code http-status/moved-temp)
          (= status-code http-status/moved-perm))
      (respond
       (list (cons 'uri message)
             (cons 'location (car extras)))
       (lambda (port options)
         (title-html port "Document moved")
         (format port
                 "This document has ~A moved to a <A HREF=\"~A\">new location</A>.~%"
                 (if (= status-code http-status/moved-temp) "temporarily" "permanently")
                 message)
         (finish-page port))))

     ((= status-code http-status/bad-request)
      (respond
       '()
       (lambda (port options)
         (status-title port)
         (write-string "<P>Client sent a query that this server could not understand.\n"
                       port)
         (say-message port "<BR>~%Reason: ~A~%")
         (finish-page port))))

     ;; MESSAGE doubles as the value of the WWW-Authenticate field.
     ((= status-code http-status/unauthorized)
      (respond
       (list (cons 'WWW-Authenticate message))
       (lambda (port options)
         (title-html port "Authorization Required")
         (write-string "<P>Browser not authentication-capable or\n" port)
         (write-string "authentication failed.\n" port)
         (say-message port "~a~%")
         (finish-page port))))

     ((= status-code http-status/forbidden)
      (respond
       '()
       (lambda (port options)
         (title-html port "Request not allowed.")
         (format port
                 "Your client does not have permission to perform a ~A~%"
                 (request:method req))
         (format port "operation on url ~a.~%" (request:uri req))
         (say-message port "<P>~%~a~%")
         (finish-page port))))

     ((= status-code http-status/not-found)
      (respond
       '()
       (lambda (port options)
         (title-html port "URL not found")
         (write-string
          "<P>The requested URL was not found on this server.\n"
          port)
         (say-message port "<P>~%~a~%")
         (finish-page port))))

     ;; Internal errors are also reported to syslog, at response-creation
     ;; time.
     ((= status-code http-status/internal-error)
      (http-syslog (syslog-level error) "internal-error: ~A" message)
      (respond
       '()
       (lambda (port options)
         (status-title port)
         (format port "The server encountered an internal error or
misconfiguration and was unable to complete your request.
<P>
Please inform the server administrator, ~A, of the circumstances leading to
the error, and time it occured.~%"
                 (httpd-options-server-admin options))
         (say-message port "<P>~%~a~%")
         (finish-page port))))

     ((= status-code http-status/not-implemented)
      (respond
       '()
       (lambda (port options)
         (status-title port)
         (format port "This server does not currently implement
the requested method (~A).~%"
                 (request:method req))
         (say-message port "<P>~a~%")
         (finish-page port))))

     ;; Any other status code: a page with just the status title.
     (else
      (http-syslog (syslog-level info) "Skipping unhandled status code ~A.~%" status-code)
      (respond
       '()
       (lambda (port options)
         (status-title port)
         (finish-page port)))))))
;;; Return my Internet host name (my fully-qualified domain name).
;;; This works only if an actual resolver is behind host-info.
;;;
;;; In case of aliased names, you just might get the wrong one.
;;; Furthermore, you may get screwed in the presence of a server
;;; accelerator such as Squid.
;;; (my-reported-fqdn addr options) => host name string
;;;
;;; The name comes from, in order of preference: the fqdn configured in
;;; OPTIONS, DNS-LOOKUP-IP on ADDR, and HOST-INFO on ADDR.  The most recent
;;; answer is cached together with the ADDR and OPTIONS it was computed
;;; for; the cached name is reused only while both are EQUAL? to the
;;; current arguments, otherwise the name is computed afresh.
;;;
;;; The cache is guarded by a lock.  The lock is taken and released via
;;; DYNAMIC-WIND so that an error raised by one of the lookups cannot leave
;;; it held and block every later caller.
(define my-reported-fqdn
  (let ((fqdn-lock (make-lock))
        (fqdn-cache #f)
        (used-addr #f)
        (used-options #f))
    (lambda (addr options)
      (dynamic-wind
       (lambda () (obtain-lock fqdn-lock))
       (lambda ()
         (if (not (and fqdn-cache
                       (equal? used-addr addr)
                       (equal? used-options options)))
             ;; Nothing cached yet, or cached for different arguments.
             (begin
               (set! fqdn-cache (or (httpd-options-fqdn options)
                                    (dns-lookup-ip (socket-address->string addr))
                                    (host-info:name (host-info addr))))
               (set! used-addr addr)
               (set! used-options options)))
         fqdn-cache)
       (lambda () (release-lock fqdn-lock))))))
;;; Return the port number to report for this server: the reported-port
;;; value from OPTIONS when there is one, otherwise the port component of
;;; the socket address ADDR.
(define (my-reported-port addr options)
  (let ((configured (httpd-options-reported-port options)))
    (if configured
        configured
        (call-with-values
            (lambda () (socket-address->internet-address addr))
          (lambda (ip-addr portnum) portnum)))))