From 01310403c10cc8ad8aac1f49d6b006e4624f7c41 Mon Sep 17 00:00:00 2001 From: mainzelm Date: Tue, 26 Sep 2000 14:35:26 +0000 Subject: [PATCH] *** empty log message *** --- .gitignore | 28 ++ COPYING | 26 ++ ChangeLog | 27 ++ Readme | 45 +++ cgi-script.scm | 94 ++++++ cgi-server.scm | 279 +++++++++++++++++ conditionals.scm | 98 ++++++ crlf-io.scm | 39 +++ htmlout.scm | 196 ++++++++++++ http-top.scm | 58 ++++ httpd-access-control.scm | 77 +++++ httpd-core.scm | 480 ++++++++++++++++++++++++++++ httpd-error.scm | 131 ++++++++ httpd-handlers.scm | 534 +++++++++++++++++++++++++++++++ info-gateway.scm | 642 ++++++++++++++++++++++++++++++++++++++ modules.scm | 374 ++++++++++++++++++++++ parse-forms.scm | 65 ++++ program-modules.scm | 44 +++ rfc822.scm | 213 +++++++++++++ scheme-program-server.scm | 58 ++++ server.scm | 44 +++ seval.scm | 111 +++++++ smtp.scm | 606 +++++++++++++++++++++++++++++++++++ stringhax.scm | 63 ++++ su-httpd.txt | 352 +++++++++++++++++++++ toothless.scm | 58 ++++ uri.scm | 278 +++++++++++++++++ url.scm | 150 +++++++++ 28 files changed, 5170 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 Readme create mode 100644 cgi-script.scm create mode 100644 cgi-server.scm create mode 100644 conditionals.scm create mode 100644 crlf-io.scm create mode 100644 htmlout.scm create mode 100644 http-top.scm create mode 100644 httpd-access-control.scm create mode 100644 httpd-core.scm create mode 100644 httpd-error.scm create mode 100644 httpd-handlers.scm create mode 100644 info-gateway.scm create mode 100644 modules.scm create mode 100644 parse-forms.scm create mode 100644 program-modules.scm create mode 100644 rfc822.scm create mode 100644 scheme-program-server.scm create mode 100755 server.scm create mode 100644 seval.scm create mode 100644 smtp.scm create mode 100644 stringhax.scm create mode 100644 su-httpd.txt create mode 100644 toothless.scm create mode 100644 uri.scm create mode 100644 url.scm diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da8168b --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# CVS default ignores begin +tags +TAGS +.make.state +.nse_depinfo +*~ +\#* +.#* +,* +_$* +*$ +*.old +*.bak +*.BAK +*.orig +*.rej +.del-* +*.a +*.olb +*.o +*.obj +*.so +*.exe +*.Z +*.elc +*.ln +core +# CVS default ignores end diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..a56daa5 --- /dev/null +++ b/COPYING @@ -0,0 +1,26 @@ +Copyright (c) 1995-2000 by Olin Shivers. +Copyright (c) 1996-2000 by Mike Sperber +Copyright (c) 1999-2000 by Martin Gasbichler. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the authors may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..ea95eec --- /dev/null +++ b/ChangeLog @@ -0,0 +1,27 @@ +1997-12-01 Michael Sperber [Mr. Preprocessor] + + * smtp.scm, parse-forms.scm: now 0.5.1-conformant + +Mon Dec 2 10:41:09 1996 Michael Sperber [Mr. Preprocessor] + + * ChangeLog: released 1.0beta2. + + * htmlout.scm: Added escape-html and emit-text. + + * http-core.scm: Made maintainer, domain name, and port + configurable. + + * everything: Commented date routines back in. + + * httpd-access-control.scm: New file. Provides rudimentary access + control. + + * httpd-basic-handlers.scm: Added directory index serving. See + comments in that file and in su-httpd.txt. + + * info-gateway.scm: Added GNU info gateway. + + * everything: Things now work under scsh 0.4.4. + + * ChangeLog: started this file. + diff --git a/Readme b/Readme new file mode 100644 index 0000000..f203896 --- /dev/null +++ b/Readme @@ -0,0 +1,45 @@ +The SU Net package, version 1.0 +=============================== + +This directory contains my code for doing Net hacking from Scheme/scsh. +It includes: + An smtp client library. + Forge mail from the comfort of your own Scheme process. + + rfc822 header library + Read email-style headers. Useful in several contexts (smtp, http, etc.) + + Simple structured HTML output library + Balanced delimiters, etc. htmlout.scm. + + HTTP server library + This is a complete implementation of an HTTP 1.0 server. + The server is very extensible, via a mechanism called "path handlers." + The library includes other standalone libraries that may be of use: + + URI and URL parsers and unparsers. + + A library to help writing CGI scripts in Scheme. + + Server extensions for interfacing to CGI scripts. + + Server extensions for uploading Scheme code. + +------------------------------------------------------------------------------- +Note well: +- You can't do serious programming in Scheme within the bounds of R4RS. + I work in Scheme 48 and scsh. Every file does have a comment header + describing its non-R4RS dependencies, should you decide to try porting + it to another Scheme. + +- Only simple documentation, but my code is written in my usual style -- + voluminously commented. + + -Olin +------------------------------------------------------------------------------- +Note further: + +The net package is currently being maintained by Mike Sperber +. + +My main focus for further development is on making the HTTP server +into a realistic full-blown package, but I'll gladly accept patches +and suggestions for the other parts of the net package. + + -Mike diff --git a/cgi-script.scm b/cgi-script.scm new file mode 100644 index 0000000..973ae87 --- /dev/null +++ b/cgi-script.scm @@ -0,0 +1,94 @@ +;;; NCSA's WWW Common Gateway Interface -- script-side code -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec". + +;;; Imports and non-R4RS'isms +;;; switch (control structure) +;;; getenv read-string (scsh) +;;; error +;;; parse-html-form-query (parse-html-forms package) + + +;;; This file provides routines to help you write programs in Scheme +;;; that can interface to HTTP servers using the CGI program interface +;;; to carry out HTTP transactions. + +;;; Other files/packages that will be of help: +;;; rfc822 For reading headers from entities. +;;; uri url For parsing and unparsing these things. Also for generally +;;; URI-decoding strings. +;;; htmlout For generating HTML output. + +;;; About HTML forms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This info is in fact independent of CGI, but important to know about, +;;; as many CGI scripts are written for responding to forms-entry in +;;; HTML browsers. +;;; +;;; The form's field data are turned into a single string, of the form +;;; name=val&name=val +;;; where the and parts are URI encoded to hide their +;;; &, =, and + chars, among other things. After URI encoding, the +;;; space chars are converted to + chars, just for fun. It is important +;;; to encode the spaces this way, because the perfectly general %xx escape +;;; mechanism might be insufficiently confusing. This variant encoding is +;;; called "form-url encoding." +;;; +;;; If the form's method is POST, +;;; Browser sends the form's field data in the entity block, e.g., +;;; "button=on&ans=yes". The request's Content-type: is application/ +;;; x-www-form-urlencoded, and the request's Content-length: is the +;;; number of bytes in the form data. +;;; +;;; If the form's method is GET, +;;; Browser sends the form's field data in the URL's part. +;;; (So the server will pass to the CGI script as $QUERY_STRING, +;;; and perhaps also on in argv[]). +;;; +;;; In either case, the data is "form-url encoded" (as described above). + +;;; ISINDEX queries: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (Likewise for ISINDEX URL queries from browsers.) +;;; Browser url-form encodes the query (see above), which then becomes the +;;; ? part of the URI. (Hence the CGI script will split the individual +;;; fields into argv[].) + + +;;; CGI interface: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; - The URL's part is assigned to env var $QUERY_STRING, undecoded. +;;; - If it contains no raw "=" chars, it is split at "+" chars. The +;;; substrings are URI decoded, and become the elts of argv[]. You aren't +;;; supposed to rely on this unless you are replying to ISINDEX queries. +;;; - The CGI script is run with stdin hooked up to the socket. If it's going +;;; to read the entity, it should read $CONTENT_LENGTH bytes worth. +;;; - A bunch of env vars are set with useful values. +;;; - Entity block is passed to script on stdin; +;;; script writes reply to stdout. +;;; - If the script begins with "nph-" its output is the entire reply. +;;; Otherwise, when it replies to the server, it sends back a special +;;; little header that tells the server how to construct the real header +;;; for the reply. +;;; See the "spec" for further details. + + +;;; (cgi-form-query) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Return the form data as an alist of decoded strings. +;;; So a query string like "button=on&reply=Oh,%20yes" becomes alist +;;; (("button" . "on") ("reply" . "Oh, yes")) +;;; This only works for GET and POST methods. + +(define (cgi-form-query) + (switch string=? (getenv "REQUEST_METHOD") + + (("GET") + (parse-html-form-query (getenv "QUERY_STRING"))) + + (("POST") + (let ((nchars (string->number (getenv "CONTENT_LENGTH")))) + (parse-html-form-query (read-string nchars)))) + + (else (error "Method not handled.")))) ; Don't be calling me. diff --git a/cgi-server.scm b/cgi-server.scm new file mode 100644 index 0000000..12e3074 --- /dev/null +++ b/cgi-server.scm @@ -0,0 +1,279 @@ +;;; Server support for NCSA's WWW Common Gateway Interface -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; See http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for a sort of "spec". + +;;; Imports and non-R4RS'isms +;;; "\r" in string for carriage-return. +;;; format +;;; string hacks +;;; URI, URL record structs, parsers, and unparsers +;;; write-crlf +;;; scsh syscalls +;;; ? for COND +;;; SWITCH conditional +;;; RFC822 header parsing +;;; HTTP request record structure +;;; HTTP-ERROR & reply codes +;;; Basic path handler support (for ncsa-handler) + +;;; PROBLEMS: +;;; - The handlers could be made -- closed over their parameters +;;; (e.g., root vars, etc.) + +;;; This code provides a path-handler for the HTTP server that implements +;;; a CGI interface to external programs for doing HTTP transactions. + +;;; About HTML forms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This info is in fact independent of CGI, but important to know about, +;;; as many CGI scripts are written for responding to forms-entry in +;;; HTML browsers. +;;; +;;; The form's field data are turned into a single string, of the form +;;; name=val&name=val +;;; where the and parts are URI encoded to hide their +;;; &, =, and + chars, among other things. After URI encoding, the +;;; space chars are converted to + chars, just for fun. It is important +;;; to encode the spaces this way, because the perfectly general %xx escape +;;; mechanism might be insufficiently confusing. This variant encoding is +;;; called "form-url encoding." +;;; +;;; If the form's method is POST, +;;; Browser sends the form's field data in the entity block, e.g., +;;; "button=on&ans=yes". The request's Content-type: is application/ +;;; x-www-form-urlencoded, and the request's Content-length: is the +;;; number of bytes in the form data. +;;; +;;; If the form's method is GET, +;;; Browser sends the form's field data in the URL's part. +;;; (So the server will pass to the CGI script as $QUERY_STRING, +;;; and perhaps also on in argv[]). +;;; +;;; In either case, the data is "form-url encoded" (as described above). + +;;; ISINDEX queries: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (Likewise for ISINDEX URL queries from browsers.) +;;; Browser url-form encodes the query (see above), which then becomes the +;;; ? part of the URI. (Hence the CGI script will split the individual +;;; fields into argv[].) + + +;;; CGI interface: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; - The URL's part is assigned to env var $QUERY_STRING, undecoded. +;;; - If it contains no raw "=" chars, it is split at "+" chars. The +;;; substrings are URI decoded, and become the elts of argv[]. +;;; - The CGI script is run with stdin hooked up to the socket. If it's going +;;; to read the entity, it should read $CONTENT_LENGTH bytes worth. +;;; - A bunch of env vars are set; see below. +;;; - If the script begins with "nph-" its output is the entire reply. +;;; Otherwise, it replies to the server, we peel off a little header +;;; that is used to construct the real header for the reply. +;;; See the "spec" for further details. +;;; +;;; The "spec" also talks about PUT, but when I tried this on a dummy script, +;;; the NSCA httpd server generated buggy output. So I am only implementing +;;; the POST and GET ops; any other op generates a "405 Method not allowed" +;;; reply. + +;;; Parameters +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; path for scripts +(define cgi-default-bin-path "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin") + + +;;; The path handler for CGI scripts. (car path) is the script to run. + +(define (cgi-handler bin-dir) + (lambda (path req) + (if (pair? path) ; Got to have at least one elt. + (let* ((prog (car path)) + + (filename (or (dotdot-check bin-dir (list prog)) + (http-error http-reply/bad-request req + (format #f "CGI scripts may not contain \"..\" elements.")))) + + (nph? (string-suffix? "-nph" prog)) ; PROG end in "-nph" ? + + (search (http-url:search (request:url req))) ; Compute the + (argv (if (and search (not (index search #\=))) ; argv list. + (split-and-decode-search-spec search) + '())) + + (env (cgi-env req bin-dir (cdr path))) + + (doit (lambda () + (apply exec/env filename env argv) + (http-error http-reply/bad-request req + (format #f "Could not execute CGI script ~a." + filename))))) + + (http-log "search: ~s, argv: ~s~%" search argv) + (switch string=? (request:method req) + (("GET" "POST") ; Could do others also. + (if nph? + (wait (fork doit)) + (cgi-send-reply (run/port* doit) req))) + + (else (http-error http-reply/method-not-allowed req)))) + + (http-error http-reply/bad-request req "Empty CGI script")))) + + +(define (split-and-decode-search-spec s) + (let recur ((i 0)) + (? ((index s #\+ i) => (lambda (j) (cons (unescape-uri s i j) + (recur (+ j 1))))) + (else (list (unescape-uri s i (string-length s))))))) + + +;;; Compute the CGI scripts' process environment by adding the standard CGI +;;; environment var bindings to the current process env -- return result +;;; as an alist. +;;; +;;; You are also supposed to add the headers as env vars in a particular +;;; format, but are allowed to bag it if the environment var storage +;;; requirements might overload the OS. I don't know what you can rely upon +;;; in Unix, so I am just bagging it, period. +;;; +;;; Suppose the URL is +;;; //machine/cgi-bin/test-script/foo/bar?quux%20a+b=c +;;; then: +;;; PATH_INFO -- extra info after the script-name path prefix. "/foo/bar" +;;; PATH_TRANSLATED -- non-virtual version of above. "/u/Web/foo/bar/" +;;; SCRIPT_NAME virtual path to script "/cgi-bin/test-script" +;;; QUERY_STRING -- not decoded "quux%20a+b=c" +;;; The first three of these vars are *not* encoded, so information is lost +;;; if the URL's path elements contain encoded /'s (%2F). CGI loses. + +(define (cgi-env req bin-dir path-suffix) + (let* ((sock (request:socket req)) + (raddr (socket-remote-address sock)) + + (headers (request:headers req)) + + ;; Compute the $PATH_INFO and $PATH_TRANSLATED strings. + (path-info (uri-path-list->path path-suffix)) ; No encode or .. check. + (path-translated (path-list->file-name path-info bin-dir)) + + ;; Compute the $SCRIPT_PATH string. + (url-path (http-url:path (request:url req))) + (script-path (take (- (length url-path) (length path-suffix)) + url-path)) + (script-name (uri-path-list->path script-path))) + + (if (not request-invariant-cgi-env) + (initialise-request-invariant-cgi-env)) + + (receive (rhost rport) + (socket-address->internet-address raddr) + (receive (lhost lport) + (socket-address->internet-address (socket-local-address sock)) + + `(("SERVER_PROTOCOL" . ,(version->string (request:version req))) + ("SERVER_PORT" . ,(number->string lport)) + ("REQUEST_METHOD" . ,(request:method req)) + + ("PATH_INFO" . ,path-info) + ("PATH_TRANSLATED" . ,path-translated) + ("SCRIPT_NAME" . ,script-name) + + ("REMOTE_HOST" . ,(host-info:name (host-info raddr))) + ("REMOTE_ADDR" . ,(internet-address->dotted-string rhost)) + + ;; ("AUTH_TYPE" . xx) ; Random authentication + ;; ("REMOTE_USER" . xx) ; features I don't understand. + ;; ("REMOTE_IDENT" . xx) + + ,@request-invariant-cgi-env ; Stuff that never changes (see below). + + ,@(? ((http-url:search (request:url req)) => + (lambda (srch) `(("QUERY_STRING" . ,srch)))) + (else '())) + + ,@(? ((get-header headers 'content-type) => + (lambda (ct) `(("CONTENT_TYPE" . ,ct)))) + (else '())) + + ,@(? ((get-header headers 'content-length) => + (lambda (cl) ; Skip initial whitespace (& other non-digits). + (let ((first-digit (char-set-index cl char-set:numeric)) + (cl-len (string-length cl))) + (if first-digit + `(("CONTENT_LENGTH" . ,(substring cl first-digit cl-len))) + (http-error http-reply/bad-request + req + "Illegal Content-length: header."))))) + + (else '())) + + . ,(env->alist)))))) + +(define request-invariant-cgi-env #f) +(define (initialise-request-invariant-cgi-env) + (set! request-invariant-cgi-env + `(("PATH" . ,(and (getenv "PATH") cgi-default-bin-path)) + ("SERVER_SOFTWARE" . ,server/version) + ("SERVER_NAME" . ,(host-info:name (host-info (system-name)))) + ("GATEWAY_INTERFACE" . "CGI/1.1")))) + + +(define (take n lis) + (if (zero? n) '() + (cons (car lis) (take (- n 1) (cdr lis))))) + +(define (drop n lis) + (if (zero? n) lis + (drop (- n 1) (cdr lis)))) + + +;;; Script's output for request REQ is available on SCRIPT-PORT. +;;; The script isn't an "nph-" script, so we read the reply, and mutate +;;; it into a real HTTP reply, which we then send back to the HTTP client. + +(define (cgi-send-reply script-port req) + (let* ((headers (read-rfc822-headers script-port)) + (ctype (get-header headers 'content-type)) ; The script headers + (loc (get-header headers 'location)) + (stat (let ((stat-lines (get-header-lines headers 'status))) + (? ((not (pair? stat-lines)) ; No status header. + "200 The idiot CGI script left out the status line.") + ((null? (cdr stat-lines)) ; One line status header. + (car stat-lines)) + (else ; Vas ist das? + (http-error http-reply/internal-error req + "CGI script generated multi-line status header"))))) + (out (current-output-port))) + + (http-log "headers: ~s~%" headers) + ;; Send the reply header back to the client + ;; (unless it's a headerless HTTP 0.9 reply). + (unless (v0.9-request? req) + (format out "HTTP/1.0 ~a\r~%" stat) + (if ctype (format out "Content-type: ~a\r~%" ctype)) + (if loc (format out "Location: ~a\r~%" loc)) + (write-crlf out)) + + (http-log "request:method=~a~%" (request:method req)) + ;; Copy the reply body back to the client and close the script port + ;; (unless it's a bodiless HEAD transaction). + (unless (string=? (request:method req) "HEAD") + (copy-inport->outport script-port out) + (close-input-port script-port)))) + + +;;; This proc and its inverse should be in a general IP module. + +(define (internet-address->dotted-string num32) + (let* ((num24 (arithmetic-shift num32 -8)) + (num16 (arithmetic-shift num24 -8)) + (num08 (arithmetic-shift num16 -8)) + (byte0 (bitwise-and #b11111111 num08)) + (byte1 (bitwise-and #b11111111 num16)) + (byte2 (bitwise-and #b11111111 num24)) + (byte3 (bitwise-and #b11111111 num32))) + (string-append (number->string byte0) "." (number->string byte1) "." + (number->string byte2) "." (number->string byte3)))) diff --git a/conditionals.scm b/conditionals.scm new file mode 100644 index 0000000..4260cfb --- /dev/null +++ b/conditionals.scm @@ -0,0 +1,98 @@ +;;; handy syntax +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-syntax when + (syntax-rules () + ((when bool body1 body2 ...) + (if bool (begin body1 body2 ...))))) + + +(define-syntax unless + (syntax-rules () + ((unless bool body1 body2 ...) + (if (not bool) (begin body1 body2 ...))))) + +(define-syntax ? ; ? is synonym for COND. + (syntax-rules () + ((? clause ...) (cond clause ...)))) + + +;;; Like CASE, but you specify the key-comparison procedure. +;;; SWITCH evaluates its keys each time through the conditional. +;;; SWITCHQ keys are not evaluated -- are simply constants. +;;; (switchq string=? (vector-ref vec i) +;;; (("plus" "minus") ...) +;;; (("times" "div") ...) +;;; (else ...)) + +(define-syntax switchq + (syntax-rules () + ((switchq compare key clause ...) + (let ((k key) ; Eval KEY and COMPARE + (c compare)) ; just once, then call %switch. + (%switchq c k clause ...))))) ; C, K are vars, hence replicable. + +(define-syntax %switchq + (syntax-rules (else) + ((%switchq compare key ((key1 ...) body1 body2 ...) rest ...) + (if (or (compare key 'key1) ...) + (begin body1 body2 ...) + (%switchq compare key rest ...))) + + ((%switchq compare key ((key1 ...)) rest ...) ; Null body. + (if (not (or (compare key 'key1) ...)) + (%switchq compare key rest ...))) + + ((%switchq compare key (else body ...)) + (begin body ...)) + + ((%switchq compare key) '#f))) + + +(define-syntax switch + (syntax-rules () + ((switch compare key clause ...) + (let ((k key) ; Eval KEY and COMPARE + (c compare)) ; just once, then call %switch. + (%switch c k clause ...))))) ; C, K are vars, hence replicable. + +(define-syntax %switch + (syntax-rules (else) + ((%switch compare key ((key1 ...) body1 body2 ...) rest ...) + (if (or (compare key key1) ...) + (begin body1 body2 ...) + (%switch compare key rest ...))) + + ((%switch compare key ((key1 ...)) rest ...) ; Null body. + (if (not (or (compare key key1) ...)) + (%switch compare key rest ...))) + + ((%switch compare key (else body ...)) + (begin body ...)) + + ((%switch compare key) '#f))) + +;;; I can't get this to work -- S48 complains "too many ...'s". +;(define-syntax switchq +; (syntax-rules (else) +; ((switchq compare key clause ...) +; (letrec-syntax ((%switchq (syntax-rules (else) +; ((%switchq compare key +; ((key1 ...) body1 body2 ...) rest ...) +; (if (or (compare key 'key1) ...) +; (begin body1 body2 ...) +; (%switchq compare key rest ...))) +; +; ; Null body. +; ((%switchq compare key ((key1 ...)) rest ...) +; (if (not (or (compare key 'key1) ...)) +; (%switchq compare key rest ...))) +; +; ((%switchq compare key (else body ...)) +; (begin body ...)) +; +; ((%switchq compare key) '#f)))) +; +; (let ((k key) ; Eval KEY and COMPARE +; (c compare)) ; just once, then call %switch. +; (%switchq c k clause ...)))))); C, K are vars, hence replicable. diff --git a/crlf-io.scm b/crlf-io.scm new file mode 100644 index 0000000..19294b4 --- /dev/null +++ b/crlf-io.scm @@ -0,0 +1,39 @@ +;;; Read cr/lf and lf terminated lines. -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; External dependencies and non-R4RS'isms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; ascii->char (To create a carriage-return) +;;; read-line write-string force-output (scsh I/O procs) +;;; receive values (MV return) +;;; let-optionals +;;; "\r\n" in strings for cr/lf. (Not R4RS) + +;;; (read-crlf-line [fd/port retain-crlf?]) -> string or EOF object +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Read a line terminated by either line-feed or EOF. If RETAIN-CRLF? is #f +;;; (the default), a terminating cr/lf or lf sequence is trimmed from the +;;; returned string. +;;; +;;; This is simple and inefficient. It would be save one copy if we didn't +;;; use READ-LINE, but replicated its implementation instead. + +(define (read-crlf-line . args) + (let-optionals args ((fd/port (current-input-port)) + (retain-crlf? #f)) + (let ((ln (read-line fd/port retain-crlf?))) + (if (or retain-crlf? (eof-object? ln)) + ln + (let ((slen (string-length ln))) ; Trim a trailing cr, if any. + (if (or (zero? slen) + (not (char=? (string-ref ln (- slen 1)) cr))) + ln + (substring ln 0 (- slen 1)))))))) + +(define cr (ascii->char 13)) + +(define (write-crlf port) + (write-string "\r\n" port) + (force-output port)) + + diff --git a/htmlout.scm b/htmlout.scm new file mode 100644 index 0000000..523baa2 --- /dev/null +++ b/htmlout.scm @@ -0,0 +1,196 @@ +;;; Simple code for doing structured html output. -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; External dependencies +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; format ; Output +;;; receive values ; Multiple-value return + +;;; - An attribute-quoter, that will map an attribute value to its +;;; HTML text representation -- surrounding it with single or double quotes, +;;; as appropriate, etc. + +;;; Printing HTML tags. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; All the emit-foo procedures have the same basic calling conventions: +;;; (emit-foo out ... [ ...]) +;;; - OUT is either a port or #t for the current input port. +;;; - Each attribute is either a (name . value) pair, which is printed as +;;; name="value" +;;; or a single symbol or string, which is simply printed as-is +;;; (this is useful for attributes that don't have values, such as the +;;; ISMAP attribute in tags). + + + +;;; + +(define (emit-tag out tag . attrs) + (let ((out (if (eq? out #t) (current-output-port) out))) + (display "<" out) + (display tag out) + (for-each (lambda (attr) + (display #\space out) + (cond ((pair? attr) ; name="val" + (display (car attr) out) + (display "=\"" out) ; Should check for + (display (cdr attr) out) ; internal double-quote + (display #\" out)) ; etc. + (else + (display attr out)))) ; name + attrs) + (display #\> out))) + + +;;; + +(define (emit-close-tag out tag) + (format out "" tag)) + + +;;;

+ +(define (emit-p . args) ; (emit-p [out attr1 ...]) + (receive (out attrs) (if (pair? args) + (let* ((out (car args))) + (values (if (eq? out #t) (current-output-port) out) + (cdr args))) + (values (current-output-port) args)) + + (apply emit-tag out 'p attrs) + (newline out) + (newline out))) + + +;;; Make Money Fast!!! + +(define (emit-title out title) ; Takes no attributes. + (format out "~a~%~%" title)) + +(define (emit-header out level text . attribs) + (apply with-tag* out (string-append "H" (number->string level)) + (lambda () (display text (fmt->port out))) + attribs)) + +;;; ...and so forth. Could stand to define a bunch of little emitters for the +;;; various tags. (define-tag-emitter ...) + + +;;; Printing out balanced ... pairs. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; (with-tag out tag (attr-elt ...) body ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Execute the body forms between a ... pair. +;;; The (ATTR-ELT ...) list specifies the attributes for the . +;;; It is rather like a LET-list, having the form +;;; ((name val) ...) +;;; Each NAME must be a symbol, and each VAL must be a Scheme expression +;;; whose value is the string to use as attribute NAME's value. Attributes +;;; that have no value (e.g., ISMAP) can be specified as attr-elt NAME, +;;; instead of (NAME VALUE). +;;; +;;; For example, +;;; (let ((hp "http://clark.lcs.mit.edu/~shivers")) ; My home page. +;;; (with-tag port A ((href hp-url) (name "hp")) +;;; (display "home page" port))) +;;; outputs +;;; home page + +(define-syntax with-tag + (syntax-rules () + ((with-tag out tag (attr-elt ...) body ...) + (with-tag* out 'tag (lambda () body ...) + (%hack-attr-elt attr-elt) + ...)))) + +;;; Why does this have to be top-level? +;;; Why can't this be a LET-SYNTAX inside of WITH-TAG? + +(define-syntax %hack-attr-elt + (syntax-rules () ; Build attribute-list element: + ((%hack-attr-elt (name val)) ; (name elt) => (cons 'name elt) + (cons 'name val)) + ((%hack-attr-elt name) 'name))) ; name => 'name + + +;;; Execute THUNK between a ... pair. + +(define (with-tag* out tag thunk . attrs) + (apply emit-tag out tag attrs) + (call-with-values thunk + (lambda results + (emit-close-tag out tag) + (apply values results)))) + + +(define (fmt->port x) + (if (eq? x #t) (current-output-port) x)) + +;;; Translate text to HTML, mapping special chars such as <, >, &, and +;;; double-quote to their HTML escape sequences. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Mike Sperber +;; This is fairly simple-minded + +;; Note iso8859-1 above 127 is perfectly OK + +(define *html-entity-alist* + (list + (cons (ascii->char 60) "<") + (cons (ascii->char 62) ">") + (cons (ascii->char 38) "&") + (cons (ascii->char 34) """))) + +(define *html-entities* + (chars->char-set (map car *html-entity-alist*))) + +(define *html-entity-table* + (let ((v (make-vector 256 #f))) + (for-each (lambda (entry) + (vector-set! v + (char->ascii (car entry)) + (cdr entry))) + *html-entity-alist*) + v)) + +(define (string-set-substring! t start s) + (let* ((l (string-length s)) + (end (+ l start))) + (do ((i start (+ 1 i))) + ((= i end) t) + (string-set! t i (string-ref s (- i start)))))) + +(define (escape-html s) + (let ((target-length + (string-reduce 0 + (lambda (c i) + (+ i + (if (char-set-contains? *html-entities* c) + (string-length + (vector-ref *html-entity-table* + (char->ascii c))) + 1))) + s))) + (if (= target-length (string-length s)) + s + (let ((target (make-string target-length))) + (string-reduce + 0 + (lambda (c i) + (+ i + (if (char-set-contains? *html-entities* c) + (let ((entity (vector-ref *html-entity-table* (char->ascii c)))) + (string-set-substring! target i entity) + (string-length entity)) + (begin + (string-set! target i c) + 1)))) + s) + target)))) + +(define (emit-text s . maybe-port) + (if (null? maybe-port) + (write-string (escape-html s)) + (write-string (escape-html s) (car maybe-port)))) \ No newline at end of file diff --git a/http-top.scm b/http-top.scm new file mode 100644 index 0000000..954b401 --- /dev/null +++ b/http-top.scm @@ -0,0 +1,58 @@ +;;; Scheme Underground Web Server -*- Scheme -*- +;;; Olin Shivers + +;;; This file contains a few example top-level path-handlers and +;;; other useful fragments. + +;;; We import procedures from these structures: +;;; httpd-core +;;; cgi-server-package +;;; httpd-basic-handlers +;;; seval-handler-package + + +;;; - /h// => serve from ~user/public_html. +;;; - /seval You may POST Scheme code to this URL, and receive the output. +;;; - Otherwise, serve files from the standard HTTP demon repository. + +(define ph1 + (alist-path-dispatcher + `(("h" . ,(home-dir-handler "public_html")) + ("seval" . ,seval-handler) + ("cgi-bin" . ,(cgi-handler "/usr/local/etc/httpd/cgi-bin"))) + (rooted-file-handler "/usr/local/etc/httpd/htdocs"))) + + +;;; Do a rough approximation of NCSA httpd server semantics: +;;; - /~shivers/... serves file ~shivers/public_html/... +;;; - /cgi-bin/ passes control to script +;;; /usr/local/etc/httpd/cgi-bin/ +;;; - Otherwise, just serve files out of the standard directory. + +(define ph2 + (alist-path-dispatcher + `(("cgi-bin" . ,(cgi-handler "/usr/local/etc/httpd/cgi-bin"))) + (tilde-home-dir-handler "public_html" + (rooted-file-handler "/usr/local/etc/httpd/htdocs")))) + +;;; Greatest hits path handler. + +(define ph3 + (alist-path-dispatcher + `(("h" . ,(home-dir-handler "public_html")) + ("seval" . ,seval-handler) + ("cgi-bin" . ,(cgi-handler "/usr/local/etc/httpd/cgi-bin"))) + (tilde-home-dir-handler "public_html" + (rooted-file-handler "/usr/local/etc/httpd/htdocs")))) + + + +;;; Crank up a server on port 8001, first resetting our identity to +;;; user "nobody". Initialise the request-invariant part of the CGI +;;; env before starting. + +(define (httpd1) + (set-gid -2) ; Should be (set-uid (->uid "nobody")) + (set-uid -2) ; but NeXTSTEP loses. + (initialise-request-invariant-cgi-env) + (httpd ph 8001 "/usr/local/etc/httpd/htdocs")) diff --git a/httpd-access-control.scm b/httpd-access-control.scm new file mode 100644 index 0000000..be61e4a --- /dev/null +++ b/httpd-access-control.scm @@ -0,0 +1,77 @@ +;;; http server in the Scheme Shell -*- Scheme -*- +;;; Access control +;;; Copyright (c) 1996 by Mike Sperber. + +;;; This code is very rudimentary at the moment and up for some expansion. +;;; Right now, it is primarily useful for running the server through a +;;; web accelerator + +;;; Also notes that this code doesn't work in vanilla 0.4.4 as +;;; host-info is broken. + +(define (access-denier . hosts) + (lambda (info) + (and (any? (lambda (host) + (host-matches? info host)) + hosts) + 'deny))) + +(define (access-allower . hosts) + (lambda (info) + (and (any? (lambda (host) + (host-matches? info host)) + hosts) + 'allow))) + +(define (access-controller . controls) + (lambda (info) + (let loop ((controls controls)) + (if (null? controls) + #f + (cond + (((car controls) info) => identity) + (else (loop (cdr controls)))))))) + +(define (access-controlled-handler control ph) + (lambda (path req) + (if (eq? + (control (host-info (socket-remote-address (request:socket req)))) + 'deny) + (http-error http-reply/forbidden req) + (ph path req)))) + +(define (address->list address) + (list (arithmetic-shift (bitwise-and address #xff000000) -24) + (arithmetic-shift (bitwise-and address #xff0000) -16) + (arithmetic-shift (bitwise-and address #xff00) -8) + (bitwise-and address #xff))) + +(define (host-matches? info host) + (cond + ((list? host) + (let ((len (length host))) + (any? (lambda (address) + (equal? (take len (address->list address)) host)) + (host-info:addresses info)))) + (else ; (string? host) + (any? (lambda (name) + (string-match host (downcase-string name))) + (cons (host-info:name info) + (host-info:aliases info)))))) + +(define normalize-host + (let ((split (infix-splitter "\\.")) + (number (make-regexp "[0-9]+"))) + (lambda (host) + (let ((components (split host))) + (if (every? (lambda (component) + (regexp-exec number component)) + components) + (map string->number components) + host))))) + +(define (take n l) + (let loop ((n n) (l l) (r '())) + (if (zero? n) + (reverse r) + (loop (- n 1) (cdr l) (cons (car l) r))))) \ No newline at end of file diff --git a/httpd-core.scm b/httpd-core.scm new file mode 100644 index 0000000..effb1a5 --- /dev/null +++ b/httpd-core.scm @@ -0,0 +1,480 @@ +;;; http server in the Scheme Shell -*- Scheme -*- +;;; Olin Shivers + +;;; Copyright (c) 1994 by Brian D. Carlstrom and Olin Shivers. + +;;; Problems: +;;; Need to html-quote URI's when printing them out to HTML text. + +;;; This file implements the core of an HTTP server: code to establish +;;; net connections, read and parse requests, and handler errors. +;;; It does not have the code to actually handle requests. That's up +;;; to other modules, and could vary from server to server. To build +;;; a complete server, you need to define path handlers (see below) -- +;;; they determine how requests are to be handled. +;;; +;;; A draft document detailing the HTTP 1.0 protocol can be found at +;;; http://www.w3.org/hypertext/WWW/Protocols/HTTP1.0/ +;;; draft-ietf-http-spec.html + +;;; Imports and non-R4RS'isms +;;; \r \n in strings for cr and lf. +;;; let-optionals (let-opt) +;;; receive values (MV return) +;;; scsh system calls +;;; rfc822 header parsing +;;; crlf-io (read cr/lf terminated lines) +;;; when, unless, switch, ? (conditionals) +;;; uri, url packages +;;; defrec package (record structures) +;;; defenum (enumerated types) +;;; ignore-errors (HANDLE package) +;;; string hacking stuff +;;; char-set stuff +;;; format (Formatted output) +;;; httpd error stuff +;;; condition-stuff (S48 error conditions) + +;;; Constants +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define server/version "Scheme-Underground/1.0") +(define server/protocol "HTTP/1.0") + +;;; Configurable Variables +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address for config error reports +(define server/admin "sperber@informatik.uni-tuebingen.de") +(define (set-server/admin! s) + (set! server/admin s)) + + +(define *http-log?* #t) +(define *http-log-port* (error-output-port)) +(define (http-log fmt . args) + (? (*http-log?* + (apply format *http-log-port* fmt args) + (force-output *http-log-port*)))) + + +;;; (httpd path-handler [port server-root-dir]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The server top-level. PATH-HANDLER is the top-level request path handler -- +;;; the procedure that actually deals with the request. PORT defaults to 80. +;;; SERVER-ROOT-DIR is the server's working directory; it defaults to +;;; /usr/local/etc/httpd + +(define (httpd path-handler . args) + (let-optionals args ((port 80) + (root-dir "/usr/local/etc/httpd")) + (with-cwd root-dir + (bind-listen-accept-loop protocol-family/internet + + ;; Why is the output socket unbuffered? So that if the client + ;; closes the connection, we won't lose when we try to close the + ;; socket by trying to flush the output buffer. + (lambda (sock addr) ; Called once for every connection. + (set-port-buffering (socket:outport sock) bufpol/none) ; No buffering + + (fork (lambda () ; Kill this line to bag forking. + (let* ((i (dup->inport (socket:inport sock) 0)) + (o (dup->outport (socket:outport sock) 1))) + (set-port-buffering i bufpol/none) ; Should propagate. ecch. + (with-current-input-port i ; bind the + (with-current-output-port o ; stdio ports, & + (process-toplevel-request path-handler sock))) ; do it. + (close-input-port i) ; Really only necessary + (close-output-port o)))) ; for non-forking variant. + + (reap-zombies) ; Clean up: reap dead children, + (close-socket sock)) ; and close socket. + + port)))) + +;;; Top-level http request processor +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Read, parse, and handle a single http request. The only thing that makes +;;; this complicated is handling errors -- as a server, we can't just let the +;;; standard error handlers toss us into a breakpoint. We have to catch the +;;; error, send an error reply back to the client if we can, and then keep +;;; on trucking. This means using the S48's condition system to catch and +;;; handle the various errors, which introduces a major point of R4RS +;;; incompatibiliy -- R4RS has no exception system. So if you were to port +;;; this code to some other Scheme, you'd really have to sit down and think +;;; about this issue for a minute. + +(define (process-toplevel-request handler sock) + ;; This top-level error-handler catches *all* uncaught errors and warnings. + ;; If the error condition is a reportable HTTP error, we send a reply back + ;; to the client. In any event, we abort the transaction, and return from + ;; PROCESS-TOPLEVEL-REQUEST. + ;; + ;; We *oughta* map non-http-errors into replies anyway. + (with-fatal-error-handler (lambda (c decline) ; No call to decline + (http-log "Error! ~s~%" c) + (if (http-error? c) ; -- we handle all. + (apply send-http-error-reply + (condition-stuff c)))) + + (let ((req (with-fatal-error-handler ; Map syntax errors + (lambda (c decline) ; to http errors. + (if (fatal-syntax-error? c) + (apply http-error http-reply/bad-request + #f ; No request yet. + "Request parsing error -- report to client maintainer." + (condition-stuff c)) + (decline))) ; Actual work: + (parse-http-request sock)))) ; (1) Parse request. + (handler (http-url:path (request:url req)) req)))) ; (2) Deal with it. + + +;;;; HTTP request parsing +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; This code defines the http REQUEST data structure, and provides +;;;; code to read requests from an input port. + +(define-record request + method ; A string such as "GET", "PUT", etc. + uri ; The escaped URI string as read from request line. + url ; An http URL record (see url.scm). + version ; A (major . minor) integer pair. + headers ; An rfc822 header alist (see rfc822.scm). + socket) ; The socket connected to the client. + +;;; A http protocol version is an integer pair: (major . minor). + +(define (version< v1 v2) + (or (< (car v1) (car v2)) + (and (= (car v1) (car v2)) + (< (cdr v1) (cdr v2))))) + +(define (version<= v1 v2) (not (version< v2 v1))) + +(define (v0.9-request? req) + (version<= (request:version req) '(0 . 9))) + + +(define (version->string v) + (string-append "HTTP/" + (number->string (car v)) + "." + (number->string (cdr v)))) + + +;;; Read and parse an http request from INPORT. +;;; +;;; Note: this parser parses the URI into an http URL record. If the URI +;;; isn't an http URL, the parser fails. This may not be right. There's +;;; nothing in the http protocol to prevent you from passing a non-http +;;; URI -- what this would mean, however, is not clear. Like so much of +;;; the Web, the protocols are redundant, underconstrained, and ill-specified. + +(define (parse-http-request sock) + (let ((line (read-crlf-line))) + + ;; Blat out some logging info. + + (if *http-log?* + (let* ((addr (socket-remote-address sock)) + (host (host-info:name (host-info addr)))) + (http-log "~a: ~a~%" host line))) + + (if (eof-object? line) + (fatal-syntax-error "EOF while parsing request.") + + (let* ((elts (string->words line)) ; Split at white-space. + (version (switch = (length elts) + ((2) '(0 . 9)) + ((3) (parse-http-version (caddr elts))) + (else (fatal-syntax-error "Bad HTTP version."))))) + + (let* ((meth (car elts)) + (uri-string (cadr elts)) + (url (parse-http-servers-url-fragment uri-string sock)) + (headers (if (equal? version '(0 . 9)) '() + (read-rfc822-headers)))) + (make-request meth uri-string url version headers sock)))))) + + + +;;; Parse the URL, but if it begins without the "http://host:port" prefix, +;;; interpolate one from SOCKET. It would sleazier but faster if we just +;;; computed the default host and port at server-startup time, instead of +;;; on every request. + +(define (parse-http-servers-url-fragment uri-string socket) + (receive (scheme path search frag-id) (parse-uri uri-string) + (if frag-id ; Can't have a #frag part. + (fatal-syntax-error "HTTP URL contains illegal # suffix." + uri-string) + + (if scheme + (if (string-ci=? scheme "http") ; Better be an http url. + (parse-http-url path search #f) + (fatal-syntax-error "Non-HTTP URL" uri-string)) + + ;; Interpolate the userhost struct from our net connection. + (if (and (pair? path) (string=? (car path) "")) + (let* ((addr (socket-local-address socket)) + (local-name (my-fqdn addr)) + (portnum (my-port addr))) + (make-http-url (make-userhost #f #f + local-name + (number->string portnum)) + (map unescape-uri (cdr path)) ;Skip initial /. + search + #f)) + + (fatal-syntax-error "Path fragment must begin with slash" + uri-string)))))) + + +(define parse-http-version + (let ((re (make-regexp "^HTTP/([0-9]+)\\.([0-9]+)$")) + (lose (lambda (s) (fatal-syntax-error "Bad HTTP version" s)))) + (lambda (vstring) + (let ((m (regexp-exec re vstring))) + (if m + (cons (or (string->number (match:substring m 1) 10) (lose vstring)) + (or (string->number (match:substring m 2) 10) (lose vstring))) + (lose vstring)))))) + + +;;; Split string into a list of whitespace-separated strings. +;;; This could have been trivially defined in scsh as (field-splitter " \t\n") +;;; but I hand-coded it because it's short, and I didn't want invoke the +;;; regexp machinery for something so simple. + +(define non-whitespace (char-set-invert char-set:whitespace)) + +(define (string->words s) + (let recur ((start 0)) + (? ((char-set-index s non-whitespace start) => + (lambda (start) + (? ((char-set-index s char-set:whitespace start) => + (lambda (end) + (cons (substring s start end) + (recur end)))) + (else (list (substring s start (string-length s))))))) + (else '())))) + + +;;;; Sending replies +;;;;;;;;;;;;;;;;;;;; + +;;; Reply codes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (define http-reply/ok 200), etc. +;;; Also, build an alist HTTP-REPLY-TEXT-TABLE mapping integer reply codes +;;; to their diagnostic text messages. + +(define-syntax define-http-reply-codes + (syntax-rules () + ((define-http-reply-codes table set (name val msg) ...) + (begin (define table '((val . msg) ...)) + (define-enum-constant set name val) + ...)))) + +(define-http-reply-codes http-reply-text-table http-reply + (ok 200 "OK") + (created 201 "Created") + (accepted 202 "Accepted") + (prov-info 203 "Provisional Information") + (no-content 204 "No Content") + + (mult-choice 300 "Multiple Choices") + (moved-perm 301 "Moved Permanently") + (moved-temp 302 "Moved Temporarily") + (method 303 "Method (obsolete)") + (not-mod 304 "Not Modified") + + (bad-request 400 "Bad Request") + (unauthorized 401 "Unauthorized") + (payment-req 402 "Payment Required") + (forbidden 403 "Forbidden") + (not-found 404 "Not Found") + (method-not-allowed 405 "Method Not Allowed") + (none-acceptable 406 "None Acceptable") + (proxy-auth-required 407 "Proxy Authentication Required") + (timeout 408 "Request Timeout") + (conflict 409 "Conflict") + (gone 410 "Gone") + + (internal-error 500 "Internal Server Error") + (not-implemented 501 "Not Implemented") + (bad-gateway 502 "Bad Gateway") + (service-unavailable 503 "Service Unavailable") + (gateway-timeout 504 "Gateway Timeout")) + +(define (reply-code->text code) + (cdr (assv code http-reply-text-table))) + + +;;; Text generation utilities. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (time->http-date-string time) + (format-date "~A, ~d-~b-~y ~H:~M:~S GMT" (date time 0))) + +;;; Output the first chunk of a reply header. + +(define (begin-http-header out reply-code) + (format out "~A ~d ~A\r~%" + server/protocol reply-code (reply-code->text reply-code)) + (format out "Date: ~A\r~%" (time->http-date-string (time))) + (format out "Server: ~A\r~%" server/version)) + +(define (title-html out message new-protocol?) + (if new-protocol? (write-crlf out)) ; Separate html from headers. + (format out "~%~%~A~%~%~%~%" message) + (format out "~%

~A

~%" message)) + + +;;; (send-http-error-reply reply-code req [message . extras]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Take an http-error condition, and format it into a reply to the client. +;;; +;;; As a special case, request REQ is allowed to be #f, meaning we haven't +;;; even had a chance to parse and construct the request. This is only used +;;; for 400 BAD-REQUEST error report, and we make minimal assumptions in this +;;; case (0.9 protocol for the reply, for example). I might be better off +;;; writing a special-case procedure for that case... + +;;; SEND-HTTP-ERROR-REPLY is called from error handlers, so to avoid +;;; infinite looping, if an error occurs while it is running, we just +;;; silently return. (We no longer need to do this; I have changed +;;; WITH-FATAL-ERROR-HANDLER* so that this is not necessary, but I'll +;;; leave it in to play it safe.) + +(define (send-http-error-reply reply-code req . args) + (ignore-errors (lambda () ; Ignore errors -- see note above. + (apply really-send-http-error-reply reply-code req args)))) + +(define (really-send-http-error-reply reply-code req . args) + (let* ((message (if (pair? args) (car args))) + (extras (if (pair? args) (cdr args) '())) + + (new-protocol? (and req (not (v0.9-request? req)))) ; 1.0 or better? + + ;; Is it OK to send back an HTML body explaining things? + (html-ok? (or (not req) + (not (string=? (request:method req) "HEAD")))) + + (out (current-output-port)) + + (generic-title (lambda () + (title-html out + (reply-code->text reply-code) + new-protocol?))) + + (do-msg (lambda () (? (message (display message out) (newline out)))))) + + (if new-protocol? (begin-http-header out reply-code)) + + ;; Don't output the blank line, as individual clauses might + ;; want to add more headers. + (if html-ok? (write-string "Content-type: text/html\r\n" out)) + + ;; If html-ok?, we must send back some html, with the tag unclosed. + (switch = reply-code + + ;; This error reply requires two args: message is the new URI: field, + ;; and the first EXTRA is the older Location: field. + ((http-reply/moved-temp http-reply/moved-perm) + (when new-protocol? + (format out "URI: ~A\r~%" message) + (format out "Location: ~A\r~%" (car extras))) + (when html-ok? + (title-html out "Document moved" new-protocol?) + (format out + "This document has ~A moved to a new location.~%" + (if (= reply-code http-reply/moved-temp) "temporarily" "permanently") + message))) + + ((http-reply/bad-request) + (when html-ok? + (generic-title) + (write-string "

Client sent a query that this server could not understand.\n" + out) + (if message (format out "
~%Reason: ~A~%" message)))) + + ((http-reply/unauthorized) + (if new-protocol? + (format out "WWW-Authenticate: ~A\r~%\r~%" message)) ; Vas is das? + (when html-ok? + (title-html out "Authorization Required" new-protocol?) + (write-string "

Browser not authentication-capable or\n" out) + (write-string "authentication failed.\n" out) + (if message (format out "~a~%" message)))) + + ((http-reply/forbidden) + (unless html-ok? + (title-html out "Request not allowed." new-protocol?) + (format out "Your client does not have permission to perform a ~A~%" + (request:method req)) + (format out "operation on url ~a.~%" (request:uri req)) + (if message (format out "

~%~a~%" message)))) + + ((http-reply/not-found) + (when html-ok? + (title-html out "URL not found" new-protocol?) + (write-string "

The requested URL was not found on this server.\n" + out) + (if message (format out "

~%~a~%" message)))) + + ((http-reply/internal-error) + (format (error-output-port) "ERROR: ~A~%" message) + (when html-ok? + (generic-title) + (format out "The server encountered an internal error or +misconfiguration and was unable to complete your request. +

+Please inform the server administrator, ~A, of the circumstances leading to +the error, and time it occured.~%" + server/admin) + (if message (format out "

~%~a~%" message)))) + + ((http-reply/not-implemented) + (when html-ok? + (generic-title) + (format out "This server does not currently implement +the requested method (~A).~%" + (request:method req)) + (if message (format out "

~a~%" message)))) + + (else (if html-ok? (generic-title)))) + + (? (html-ok? + ;; Output extra stuff and close the tag. + (for-each (lambda (x) (format out "
~s~%" x)) extras) + (write-string "\n" out))) + (force-output out) +; (if bkp? (breakpoint "http error")) + )) + + +;;; Return my Internet host name (my fully-qualified domain name). +;;; This works only if an actual resolver is behind host-info. +;;; +;;; On systems that do DNS via NIS/Yellow Pages, you only get an +;;; unqualified hostname. Also, in case of aliased names, you just +;;; might get the wrong one. Furthermore, you may get screwed in the +;;; presence of a server accelerator such as Squid. +;;; +;;; In these cases, and on NeXTSTEP, you'll have to set it by hand. + +(define (my-fqdn addr) + (or *my-fqdn* + (host-info:name (host-info addr)))) + +(define *my-fqdn* #f) +(define (set-my-fqdn! fqdn) + (set! *my-fqdn* fqdn)) + +(define (my-port addr) + (or *my-port* + (receive (ip-addr portnum) (socket-address->internet-address addr) + portnum))) +(define *my-port* #f) +(define (set-my-port! portnum) + (set! *my-port* portnum)) diff --git a/httpd-error.scm b/httpd-error.scm new file mode 100644 index 0000000..754dc14 --- /dev/null +++ b/httpd-error.scm @@ -0,0 +1,131 @@ +;;; Error stuff for the http server. -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; An http error condition is a data structure with the following pieces: +;;; (error-code request message . irritants) +;;; You recognise one with HTTP-ERROR?, and retrieve the pieces with +;;; CONDITION-STUFF. +;;; +;;; You can find out more about the Scheme 48 condition system by consulting +;;; s48-error.txt, where I scribbled some notes as I was browsing the source +;;; code when I wrote this file. + +;;; ,open conditions signals handle + +;;; HTTP error condition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Define a sub-type of the S48 error condition, the HTTP error condition. +;;; An HTTP error is one that corresponds to one of the HTTP error reply +;;; codes, so you can reliably use an HTTP error condition to construct an +;;; error reply message to send back to the HTTP client. + +(define-condition-type 'http-error '(error)) + +(define http-error? (condition-predicate 'http-error)) + +(define (http-error error-code req . args) + (apply signal 'http-error error-code req args)) + +;;; Syntax error condition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Scheme 48 has a "syntax error" error condition, but it isn't an error +;;; condition! It's a warning condition. I don't understand this. +;;; We define a *fatal* syntax error here for the parsers to use. + +(define-condition-type 'fatal-syntax-error '(error)) + +(define fatal-syntax-error? (condition-predicate 'fatal-syntax-error)) + +(define (fatal-syntax-error msg . irritants) + (apply signal 'fatal-syntax-error msg irritants)) + + +;;; (with-fatal-error-handler* handler thunk) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Call THUNK, and return whatever it returns. If THUNK signals a condition, +;;; and that condition is an error condition (or a subtype of error), then +;;; HANDLER gets a chance to handle it. +;;; The HANDLER proc is applied to two values: +;;; (HANDLER condition decline) +;;; HANDLER's continuation is WITH-FATAL-ERROR-HANDLER*'s; whatever HANDLER +;;; returns is returned from WITH-FATAL-ERROR-HANDLER. HANDLER declines to +;;; handle the error by throwing to DECLINE, a nullary continuation. +;;; +;;; Why is it called with-FATAL-error-handler*? Because returning to the +;;; guy that signalled the error is not an option. +;;; +;;; Why the nested outer pair of CALL/CC's? Well, what happens if the user's +;;; error handler *itself* raises an error? This could potentially give +;;; rise to an infinite loop, because WITH-HANDLER runs its handler in +;;; the original condition-signaller's context, so you'd search back for a +;;; handler, and find yourself again. For example, here is an infinite loop: +;;; +;;; (with-handler (lambda (condition more) +;;; (display "Loop!") +;;; (error "ouch")) ; Get back, Loretta. +;;; (lambda () (error "start me up"))) +;;; +;;; I could require W-F-E-H* users to code carefully, but instead I make sure +;;; the user's fatal-error handler runs in w-f-e-h*'s handler context, so +;;; if it signals a condition, we'll start the search from there. That's the +;;; point of continuation K. When the original thunk completes successfully, +;;; we dodge the K hackery by using ACCEPT to make a normal return. + +(define (with-fatal-error-handler* handler thunk) + (call-with-current-continuation + (lambda (accept) + ((call-with-current-continuation + (lambda (k) + (with-handler (lambda (condition more) + (if (error? condition) + (call-with-current-continuation + (lambda (decline) + (k (lambda () (handler condition decline)))))) + (more)) ; Keep looking for a handler. + (lambda () (call-with-values thunk accept))))))))) + +(define-syntax with-fatal-error-handler + (syntax-rules () + ((with-fatal-error-handler handler body ...) + (with-fatal-error-handler* handler + (lambda () body ...))))) + +;This one ran HANDLER in the signaller's condition-handler context. +;It was therefore susceptible to infinite loops if you didn't code +;your handler's carefully. +; +;(define (with-fatal-error-handler* handler thunk) +; (call-with-current-continuation +; (lambda (accept) +; (with-handler (lambda (condition more) +; (if (error? condition) +; (call-with-current-continuation +; (lambda (decline) +; (accept (handler condition decline))))) +; (more)) ; Keep looking for a handler. +; thunk)))) + +;;; (%error-handler-cond kont eh-clauses cond-clauses) +;;; Transform error-handler clauses into COND clauses by wrapping continuation +;;; KONT around the body of each e-h clause, so that if it fires, the result +;;; is thrown to KONT, but if no clause fires, the cond returns to the default +;;; continuation. + +;(define-syntax %error-handler-cond +; (syntax-rules (=> else) +; +; ((%error-handler-cond kont ((test => proc) clause ...) (ans ...)) +; (%error-handler-cond kont +; (clause ...) +; ((test => (lambda (v) (kont (proc v)))) ans ...))) +; +; ((%error-handler-cond kont ((test body ...) clause ...) (ans ...)) +; (%error-handler-cond kont +; (clause ...) +; ((test (kont (begin body ...))) ans ...))) +; +; ((%error-handler-cond kont ((else body ...)) (ans-clause ...)) +; (cond (else body ...) ans-clause ...)) +; +; ((%error-handler-cond kont () (ans-clause ...)) +; (cond ans-clause ...)))) diff --git a/httpd-handlers.scm b/httpd-handlers.scm new file mode 100644 index 0000000..06d6fce --- /dev/null +++ b/httpd-handlers.scm @@ -0,0 +1,534 @@ +;;; http server in the Scheme Shell -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; Imports and non-R4RS'isms +;;; scsh syscalls +;;; format Formatted output +;;; ?, UNLESS, SWITCH Conditionals +;;; httpd-core stuff +;;; httpd error stuff +;;; CONDITION-STUFF Scheme 48 error conditions +;;; url stuff + +;;; Path handlers +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Path handlers are the guys that actually perform the requested operation +;;; on the URL. The handler interface is +;;; (handler path-list request) +;;; The path-list is a URL path list that is a suffix of REQUEST's url's +;;; path-list. Path handlers can decide how to handle an operation by +;;; recursively keying off of the elements in path-list. +;;; +;;; The object-oriented view: +;;; One way to look at this is to think of the request's METHOD as a +;;; generic operation on the URL. Recursive path handlers do method +;;; lookup to determine how to implement a given operation on a particular +;;; path. +;;; +;;; The REQUEST is a request record, as defined in httpd-core.scm, containing +;;; the details of the client request. However, path handlers should *not* +;;; read the request entity from, or write the reply to the request's socket. +;;; Path-handler I/O should be done on the current i/o ports: if the handler +;;; needs to read an entity, it should read it from (CURRENT-INPUT-PORT); when +;;; the handler wishes to write a reply, it should write it to +;;; (CURRENT-OUTPUT-PORT). This makes it easy for the procedure that called +;;; the handler to establish I/O indirections or filters if it so desires. +;;; +;;; This file implements a basic set of path handlers and some useful +;;; support procedures for them. + + +(define server/buffer-size 8192) ; WTF + + +;;; (alist-path-dispatcher hander-alist default-handler) -> handler +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This function creates a table-driven path-handler that dispatches off +;;; of the car of the request path. The handler uses the car to index into +;;; a path-handler alist. If it finds a hit, it recurses using the table's +;;; path-handler. If no hits, it handles the path with a default handler. +;;; An alist handler is passed the tail of the original path; the +;;; default handler gets the entire original path. +;;; +;;; This procedure is how you say: "If the first element of the URL's +;;; path is 'foo', do X; if it's 'bar', do Y; otherwise, do Z." + +(define (alist-path-dispatcher handler-alist default-handler) + (lambda (path req) + (? ((and (pair? path) (assoc (car path) handler-alist)) => + (lambda (entry) ((cdr entry) (cdr path) req))) + (else (default-handler path req))))) + + +;;; (home-dir-handler user-public-dir) -> handler +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Return a path handler that looks things up in a specific directory +;;; in the user's home directory. If ph = (home-dir-handler "public_html") +;;; then ph is a path-handler that serves files out of peoples' public_html +;;; subdirectory. So +;;; (ph '("shivers" "hk.html") req) +;;; will serve the file +;;; ~shivers/public_html/hk.html +;;; The path handler treats the URL path as ( . ), +;;; serving +;;; ~// + +(define (home-dir-handler user-public-dir) + (lambda (path req) + (if (pair? path) + (serve-rooted-file-path (string-append (http-homedir (car path) req) + "/" + user-public-dir) + (cdr path) + file-serve + req) + (http-error http-reply/bad-request req + "Path contains no home directory.")))) + +;;; (tilde-home-dir-handler user-public-dir default) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; If the car of the path is a tilde-marked home directory (e.g., "~kgk"), +;;; do home-directory service as in HOME-DIR-HANDLER, otherwise punt to the +;;; default handler. + +(define (tilde-home-dir-handler user-public-dir default-ph) + (lambda (path req) + (if (and (pair? path) ; Is it a ~foo/... + (let ((head (car path))) ; home-directory path? + (and (> (string-length head) 0) + (char=? (string-ref head 0) #\~)))) + + (let* ((tilde-home (car path)) ; Yes. + (slen (string-length tilde-home)) + (subdir (string-append + (http-homedir (substring tilde-home 1 slen) req) + "/" + user-public-dir))) + (serve-rooted-file-path subdir (cdr path) file-serve req)) + + (default-ph path req)))) ; No. + + +;;; Make a handler that serves files relative to a particular root +;;; in the file system. You may follow symlinks, but you can't back up +;;; past ROOT with ..'s. + +(define (rooted-file-handler root) + (lambda (path req) + (serve-rooted-file-path root path file-serve req))) + +;;; Dito, but also serve directory indices for directories without +;;; index.html. ICON-NAME specifies how to generate the links to +;;; various decorative icons for the listings. It can either be a +;;; prcoedure which gets passed one of the icon tags in TAG->ICON and +;;; is expected to return a link pointing to the icon. If it is a +;;; string, that is taken as prefix to which the names from TAG->ICON +;;; are appended. + +(define (rooted-file-or-directory-handler root icon-name) + (let ((file-serve-and-dir (file-server-and-dir icon-name))) + (lambda (path req) + (serve-rooted-file-path root path file-serve-and-dir req)))) + + +;;; The null path handler -- handles nothing, sends back an error reply. +;;; Can be useful as the default in table-driven path handlers. + +(define (null-path-handler path req) + (http-error http-reply/not-found req)) + + +;;;; Support procs for the path handlers +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +;;; (SERVE-ROOTED-FILE-PATH root file-path req) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Do a request for a file. The file-name is determined by appending the +;;; the FILE-PATH list the string ROOT. E.g., if +;;; ROOT = "/usr/shivers" FILE-PATH = ("a" "b" "c" "foo.html") +;;; then we serve file +;;; /usr/shivers/a/b/c/foo.html +;;; Elements of FILE-PATH are *not allowed* to contain .. elements. +;;; (N.B.: Although the ..'s can appear in relative URI's, /foo/../ path +;;; sequences are processed away by the browser when the URI is converted +;;; to an absolute URI before it is sent off to the server.) +;;; It is possible to sneak a .. past this kind of front-end resolving by +;;; encoding it (e.g., "foo%2F%2E%2E" for "foo/.."). If the client tries +;;; this, SERVE-ROOTED-FILE-PATH will catch it, and abort the transaction. +;;; So you cannot make the reference back up past ROOT. E.g., this is +;;; not allowed: +;;; FILE-PATH = ("a" "../.." "c" "foo.html") +;;; +;;; Only GET and HEAD ops are provided. +;;; The URL's component must be #f. +;;; The file is served if the server has read or stat(2) access to it, +;;; respectively. If the server is run as root, this might be a problem. +;;; +;;; FILE-SERVE is a procedure which gets passed the file name, the +;;; path, and the HTTP request to serve the file propert after the +;;; security checks. Look in ROOTED-FILE-HANDLER and +;;; ROOTED-FILE-OR-DIRECTORY-HANDLER for examples on how to feed this. + +(define (serve-rooted-file-path root file-path file-serve req) + (if (http-url:search (request:url req)) + (http-error http-reply/bad-request req + "Indexed search not provided for this URL.") + + (? ((dotdot-check root file-path) => + (lambda (fname) (file-serve fname file-path req))) + (else + (http-error http-reply/bad-request req + "URL contains unresolvable ..'s."))))) + + +;; Just (file-info fname) with error handling. + +(define (stat-carefully fname req) + (with-errno-handler + ((errno packet) + ((errno/noent) + (http-error http-reply/not-found req)) + ((errno/acces) + (http-error http-reply/forbidden req))) + (file-info fname #t))) + +;;; A basic file request handler -- ship the dude the file. No fancy path +;;; checking. That has presumably been taken care of. This handler only +;;; takes care of GET and HEAD methods. + +(define (file-serve-or-dir fname file-path req directory-serve) + (if (file-name-directory? fname) ; Simple index generation. + (directory-serve fname file-path req) + + (switch string=? (request:method req) + (("GET" "HEAD") ; Absolutely. + (let ((info (stat-carefully fname req))) + (case (file-info:type info) + + ((regular fifo socket) + (send-file fname info req)) + + ((directory) ; Send back a redirection "foo" -> "foo/" + (http-error http-reply/moved-perm req + (string-append (request:uri req) "/") + (string-append (http-url->string (request:url req)) + "/"))) + + (else (http-error http-reply/forbidden req))))) + + (else (http-error http-reply/method-not-allowed req))))) + +(define (directory-index-serve fname file-path req) + (file-serve (string-append fname "index.html") file-path req)) + +(define (file-serve fname file-path req) + (file-serve-or-dir fname file-path req directory-index-serve)) + +(define (tag->alt tag) + (case tag + ((directory) "[DIR]") + ((text) "[TXT]") + ((doc) "[DOC]") + ((image) "[IMG]") + ((movie) "[MVI]") + ((audio) "[AU ]") + ((archive) "[TAR]") + ((compressed) "[ZIP]") + ((uu) "[UU ]") + ((binhex) "[HQX]") + ((binary) "[BIN]") + (else "[ ]"))) + +;; These icons can, for example, be found in the cern-httpd-3.0 +;; distribution at http://www.w3.org/pub/WWW/Daemon/ + +(define (tag->icon tag) + (case tag + ((directory) "directory.xbm") + ((text) "text.xbm") + ((doc) "doc.xbm") + ((image) "image.xbm") + ((movie) "movie.xbm") + ((audio) "sound.xbm") + ((archive) "tar.xbm") + ((compressed) "compressed.xbm") + ((uu) "uu.xbm") + ((binhex) "binhex.xbm") + ((binary) "binary.xbm") + ((blank) "blank.xbm") + ((back) "back.xbm") + (else "unknown.xbm"))) + +(define (file-extension->tag fname) + (switch string-ci=? (file-name-extension fname) + ((".txt") 'text) + ((".doc" ".html" ".rtf" ".tex") 'doc) + ((".gif" ".jpg" ".jpeg" ".tiff" ".tif") 'image) + ((".mpeg" ".mpg") 'movie) + ((".au" ".snd" ".wav") 'audio) + ((".tar" ".zip" ".zoo") 'archive) + ((".gz" ".Z" ".z") 'compressed) + ((".uu") 'uu) + ((".hqx") 'binhex) + (else 'binary))) + +(define (file-tag fname type) + (case type + ((regular fifo socket) (file-extension->tag fname)) + ((directory) 'directory) + (else 'unknown))) + +(define (time->directory-index-date-string time) + (format-date "~d-~b-~y ~H:~M:~S GMT" (date time 0))) + +(define (read-max-lines fname max) + (call-with-input-file + fname + (lambda (port) + (let loop ((r "") (i max)) + (if (zero? i) + r + (let ((line (read-line port))) + (if (eof-object? line) + r + (loop (string-append r " " line) (- i 1))))))))) + +(define (string-cut s n) + (if (>= (string-length s) n) + (substring s 0 n) + s)) + +(define html-file-header + (let ((title-tag-regexp (make-regexp "<[Tt][Ii][Tt][Ll][Ee]>")) + (title-close-tag-regexp (make-regexp ""))) + (lambda (fname n) + (let ((stuff (read-max-lines fname 10))) + (cond + ((regexp-exec title-tag-regexp stuff) + => (lambda (open-match) + (cond + ((regexp-exec title-close-tag-regexp stuff + (match:end open-match)) + => (lambda (close-match) + (string-cut (substring stuff + (match:end open-match) + (match:start close-match)) + n))) + (else (string-cut (substring stuff + (match:end open-match) + (string-length stuff)) + n))))) + (else "")))))) + +(define (file-documentation fname n) + (cond + ((file-extension->content-type fname) + => (lambda (content-type) + (if (and (string=? content-type "text/html" ) + (file-readable? fname)) + (html-file-header fname n) + ""))) + (else ""))) + +(define (directory-index req dir icon-name) + + (define (pad-file-name file) + (write-string (make-string (- 21 (string-length file)) + #\space))) + + (define (emit-file-name file) + (let ((l (string-length file))) + (if (<= l 20) + (emit-text file) + (emit-text (substring file 0 20))))) + + (define (index-entry file) + (let* ((fname (directory-as-file-name (string-append dir file))) + (info (stat-carefully fname req)) + (type (file-info:type info)) + (size (file-info:size info)) + (tag (file-tag file type))) + (emit-tag #t 'img + (cons 'src (icon-name tag)) + (cons 'alt (tag->alt tag))) + (with-tag #t a ((href file)) + (emit-file-name file)) + (pad-file-name file) + (emit-text (time->directory-index-date-string (file-info:mtime info))) + (if size + (let* ((size-string + (string-append (number->string (quotient size 1024)) + "K")) + (size-string + (if (<= (string-length size-string) 7) + size-string + (string-append (number->string (quotient size (* 1024 1024))) + "M"))) + (size-string + (if (<= (string-length size-string) 8) + (string-append + (make-string (- 8 (string-length size-string)) #\space) + size-string) + size-string))) + (write-string size-string)) + (write-string (make-string 8 #\space))) + (write-char #\space) + (emit-text (file-documentation fname 24)) + (newline))) + + (let ((files (with-errno-handler + ((errno packet) + ((errno/acces) + (http-error http-reply/forbidden req))) + (directory-files dir)))) + (for-each index-entry files) + (length files))) + +(define (directory-server icon-name) + (let ((icon-name + (cond + ((procedure? icon-name) icon-name) + ((string? icon-name) + (lambda (tag) + (string-append icon-name (tag->icon tag)))) + (else tag->icon)))) + (lambda (fname file-path req) + (switch string=? (request:method req) + (("GET" "HEAD") + + (unless (eq? 'directory (file-info:type (stat-carefully fname req))) + (http-error http-reply/forbidden req)) + + (unless (v0.9-request? req) + (begin-http-header #t http-reply/ok) + (write-string "Content-type: text/html\r\n") + (write-string "\r\n")) + + (with-tag #t html () + (let ((title (string-append "Index of /" + (join-strings file-path "/")))) + (with-tag #t head () + (emit-title #t title)) + (with-tag #t body () + (emit-header #t 1 title) + (with-tag #t pre () + (emit-tag #t 'img + (cons 'src (icon-name 'blank)) + (cons 'alt " ")) + (write-string "Name ") + (write-string "Last modified ") + (write-string "Size ") + (write-string "Description") + (emit-tag #t 'hr) + (emit-tag #t 'img + (cons 'src (icon-name 'back)) + (cons 'alt "[UP ]")) + (unless (null? file-path) + (with-tag #t a ((href "..")) + (write-string "Parent directory")) + (newline)) + (let ((n-files (directory-index req fname icon-name))) + (emit-tag #t 'hr) + (format #t "~d files" n-files))))))) + (else (http-error http-reply/method-not-allowed req)))))) + +(define (index-or-directory-server icon-name) + (let ((directory-serve (directory-server icon-name))) + (lambda (fname file-path req) + (let ((index-fname (string-append fname "index.html"))) + + (if (file-readable? index-fname) + (file-serve index-fname file-path req) + (directory-serve fname file-path req)))))) + +(define (file-server-and-dir icon-name) + (let ((index-or-directory-serve (index-or-directory-server icon-name))) + (lambda (fname file-path req) + (file-serve-or-dir fname file-path req index-or-directory-serve)))) + +;;; Look up user's home directory, generating an HTTP error reply if you lose. + +(define (http-homedir username req) + (with-fatal-error-handler (lambda (c decline) + (apply http-error http-reply/bad-request req + "Couldn't find user's home directory." + (condition-stuff c))) + + (home-dir username))) + + +(define (send-file filename info req) + (with-errno-handler ((errno packet) + ((errno/acces) + (http-error http-reply/forbidden req)) + ((errno/noent) + (http-error http-reply/not-found req))) + (call-with-input-file filename + (lambda (in) + (let ((out (current-output-port))) + (unless (v0.9-request? req) + (begin-http-header out http-reply/ok) + (receive (filename content-encoding) + (file-extension->content-encoding filename) + (if content-encoding + (format out "Content-encoding: ~A\r~%" content-encoding)) + (? ((file-extension->content-type filename) => + (lambda (ct) (format out "Content-type: ~A\r~%" ct))))) + (format out "Last-modified: ~A\r~%" + (time->http-date-string (file-info:mtime info))) + (format out "Content-length: ~D\r~%" (file-info:size info)) + (write-string "\r\n" out)) + (copy-inport->outport in out)))))) + + +;;; Assemble a filename from ROOT and the elts of PATH-LIST. +;;; If the assembled filename contains a .. subdirectory, return #f, +;;; otw return the filename. + +(define dotdot-check + (let ((dotdot-re (make-regexp "(^|/)\\.\\.($|/)"))) ; Matches a .. subdir. + (lambda (root path-list) + (let ((fname (if (null? path-list) root ; Bogus hack. + (string-append (file-name-as-directory root) + (join-strings path-list "/"))))) + (and (not (regexp-exec dotdot-re fname)) ; Check for .. subdir. + fname))))) + + +(define (file-extension->content-type fname) + (switch string-ci=? (file-name-extension fname) + ((".html") "text/html") + ((".gif") "image/gif") + ((".jpg" ".jpeg") "image/jpeg") + ((".tiff" ".tif") "image/tif") + ((".rtf") "text/rtf") + ((".mpeg" ".mpg") "video/mpeg") + ((".au" ".snd") "audio/basic") + ((".wav") "audio/x-wav") + ((".dvi") "application/x-dvi") + ((".tex" ".latex") "application/latex") + ((".zip") "application/zip") + ((".tar") "application/tar") + ((".ps") "application/postscript") + (else #f))) + +(define (file-extension->content-encoding fname) + (cond + ((switch string-ci=? (file-name-extension fname) + ((".Z") "x-compress") + ((".gz") "x-gzip") + (else #f)) + => (lambda (encoding) + (values (file-name-sans-extension fname) encoding))) + (else (values fname #f)))) + +;;; Timeout on network writes? + +(define (copy-inport->outport in out) + (let ((buf (make-string server/buffer-size))) + (let loop () + (? ((read-string! buf in) => (lambda (nchars) + (write-string buf out 0 nchars) + (loop)))))) + (force-output out)) diff --git a/info-gateway.scm b/info-gateway.scm new file mode 100644 index 0000000..9b9978d --- /dev/null +++ b/info-gateway.scm @@ -0,0 +1,642 @@ +;;; GNU info -> HTML gateway for the SU web server. -*- Scheme -*- +;;; Copyright (c) 1996 by Mike Sperber. +;;; based on code with the same purpose by Gaebe Engelhart + + +;;; (info-handler parse-info reference find-icon address) -> handler +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This function creates a path handler that converts GNU info pages +;;; on-the-fly. It is highly parameterizable to accomodate a wide +;;; range of environments. The parameters specify how to find the +;;; source code for the info pages, and how to generate certain +;;; elements of the generated HTML output. +;;; +;;; PARSE-INFO specifies how to parse the URLs that end up in the +;;; handler. +;;; It can be: +;;; +;;; * a procedure which is called with the URL as its parameters. +;;; It is expected to return with two values, FIND-ENTRY and +;;; NODE-NAME. FIND-ENTRY, in turn, can be either a procedure +;;; which gets passed the file name of an info node and is +;;; supposed to return with an absolute name of same. If it is a +;;; list, that list is taken as a list of directories in which to +;;; search for the info files. NODE-NAME is supposed to be the +;;; name of an info node of the form (), extracted +;;; from the URL. +;;; +;;; * a list, in which case that is taken as a list of +;;; directories in which to search for the info files. The node +;;; name of a node is extracted from the URL by just taking the +;;; search component of the URL. +;;; +;;; * #f in which case the info path is taken from the environment +;;; variable INFOPATH, and the node name extraction works as +;;; above. +;;; +;;; REFERENCE specifies how to generate cross-references to other info +;;; nodes. It can be: +;;; +;;; * a procedure which gets called with the URL of the info page +;;; which contains the reference, and the node name of the node +;;; to be referenced. The procedure is expected to return the +;;; text for a link. +;;; +;;; * a string, in which case that is to be a prefix to which the +;;; node name is simply appended to yield the new link. +;;; +;;; * #f in which case all references have the form +;;; info?. +;;; +;;; FIND-ICON specifies to to find the various icons used to decorate +;;; info pages. It can be: +;;; +;;; * a procedure which gets passed one of the tags in +;;; DEFAULT-ICON-ALIST and is supposed to return a link for the +;;; appropriate icon (or #f if no icon is to be used) +;;; +;;; * a string which is taken as a prefix to which one of the +;;; appropriate icon name from DEFAULT-ICON-ALIST is appended. +;;; (Note that these icon names were stolen from the +;;; cern-httpd-3.0 distribution at +;;; http://www.w3.org/pub/WWW/Daemon/.) +;;; +;;; * a list which is taken as an alist of the same format as +;;; DEFAULT-ICON-ALIST. +;;; +;;; * #f in which case no icons are used. +;;; +;;; ADDRESS a string to be appended at the bottom of all info pages +;;; +;;; To install a vanilla info handler for a prefix "info?" that looks +;;; in the environment variable INFOPATH, just use something like +;;; (info-handler #f #f #f "Generated by info-gateway") + +;;; TODO: write a CGI version of this + +(define-condition-type 'info-gateway-error '(error)) + +(define info-gateway-error? (condition-predicate 'info-gateway-error)) + +(define (info-gateway-error msg . irritants) + (apply signal 'info-gateway-error msg irritants)) + +(define default-icon-alist + '((info . "infodoc.gif") + (up . "up.gif") + (next . "next.gif") + (previous . "prev.gif") + (menu . "menu.gif"))) + +(define (info-handler parse-info reference find-icon address) + (let ((icon-name + (cond + ((procedure? find-icon) find-icon) + ((string? find-icon) + (let ((alist + (map (lambda (entry) + (cons (car entry) + (string-append find-icon (cdr entry)))) + default-icon-alist))) + (lambda (tag) + (cond ((assq tag alist) => cdr) + (else #f))))) + ((list? find-icon) + (lambda (tag) + (cond ((assq tag find-icon) => cdr) + (else #f)))) + (else (lambda (tag) #f)))) + (parse-info-url + (cond + ((procedure? parse-info) parse-info) + ((list? parse-info) ; it's an info path + (lambda (url) + (values parse-info + (unescape-uri (http-url:search url))))) + (else + (let ((info-path ((infix-splitter ":") (getenv "INFOPATH")))) + (lambda (url) + (values info-path + (unescape-uri (http-url:search url)))))))) + (make-reference + (cond + ((procedure? reference) reference) + ((string? reference) + (lambda (url node-name) + (string-append reference node-name))) + (else + (lambda (url node-name) + (string-append "info?" node-name)))))) + + (lambda (path req) + (switch string=? (request:method req) + (("GET") + (with-fatal-error-handler + (lambda (c decline) + (cond + ((info-gateway-error? c) + (apply http-error http-reply/internal-error req + (condition-stuff c))) + ((http-error? c) + (apply http-error (car (condition-stuff c)) req + (cddr (condition-stuff c)))) + (else + (decline)))) + + (if (not (v0.9-request? req)) + (begin + (begin-http-header #t http-reply/ok) + (write-string "Content-type: text/html\r\n") + (write-string "\r\n"))) + + (receive (find-entry node-name) (parse-info-url (request:url req)) + (display-node node-name + (file-finder find-entry) + (referencer make-reference (request:url req)) + icon-name)) + + (with-tag #t address () + (write-string address)))) + (else (http-error http-reply/method-not-allowed req)))))) + +(define split-header-line + (let ((split (infix-splitter "(, *)|( +)|( *\t *)")) + (split-field (infix-splitter ": *"))) + (lambda (l) + (let ((fields (map split-field (split l)))) + + (define (search-field regexp) + (cond + ((any (lambda (field) + (string-match regexp (car field))) + fields) + => cadr) + (else #f))) + + (values (search-field "[F|f]ile") + (search-field "[N|n]ode") + (search-field "[U|u]p") + (search-field "[P|p]rev(ious)?") + (search-field "[N|n]ext")))))) + +(define (replace-if-empty-string s v) + (if (zero? (string-length s)) + v + s)) + +(define (string-newline->space s) + (string-map (lambda (c) + (if (char=? c #\newline) + #\space + c)) + s)) + +(define (parse-node-name node-name) + (cond + ((string-match "^\\((.*)\\)(.*)$" (string-newline->space node-name)) + => (lambda (match) + (values + (replace-if-empty-string (match:substring match 1) #f) + (replace-if-empty-string (match:substring match 2) "Top")))) + (else (values #f (string-newline->space node-name))))) + + +(define (unparse-node-name file node) + (let* ((ext (file-name-extension file)) + (file (if (string=? ext ".info") + (file-name-sans-extension file) + file))) + (receive (file node) (if (and (string=? "dir" file) + (not (string=? "" node)) + (not (string=? "Top" node))) + (values node "Top") + (values file node)) + (string-append "(" file ")" node)))) + +(define (display-icon file alt) + (emit-tag #t 'img + (cons 'src file) + (cons 'alt alt) + (cons 'align "bottom"))) + +(define (referencer make-reference old-entry) + (lambda (file node-name label . maybe-icon) + (receive (node-file node) (parse-node-name node-name) + (let ((file (or node-file file))) + (with-tag #t a ((href (make-reference + old-entry + (escape-uri (unparse-node-name file node))))) + (if (and (not (null? maybe-icon)) + (car maybe-icon)) + (display-icon (car maybe-icon) (cadr maybe-icon))) + (emit-text label)))))) + +(define node-prologue (ascii->char 31)) +(define node-epilogue-regexp + (make-regexp + (string-append (regexp-quote (string node-prologue)) + "|" + (regexp-quote (string (ascii->char 12)))))) + +(define (string-starts-with-char? s c) + (and (not (zero? (string-length s))) + (char=? c (string-ref s 0)))) + +(define (node-prologue? s) + (string-starts-with-char? s node-prologue)) +(define (node-epilogue? s) + (regexp-exec node-epilogue-regexp s)) + +;; Document title + +(define (display-title file node up previous next + display-reference icon-name) + + (define (maybe-display-header header icon alt) + (if header + (begin + (newline) + (with-tag #t b () + (display-reference file header header icon alt))))) + + (emit-title #t (string-append "Info Node: " + (unparse-node-name file node))) + (with-tag #t h1 () + (emit-tag #t 'img + (cons 'src (icon-name 'info)) + (cons 'alt "Info Node") + (cons 'align 'bottom)) + (write-string (unparse-node-name file node))) + (emit-tag #t 'hr) + (maybe-display-header next (icon-name 'next) "[Next]") + (maybe-display-header previous (icon-name 'previous) "[Previous]") + (maybe-display-header up (icon-name 'up) "[Up]") + + (if (or next previous up) + (emit-tag #t 'hr))) + +;; Text + + +;; Dealing with cross references +;; info sucks + +(define xref-marker-regexp (make-regexp "\\*[Nn]ote([ \n]|$)")) +(define xref-regexp (make-regexp "\\*[Nn]ote *([^:]*): *([^\t\n,.;:?!]*)")) + +(define max-xref-lines 3) + +(define complete-line + (let ((split-xref-markers (field-splitter xref-marker-regexp)) + (split-xrefs (field-splitter xref-regexp)) + (cr (string #\newline))) + (lambda (line port) + (let loop ((line line) (count max-xref-lines)) + (let ((xref-markers (split-xref-markers line)) + (xrefs (split-xrefs line))) + (if (= (length xref-markers) (length xrefs)) + line + (if (zero? count) + (info-gateway-error "invalid cross reference") + (let ((new-line (read-line port))) + (if (eof-object? new-line) + (info-gateway-error + "unexpected end of info file inside cross reference")) + (loop (string-append line cr new-line) (- count 1)))))))))) + + +(define (display-xref xref file display-reference) + (let* ((match (regexp-exec xref-regexp xref)) + (note (match:substring match 1)) + (node-name (match:substring match 2)) + (node-name (if (string=? "" node-name) note node-name)) + (node-name (substring node-name + (skip-whitespace node-name) + (string-length node-name)))) + (emit-text "See ") + (display-reference file node-name note))) + +(define display-text + (let ((split-xrefs (infix-splitter xref-regexp #f 'split))) + (lambda (line port file display-reference) + (let* ((line (complete-line line port)) + (components (split-xrefs line))) + ;; in components, every 2nd element is a cross reference + ;; also, it always has odd length or length zero + (if (not (null? components)) + (let loop ((components components)) + (emit-text (car components)) + (if (not (null? (cdr components))) + (begin + (display-xref (cadr components) file display-reference) + (loop (cddr components)))))) + (newline))))) + +;; Menus + +(define menu-regexp (make-regexp "^\\* +Menu:")) +(define menu-item-regexp (make-regexp "^\\* +")) + +(define (char-splitter c) + (lambda (s) + (cond ((index s c) + => (lambda (i) + (values (substring s 0 i) + (substring s (+ 1 i) (string-length s))))) + (else (values s ""))))) + +(define colon-split (char-splitter #\:)) + +(define (display-menu-item-header line port file display-reference icon-name) + (let ((menu-line-split (infix-splitter menu-item-regexp))) + (receive (note rest) (colon-split (cadr (menu-line-split line))) + (receive (node-name text) + (cond + ((string-match ": *(.*)" rest) + => (lambda (match) + (values note (match:substring match 1)))) + ((string-match "^ *([^.]*)\\.? *(.*)" rest) + => (lambda (match) + (values (match:substring match 1) + (match:substring match 2)))) + (else + (info-gateway-error "invalid menu item"))) + (emit-tag #t 'dt) + (display-reference file node-name note (icon-name 'menu) "*") + (newline) + (if (and (not (string=? "" text)) + (not (string=? "." text))) + (begin + (emit-tag #t 'dd) + (display-text text port file display-reference))))))) + +(define (display-menu line port file display-reference icon-name) + (emit-close-tag #t 'pre) + + (with-tag #t dl () + (let loop ((line line)) + (if (eof-object? line) + (info-gateway-error "unexpected end of info file")) + + (display-menu-item-header line port file display-reference icon-name) + + (let finish-item-loop () + (if (eof-object? line) + (info-gateway-error "unexpected end of info file")) + + (let ((line (read-line port))) + (cond + ((or (eof-object? line) + (node-epilogue? line) + (string=? "" line)) + (emit-tag #t 'pre) + (dispatch-line line port file display-reference icon-name)) + ((regexp-exec menu-item-regexp line) + (loop line)) + (else + (display-text line port file display-reference) + (finish-item-loop)))))))) + +;; Central dispatch + +(define (dispatch-line line port file display-reference icon-name) + (cond + ((or (eof-object? line) (node-epilogue? line)) #f) + ((string=? "" line) (emit-p #t) #t) + ((regexp-exec menu-regexp line) #t) ;; this should probably be expanded + ((regexp-exec menu-item-regexp line) + (display-menu line port file display-reference icon-name)) + (else + (display-text line port file display-reference) #t))) + +(define (display-body port file display-reference icon-name) + (let loop () + (let ((line (read-line port))) + (if (dispatch-line line port file display-reference icon-name) + (loop))))) + +(define (display-node node-name find-file display-reference icon-name) + (receive (file node) (parse-node-name node-name) + (receive (port file-header node-header up-header prev-header next-header) + (find-node file node find-file) + + (with-tag #t html () + (with-tag #t head () + (display-title file node-header up-header + prev-header next-header + display-reference icon-name)) + (with-tag #t body () + (with-tag #t pre () + (display-body port file display-reference icon-name)))) + + (close-input-port port)))) + +;; Finding nodes + +(define (ensure-node-prologue port msg) + (let ((line (read-line port))) + (if (or (eof-object? line) + (not (node-prologue line))) + (info-gateway-error "invalid info file" msg)))) + +(define (ensure-regexp-line port regexp msg) + (let ((line (read-line port))) + (if (or (eof-object? line) + (not (string-match regexp line))) + (info-gateway-error "invalid info file" msg)))) + +(define (ensure-tag-table-node port) + (ensure-regexp-line port "^Tag Table:" "no tag table")) +(define (ensure-indirect-tag-table-header port) + (ensure-regexp-line port "^\\(Indirect\\)" "no indirect tag")) + +(define split-indirection (infix-splitter " *: *")) +(define (parse-indirection line) + (let ((l (split-indirection line))) + (if (null? (cdr l)) + (info-gateway-error "invalid indirection entry in info file") + (let ((file (car l)) + (seek-pos (string->number (cadr l)))) + (if (not seek-pos) + (info-gateway-error "invalid indirection entry in info file")) + (cons file seek-pos))))) + +(define (read-indirection-table port) + (let loop ((table '())) + (let ((line (read-line port))) + (if (eof-object? line) + (info-gateway-error "invalid info file")) + (if (node-epilogue? line) + (reverse table) + (loop (cons (parse-indirection line) table)))))) + +(define tag-seek-separator (ascii->char 127)) + +(define parse-tag + (let ((split (infix-splitter ", *")) + (split-field (infix-splitter ": ")) + (split-node-info + (infix-splitter (regexp-quote (string tag-seek-separator))))) + + (define (barf) + (info-gateway-error "invalid tag entry in info file")) + + (lambda (line) + (let* ((fields (map split-field (split line))) + (file (cond + ((assoc "File" fields) + => (lambda (p) + (if (null? (cdr p)) (barf)) + (cadr p))) + (else #f)))) + (cond + ((assoc "Node" fields) + => (lambda (p) + (if (null? (cdr p)) (barf)) + (let ((s (split-node-info (cadr p)))) + (if (null? (cdr p)) (barf)) + (let* ((node (car s)) + (seek (string->number (cadr s)))) + (if (not seek) (barf)) + (values node file seek))))) + (else (barf))))))) + +(define (find-tag node port) + (let loop () + (let ((line (read-line port))) + (if (eof-object? line) + (info-gateway-error "invalid info file")) + (if (regexp-exec node-epilogue-regexp line) + (http-error http-reply/not-found #f "node not found")) + (receive (entry-node file seek) (parse-tag line) + (if (string=? node entry-node) + (cons file seek) + (loop)))))) + +(define (find-indirection-entry seek-pos indirection-table) + (let loop ((table indirection-table)) + (if (null? table) + (http-error http-reply/not-found #f "node not found")) + (let* ((entry (car table)) + (pos (cdr entry))) + (if (and (>= seek-pos pos) + (or (null? (cdr table)) + (let* ((next-entry (cadr table)) + (next-pos (cdr next-entry))) + (< seek-pos next-pos)))) + entry + (loop (cdr table)))))) + +(define (file-finder with) + (cond ((procedure? with) with) + ((list? with) + (lambda (file) + (find-info-file file with))))) + +(define (find-node-port-with-tag-entry node tag-entry ? find-file) + (let* ((port (if (input-port? ?) ? #f)) + (indirection-table (if port #f ?)) + (seek-pos (cdr tag-entry)) + (indirection-entry + (and indirection-table + (find-indirection-entry seek-pos indirection-table))) + (seek-pos (if indirection-entry + (- seek-pos (cdr indirection-entry)) + seek-pos)) + ;; that's what the documentation says ... + (seek-pos (if (>= seek-pos 1000) + (- seek-pos 1000) + 0)) + (file (or (car tag-entry) + (and indirection-entry + (car indirection-entry)))) + (port (if file + (begin + (if port (close-input-port port)) + (open-input-file (find-file file))) + port))) + (seek port seek-pos) + port)) + +(define (find-node file node find-file) + (if (not file) + (http-error http-reply/not-found #f + "no file in info node specification")) + + (let* ((fname (find-file file)) + (port (open-input-file fname))) + (let loop ((port port)) + (let ((line (read-line port))) + (if (eof-object? line) + (http-error http-reply/not-found #f "info node not found")) + (if (node-prologue? line) + (let ((header (read-line port))) + (if (eof-object? header) + (info-gateway-error "invalid info file")) + (cond + + ((string-match "^Indirect:" header) + (let ((indirection-table + (read-indirection-table port))) + (ensure-tag-table-node port) + (ensure-indirect-tag-table-header port) + (let ((tag-entry (find-tag node port))) + (close-input-port port) + (loop (find-node-port-with-tag-entry + node tag-entry indirection-table find-file))))) + + ((string-match "^Tag Table:" header) + (let ((tag-entry (find-tag node port))) + (loop (find-node-port-with-tag-entry + node tag-entry port find-file)))) + + ((string-match "^File:" header) + (receive + (file-header node-header up-header prev-header next-header) + (split-header-line header) + (if (string=? node-header node) + (values port + file-header node-header + up-header prev-header next-header) + (loop port)))) + (else (loop port)))) + (loop port)))))) + +;; Finding files + +(define (info-file-alternative-names file) + (receive (dir base ext) (parse-file-name file) + (let* ((base + (cond ((string-match "(.*)-info$" base) + => (lambda (match) + (match:substring match 1))) + (else base))) + (base-ci (downcase-string base)) + (alts-1 (if (string=? base base-ci) + (list base) + (list base base-ci))) + (alts (append alts-1 + (map (lambda (base) + (string-append base ".info")) + alts-1))) + (alts (append alts + (map (lambda (base) + (string-append base "-info")) + alts-1))) + (alts (map (lambda (f) (string-append dir f)) alts)) + (alts (cons file alts))) + alts))) + +(define (find-info-file file info-path) + (let ((alts (info-file-alternative-names file))) + (let path-loop ((path info-path)) + (if (null? path) + (http-error http-reply/not-found #f "info file not found")) + (let alt-loop ((alts alts)) + (if (null? alts) + (path-loop (cdr path)) + (let ((try (string-append (file-name-as-directory (car path)) + (car alts)))) + (if (file-exists? try) + try + (alt-loop (cdr alts))))))))) diff --git a/modules.scm b/modules.scm new file mode 100644 index 0000000..d9783b8 --- /dev/null +++ b/modules.scm @@ -0,0 +1,374 @@ +;;; Scheme 48 module definitions for TCP/IP protocol suites. +;;; Copyright (c) 1995 by Olin Shivers. + +(define-structures + ((smtp (export sendmail %sendmail + expn vrfy mail-help + smtp-transactions + smtp-transactions/no-close + smtp/open smtp/helo smtp/mail smtp/rcpt smtp/data + smtp/send smtp/soml smtp/saml smtp/rset smtp/expn + smtp/help smtp/noop smtp/quit smtp/turn + handle-smtp-reply + read-smtp-reply + parse-smtp-reply + smtp-stuff)) + (smtp-internals (export read-crlf-line ; These two should be in an + write-crlf ; auxiliary module. + + smtp-query + nullary-smtp-command + unary-smtp-command))) + + + (open scsh ; write-string read-string/partial force-output + ; system-name user-login-name and sockets + crlf-io ; read-crlf-line write-crlf + receiving ; values receive + let-opt ; let-optionals + error-package ; error + switch-syntax ; switchq + condhax ; ? for COND + scheme) + (files smtp)) + + +(define-structure crlf-io (export read-crlf-line + write-crlf) + (open ascii ; ascii->char + scsh ; read-line write-string force-output + receiving ; MV return (RECEIVE and VALUES) + let-opt ; let-optionals + scheme) + (files crlf-io)) + + +(define-structures ((switch-syntax (export (switch :syntax) + (switchq :syntax))) + (condhax (export (when :syntax) + (unless :syntax) + (? :syntax)))) + (open scheme) + (files conditionals)) + + +(define-structure rfc822 (export read-rfc822-headers + read-rfc822-field + %read-rfc822-headers + %read-rfc822-field + rejoin-header-lines + get-header-all + get-header-lines + get-header + ) + (open receiving ; MV return (RECEIVE and VALUES) + condhax ; ? for COND + scsh-utilities ; index + let-opt ; let-optionals + strings ; lowercase-string uppercase-string + crlf-io ; read-crlf-line + ascii ; ascii->char + error-package ; error + scsh ; join-strings + scheme) + (files rfc822)) + + +(define-structure strings (export string-map + downcase-string + upcase-string + char-set-index + char-set-rindex + string-reduce + skip-whitespace + string-prefix? + string-suffix?) + (open char-set-package let-opt scheme) + (files stringhax)) + +(define-structure uri-package (export parse-uri + uri-escaped-chars + unescape-uri + escape-uri + resolve-uri + split-uri-path + uri-path-list->path + simplify-uri-path) + (open scsh-utilities + let-opt + receiving + condhax + ascii + strings + char-set-package + bitwise + field-reader-package + scheme) + (files uri)) + +(define-structure url-package (export userhost? ; USERHOST + make-userhost ; record struct + + userhost:user + userhost:password + userhost:host + userhost:port + + set-userhost:user + set-userhost:password + set-userhost:host + set-userhost:port + + parse-userhost ; parse & + userhost->string ; unparse. + + http-url? ; HTTP-URL + make-http-url ; record struct + + http-url:userhost + http-url:path + http-url:search + http-url:frag-id + + set-http-url:userhost + set-http-url:path + set-http-url:search + set-http-url:frag-id + + parse-http-url ; parse & + http-url->string) ; unparse. + + (open defrec-package + receiving + condhax + char-set-package + uri-package + scsh-utilities + httpd-error + scheme) + (files url)) + + +(define-structure httpd-error (export http-error? + http-error + fatal-syntax-error? + fatal-syntax-error + with-fatal-error-handler* + (with-fatal-error-handler :syntax)) + (open conditions signals handle scheme) + (files httpd-error)) + + +(define-structure httpd-core (export server/version + server/protocol + server/admin + set-server/admin! + + http-log + *http-log?* + *http-log-port* + + httpd + + make-request ; HTTP request + request? ; record type. + request:method + request:uri + request:url + request:version + request:headers + request:socket + set-request:method + set-request:uri + set-request:url + set-request:version + set-request:headers + set-request:socket + + version< version<= + v0.9-request? + version->string + + ;; Integer reply codes + reply-code->text + http-reply/ok + http-reply/created + http-reply/accepted + http-reply/prov-info + http-reply/no-content + http-reply/mult-choice + http-reply/moved-perm + http-reply/moved-temp + http-reply/method + http-reply/not-mod + http-reply/bad-request + http-reply/unauthorized + http-reply/payment-req + http-reply/forbidden + http-reply/not-found + http-reply/method-not-allowed + http-reply/none-acceptable + http-reply/proxy-auth-required + http-reply/timeout + http-reply/conflict + http-reply/gone + http-reply/internal-error + http-reply/not-implemented + http-reply/bad-gateway + http-reply/service-unavailable + http-reply/gateway-timeout + + time->http-date-string + begin-http-header + send-http-error-reply + + set-my-fqdn! + set-my-port!) + (open scsh + receiving + let-opt + crlf-io + rfc822 + switch-syntax + condhax + strings + char-set-package + defrec-package + handle + conditions ; condition-stuff + defenum-package + httpd-error + uri-package + url-package + formats + scheme) + (files httpd-core)) + + +;;; For parsing submissions from HTML forms. +(define-structure parse-html-forms (export parse-html-form-query unescape-uri+) + (open scsh scsh-utilities let-opt + receiving uri-package strings condhax scheme) + (files parse-forms)) + + +;;; For writing CGI scripts in Scheme. +(define-structure cgi-script-package (export cgi-form-query) + (open scsh + switch-syntax + error-package + parse-html-forms + scheme) + (files cgi-script)) + +;;; Provides the server interface to CGI scripts. +(define-structure cgi-server-package (export cgi-default-bin-path + cgi-handler + initialise-request-invariant-cgi-env) + (open strings + rfc822 + crlf-io ; WRITE-CRLF + uri-package + url-package ; HTTP-URL record type + httpd-core ; REQUEST record type, HTTP-ERROR & reply codes + ; version stuff + httpd-basic-handlers ; HTTP-HOMEDIR, SERVE-ROOTED-FILE-PATH + httpd-error ; HTTP-ERROR + scsh-utilities ; INDEX + scsh ; syscalls + formats ; format + condhax ; ? is COND + switch-syntax ; SWITCHQ + scheme) + (files cgi-server)) + +(define-structure htmlout-package (export emit-tag + emit-close-tag + + emit-p + emit-title + emit-header ; And so forth... + + with-tag + with-tag* + + escape-html + emit-text) + (open scsh scsh-utilities strings formats ascii receiving scheme) + (files htmlout)) + +(define-structure httpd-basic-handlers (export alist-path-dispatcher + home-dir-handler + tilde-home-dir-handler + rooted-file-handler + rooted-file-or-directory-handler + null-path-handler + serve-rooted-file-path + file-serve + file-server-and-dir + http-homedir + send-file + dotdot-check + file-extension->content-type + copy-inport->outport) + (open scsh ; syscalls + formats ; FORMAT + condhax ; UNLESS, ? for COND + switch-syntax ; Conditionals + httpd-core ; REQUEST record type, HTTP-ERROR & reply codes, + ; v0.9-request, begin-http-header + httpd-error + htmlout-package + conditions ; CONDITION-STUFF + url-package ; HTTP-URL record type + scheme) + (files httpd-handlers)) + + +(define-structure seval-handler-package (export seval-handler) + (open scsh ; syscalls & INDEX + condhax ; WHEN, ? for COND + switch-syntax ; Conditionals + httpd-error + httpd-core ; REQUEST record type, HTTP-ERROR & reply codes, + ; v0.9-request, reply formatting stuff. + uri-package ; UNESCAPE-URI + htmlout-package ; Formatted HTML output + error-package ; ERROR + pp ; Pretty-printer + strings rfc822 + toothless-eval ; EVAL-SAFELY + handle ; IGNORE-ERROR + strings ; SKIP-WHITESPACE + parse-html-forms ; PARSE-HTML-FORM-QUERY + scheme) + (files seval)) + +(define-structure httpd-access-control (export access-denier + access-allower + access-controller + access-controlled-handler) + (open big-scheme + strings + httpd-core + httpd-error + scsh + scheme) + (files httpd-access-control)) + +(define-structure info-gateway (export info-handler + find-info-file + info-gateway-error) + (open big-scheme + conditions signals handle + switch-syntax + condhax + strings + htmlout-package + httpd-core + httpd-error + url-package + uri-package + scsh + scheme) + (files info-gateway)) diff --git a/parse-forms.scm b/parse-forms.scm new file mode 100644 index 0000000..6eabc46 --- /dev/null +++ b/parse-forms.scm @@ -0,0 +1,65 @@ +;;; Code to parse information submitted from HTML forms. -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; See http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html + +;;; Imports and non-R4RS'isms +;;; index (scsh) +;;; let-optionals (let-opt package) +;;; receive (Multiple-value return) +;;; unescape-uri +;;; map-string (strings package) +;;; ? (cond) + +;;; About HTML forms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; The form's field data are turned into a single string, of the form +;;; The form's field data are turned into a single string, of the form +;;; name=val&name=val +;;; where the and parts are URI encoded to hide their +;;; &, =, and + chars, among other things. After URI encoding, the +;;; space chars are converted to + chars, just for fun. It is important +;;; to encode the spaces this way, because the perfectly general %xx escape +;;; mechanism might be insufficiently confusing. This variant encoding is +;;; called "form-url encoding." +;;; +;;; If the form's method is POST, +;;; Browser sends the form's field data in the entity block, e.g., +;;; "button=on&ans=yes". The request's Content-type: is application/ +;;; x-www-form-urlencoded, and the request's Content-length: is the +;;; number of bytes in the form data. +;;; +;;; If the form's method is GET, +;;; Browser sends the form's field data in the URL's part. +;;; (So the server will pass to the CGI script as $QUERY_STRING, +;;; and perhaps also on in argv[]). +;;; +;;; In either case, the data is "form-url encoded" (as described above). + +;;; Form-query parsing +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Parse "foo=x&bar=y" into (("foo" . "x") ("bar" . "y")) +;;; Substrings are plus-decoded and then URI-decoded. This implementation is +;;; slightly sleazy as it will successfully parse a string like "a&b=c&d=f" +;;; into (("a&b" . "c") ("d" . "f")) without a complaint. + +(define (parse-html-form-query q) + (let ((qlen (string-length q))) + (let recur ((i 0)) + (? ((index q #\= i) => + (lambda (j) + (let ((k (or (index q #\& j) qlen))) + (cons (cons (unescape-uri+ q i j) + (unescape-uri+ q (+ j 1) k)) + (recur (+ k 1)))))) + (else '()))))) ; BOGUS STRING -- Issue a warning. + + +;;; Map plus characters to spaces, then do URI decoding. +(define (unescape-uri+ s . maybe-start/end) + (let-optionals maybe-start/end ((start 0) + (end (string-length s))) + (unescape-uri (string-map (lambda (c) (if (char=? c #\+) #\space c)) + (if (and (zero? start) + (= end (string-length s))) + s ; Gratuitous optimisation. + (substring s start end)))))) diff --git a/program-modules.scm b/program-modules.scm new file mode 100644 index 0000000..c0424ac --- /dev/null +++ b/program-modules.scm @@ -0,0 +1,44 @@ +;;; Scheme 48 module definitions for Scheme program execution. +;;; Gebhard Engelhart. + +(define-structure scheme-program-server-package (export scheme-program-handler + runprogram) + (open strings + rfc822 + crlf-io ; WRITE-CRLF + uri-package + url-package ; HTTP-URL record type + httpd-core ; REQUEST record type, HTTP-ERROR & reply codes + ; version stuff + httpd-basic-handlers ; HTTP-HOMEDIR, SERVE-ROOTED-FILE-PATH + httpd-error ; HTTP-ERROR + scsh-utilities ; INDEX + scsh ; syscalls + formats ; format + condhax ; ? is COND + switch-syntax ; SWITCHQ + scheme) + (files scheme-program-server)) + +;;; package to load scheme programs + +(define-structure scheme-programs-package (export testprog + info2www) + (open scsh ; syscalls & INDEX + condhax ; WHEN, ? for COND + switch-syntax ; Conditionals + defrec-package ; Records + htmlout-package ; Formatted HTML output + httpd-core ; REQUEST record type, HTTP-ERROR & reply codes + ; version stuff + uri-package ; UNESCAPE-URI + error-package ; ERROR + pp ; Pretty-printer + strings rfc822 + toothless-eval ; EVAL-SAFELY + handle ; IGNORE-ERROR + strings ; SKIP-WHITESPACE + parse-html-forms ; PARSE-HTML-FORM-QUERY + scheme) + (files testprog + info2www)) \ No newline at end of file diff --git a/rfc822.scm b/rfc822.scm new file mode 100644 index 0000000..7334759 --- /dev/null +++ b/rfc822.scm @@ -0,0 +1,213 @@ +;;; RFC 822 field-parsing code -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. +;;; +;;; +;;; Imports and non-R4RS'isms +;;; downcase-string upcase-string (string->symbol conversion) +;;; read-crlf-line +;;; let-optionals, :optional +;;; receive values (MV return) +;;; "\r\n" in string for cr/lf +;;; ascii->char (defining the tab char) +;;; index +;;; join-strings (reassembling body lines) +;;; error +;;; ? (COND) + +;;; RFC 822 is the "Standard for the format of ARPA Internet text messages" +;;; -- the document that essentially tells how the fields in email headers +;;; (e.g., the Subject: and To: fields) are formatted. This code is for +;;; parsing these headers. Here are two pointers to the document: +;;; Emacs/ange /ftp@ftp.internic.net:/rfc/rfc822.txt +;;; URL ftp://ftp.internic.net/rfc/rfc822.txt +;;; RFC 822 parsing is useful in other contexts as well -- the HTTP protocol +;;; uses it, and it tends to pop up here and there. +;;; +;;; RFC 822 header syntax has two levels: the general syntax for headers, +;;; and the syntax for specific headers. For example, once you have figured +;;; out which chunk of text is the To: line, there are more rules telling +;;; how to split the To: line up into a list of addresses. Another example: +;;; lines with dates, e.g., the Date: header, have a specific syntax for +;;; the time and date. +;;; +;;; This code currently *only* provides routines for parsing the gross +;;; structure -- splitting the message header into its distinct fields. +;;; It would be nice to provide the finer-detail parsers, too. You do it. +;;; -Olin + +;;; A note on line-terminators: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Line-terminating sequences are always a drag, because there's no agreement +;;; on them -- the Net protocols and DOS use cr/lf; Unix uses lf; the Mac +;;; uses cr. One one hand, you'd like to use the code for all of the above, +;;; on the other, you'd also like to use the code for strict applications +;;; that need definitely not to recognise bare cr's or lf's as terminators. +;;; +;;; RFC 822 requires a cr/lf (carriage-return/line-feed) pair to terminate +;;; lines of text. On the other hand, careful perusal of the text shows up +;;; some ambiguities (there are maybe three or four of these, and I'm too +;;; lazy to write them all down). Furthermore, it is an unfortunate fact +;;; that many Unix apps separate lines of RFC 822 text with simple linefeeds +;;; (e.g., messages kept in /usr/spool/mail). As a result, this code takes a +;;; broad-minded view of line-terminators: lines can be terminated by either +;;; cr/lf or just lf, and either terminating sequence is trimmed. +;;; +;;; If you need stricter parsing, you can call the lower-level procedure +;;; %READ-RFC-822-FIELD and %READ-RFC822-HEADERS procs. They take the +;;; read-line procedure as an extra parameter. This means that you can +;;; pass in a procedure that recognises only cr/lf's, or only cr's (for a +;;; Mac app, perhaps), and you can determine whether or not the terminators +;;; get trimmed. However, your read-line procedure must indicate the +;;; header-terminating empty line by returning *either* the empty string or +;;; the two-char string cr/lf (or the EOF object). + +;;; (read-rfc822-field [port]) +;;; (%read-rfc822-field read-line port) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Read one field from the port, and return two values [NAME BODY]: +;;; - NAME Symbol such as 'subject or 'to. The field name is converted +;;; to a symbol using the Scheme implementation's preferred +;;; case. If the implementation reads symbols in a case-sensitive +;;; fashion (e.g., scsh), lowercase is used. This means you can +;;; compare these symbols to quoted constants using EQ?. When +;;; printing these field names out, it looks best if you capitalise +;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)). +;;; - BODY List of strings which are the field's body, e.g. +;;; ("shivers@lcs.mit.edu"). Each list element is one line from +;;; the field's body, so if the field spreads out over three lines, +;;; then the body is a list of three strings. The terminating +;;; cr/lf's are trimmed from each string. +;;; When there are no more fields -- EOF or a blank line has terminated the +;;; header section -- then the procedure returns [#f #f]. +;;; +;;; The %READ-RFC822-FIELD variant allows you to specify your own read-line +;;; procedure. The one used by READ-RFC822-FIELD terminates lines with either +;;; cr/lf or just lf, and it trims the terminator from the line. + +(define htab (ascii->char 9)) + +;;; Convert to a symbol using the Scheme implementation's preferred case, +;;; so we can compare these things against quoted constants. +(define string->symbol-pref + (if (char=? #\a (string-ref (symbol->string 'a) 0)) ; Is it #\a or #\A? + (lambda (s) (string->symbol (downcase-string s))) + (lambda (s) (string->symbol (upcase-string s))))) + +(define (read-rfc822-field . maybe-port) + (let-optionals maybe-port ((port (current-input-port))) + (%read-rfc822-field read-crlf-line port))) + +(define (%read-rfc822-field read-line port) + (let ((line1 (read-line port))) + (if (or (eof-object? line1) + (zero? (string-length line1)) + (string=? line1 "\r\n")) ; In case read-line doesn't trim. + + (values #f #f) ; Blank line or EOF terminates header text. + + (? ((index line1 #\:) => ; Find the colon and + (lambda (colon) ; split out field name. + (let ((name (string->symbol-pref (substring line1 0 colon)))) + ;; Read in continuation lines. + (let lp ((lines (list (substring line1 + (+ colon 1) + (string-length line1))))) + (let ((c (peek-char port))) ; Could return EOF. + (if (or (eqv? c #\space) (eqv? c htab)) + (lp (cons (read-line port) lines)) + (values name (reverse lines)))))))) + (else (error "Illegal RFC 822 field syntax." line1)))))) ; No : + + +;;; (read-rfc822-headers [port]) +;;; (%read-rfc822-headers read-line port) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Read in and parse up a section of text that looks like the header portion +;;; of an RFC 822 message. Return an alist mapping a field name (a symbol +;;; such as 'date or 'subject) to a list of field bodies -- one for +;;; each occurence of the field in the header. So if there are five +;;; "Received-by:" fields in the header, the alist maps 'received-by +;;; to a five element list. Each body is in turn represented by a list +;;; of strings -- one for each line of the field. So a field spread across +;;; three lines would produce a three element body. +;;; +;;; The %READ-RFC822-HEADERS variant allows you to specify your own read-line +;;; procedure. See notes above for reasons why. + +(define (read-rfc822-headers . maybe-port) + (let-optionals maybe-port ((port (current-input-port))) + (%read-rfc822-headers read-crlf-line port))) + +(define (%read-rfc822-headers read-line port) + (let lp ((alist '())) + (receive (field val) (%read-rfc822-field read-line port) + (? (field (? ((assq field alist) => + (lambda (entry) + (set-cdr! entry (cons val (cdr entry))) + (lp alist))) + (else (lp (cons (list field val) alist))))) + + ;; We are done. Reverse the order of each entry and return. + (else (for-each (lambda (entry) + (set-cdr! entry (reverse (cdr entry)))) + alist) + alist))))) + +;;; (rejoin-header-lines alist [separator]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Takes a field alist such as is returned by READ-RFC822-HEADERS and +;;; returns an equivalent alist. Each body (string list) in the input alist +;;; is joined into a single list in the output alist. SEPARATOR is the +;;; string used to join these elements together; it defaults to a single +;;; space " ", but can usefully be "\n" or "\r\n". +;;; +;;; To rejoin a single body list, use scsh's JOIN-STRINGS procedure. + +(define (rejoin-header-lines alist . maybe-separator) + (let-optionals maybe-separator ((sep " ")) + (map (lambda (entry) + (cons (car entry) + (map (lambda (body) (join-strings body sep)) + (cdr entry)))) + alist))) + + +;;; Given a set of RFC822 headers like this: +;;; From: shivers +;;; To: ziggy, +;;; newts +;;; To: gjs, tk +;;; +;;; We have the following definitions: +;;; (get-header-all hdrs 'to) -> ((" ziggy," " newts") (" gjs, tk")) +;;; - All entries, or #f +;;; (get-header-lines hdrs 'to) -> (" ziggy," " newts") +;;; - All lines of the first entry, or #f. +;;; (get-header hdrs 'to) -> "ziggy,\n newts" +;;; - First entry, with the lines joined together by newlines. + +(define (get-header-all headers name) + (let ((entry (assq name headers))) + (and entry (cdr entry)))) + +(define (get-header-lines headers name) + (let ((entry (assq name headers))) + (and entry + (pair? entry) + (cadr entry)))) + +(define (get-header headers name . maybe-sep) + (let ((entry (assq name headers))) + (and entry + (pair? entry) + (join-strings (cadr entry) + (:optional maybe-sep "\n"))))) + + + +;;; Other desireable functionality +;;; - Unfolding long lines. +;;; - Lexing structured fields. +;;; - Unlexing structured fields into canonical form. +;;; - Parsing and unparsing dates. +;;; - Parsing and unparsing addresses. diff --git a/scheme-program-server.scm b/scheme-program-server.scm new file mode 100644 index 0000000..07a52c3 --- /dev/null +++ b/scheme-program-server.scm @@ -0,0 +1,58 @@ +(define scheme-program-handler + (lambda (path req) + (if (pair? path) ; Got to have at least one elt. + (let* ((prog (car path)) + + (search (http-url:search (request:url req))) ; Compute the + (arglist (if (and search (not (index search #\=))) ; argv list. + (split-and-decode-search-spec search) + '())) + (env (exec-env req (cdr path))) ; set global environment vars + (doit (lambda () + ((runprogram prog) arglist)))) + + (and (http-log "----------------------------------------~%") + (http-log " Programmname : ~s~%" prog) + (http-log " search : ~s~%" search) + (http-log " Argumente : ~s~%" arglist) + (http-log "----------------------------------------~%") + + (switch string=? (request:method req) + (("GET" "POST") ; Could do others also. + (wait (fork doit))) + (else (http-error http-reply/method-not-allowed req))))) + + (http-error http-reply/bad-request req "Error ")))) + +(define (runprogram progstring) + (let* ( (progsymbol (read (make-string-input-port progstring))) + (progsymbol1 (string->symbol progstring))) + (and (http-log "[]run-program ~s ~s ~s~%" progstring progsymbol progsymbol1) + (eval progsymbol (interaction-environment))))) + +(define (split-and-decode-search-spec s) + (let recur ((i 0)) + (? ((index s #\+ i) => (lambda (j) (cons (unescape-uri s i j) + (recur (+ j 1))))) + (else (list (unescape-uri s i (string-length s))))))) + +(define url-path) +(define script-path) +(define script-name) +(define (exec-env req path-suffix) + (let* (;; Compute the $SCRIPT_PATH string. + (url-path1 (http-url:path (request:url req))) + (script-path1 (take (- (length url-path1) (length path-suffix)) + url-path1)) + (script-name1 (uri-path-list->path script-path1))) + (and (set! url-path url-path1) + (set! script-path script-path1) + (set! script-name script-name1)))) + +(define (take n lis) + (if (zero? n) '() + (cons (car lis) (take (- n 1) (cdr lis))))) + +(define (drop n lis) + (if (zero? n) lis + (drop (- n 1) (cdr lis)))) diff --git a/server.scm b/server.scm new file mode 100755 index 0000000..b9a8b4e --- /dev/null +++ b/server.scm @@ -0,0 +1,44 @@ +#!/usr/local/bin/scsh \ +-lm modules.scm -lm toothless.scm -dm -o http-top -e top -s +!# + +;;; Scheme Underground Web Server -*- Scheme -*- +;;; Olin Shivers + +;;; To compile as a heap-image: +;;; ,open http-top +;;; (dump-scsh-program top "server") +;;; then insert a #! trigger. + +(define-structure http-top (export top) + (open httpd-core + cgi-server-package + httpd-basic-handlers + seval-handler-package + scsh + scheme) + (begin + + ;; Kitche-sink path handler. + + (define ph + (alist-path-dispatcher + `(("h" . ,(home-dir-handler "public_html")) + ("seval" . ,seval-handler) + ("cgi-bin" . ,(cgi-handler "/usr/local/etc/httpd/cgi-bin"))) + (tilde-home-dir-handler "public_html" + (rooted-file-handler "/usr/local/etc/httpd/htdocs")))) + + + + ;; Crank up a server on port 8001, first resetting our identity to + ;; user "nobody". Initialise the request-invariant part of the CGI + ;; env before starting. + + (define (top args) + (display "We be jammin, now.\n") (force-output) + (cond ((zero? (user-uid)) + (set-gid -2) ; Should be (set-uid (->uid "nobody")) + (set-uid -2))) ; but NeXTSTEP loses. + (initialise-request-invariant-cgi-env) + (httpd ph 8001 "/zu/shivers")))) diff --git a/seval.scm b/seval.scm new file mode 100644 index 0000000..4b5184f --- /dev/null +++ b/seval.scm @@ -0,0 +1,111 @@ +;;; Path handler for uploading Scheme code to the SU web server -*- Scheme -*- +;;; This is really just an handler example demonstrating how to upload code +;;; into the server. +;;; Copyright (c) 1995 by Olin Shivers. + +;;; Imports and non-R4RS'isms +;;; \r and \n in string for cr and lf. +;;; SWITCH conditional, ? for COND +;;; HTTP request record stucture +;;; HTTP-ERROR & reply codes +;;; Basic path handler support +;;; scsh syscalls +;;; Pretty-printing P proc. +;;; htmlout stuff +;;; SAFE-EVAL +;;; ERROR +;;; INDEX +;;; URI decoding +;;; string hacks (SKIP-WHITESPACE) + +;;; HTML forms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This path handler is suitable for receiving code entered into an +;;; HTML text form. The Scheme code being uploaded is being POST'd to us +;;; (from a form). See http-forms.scm for info on the format of this kind +;;; of request. After parsing the request into the submitted string, we +;;; parse *that* into a Scheme sexp with READ, and eval it. + +;;; (do/timeout secs thunk) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Run THUNK, and gun it down if it hasn't finished in SECS seconds. +;;; Returns nothing useful, and THUNK gets executed in a subprocess, +;;; so its side-effects are invisible, as well. This is a clever kludge -- +;;; it uses three subprocesses -- but I don't have interrupts, so I'm hosed. + +(define (do/timeout* secs thunk) + (run (begin (let ((timer (fork (lambda () (sleep secs)))) + (worker (fork thunk))) + (receive (process status) (wait-any) + (ignore-errors + (lambda () + (signal-process (proc:pid (if (eq? worker process) + timer + worker)) + signal/kill)))))))) +(define-syntax do/timeout + (syntax-rules () + ((do/timeout secs body ...) (do/timeout* secs (lambda () body ...))))) + + +;;; The path handler for seval ops. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (seval-handler path req) + (switch string=? (request:method req) + (("POST") ; Could do others also. + + (let ((modern-protocol? (not (v0.9-request? req)))) + (when modern-protocol? + (begin-http-header #t 200) + (write-string "Content-type: text/html\r\n\r\n")) + (with-tag #t HEAD () + (newline) + (emit-title #t "Scheme program output")) + (newline)) + + (with-tag #t BODY () + (newline) + (let ((sexp (read-request-sexp req))) + (do/timeout 10 + (receive vals + ;; Do the computation. + (begin (emit-header #t 2 "Output from execution") + (newline) + (with-tag #t PRE () + (newline) + (force-output) ; In case we're gunned down. + (eval-safely sexp))) + + ;; Pretty-print the returned value(s). + (emit-header #t 2 "Return value(s)") + (with-tag #t PRE () + (for-each p vals))))))) + + (else (http-error http-reply/method-not-allowed #f req)))) + + +;;; Read an HTTP request entity body from stdin. The Content-length: +;;; element of request REQ's header tells how many bytes to this entity +;;; is. The entity should be a URI-encoded form body. Pull out the +;;; program= +;;; string, extract , uri-decode it, parse that into an s-expression, +;;; and return it. + +(define (read-request-sexp req) + (? ((get-header (request:headers req) 'content-length) => + (lambda (cl-str) ; Take the first Content-length: header, + (let* ((cl-start (skip-whitespace cl-str)) ; skip whitespace, + (cl (if cl-start ; & convert to + (string->number (substring cl-str ; a number. + cl-start + (string-length cl-str))) + 0)) ; All whitespace?? -- WTF. + (qs (read-string cl)) ; Read in CL chars, + (q (parse-html-form-query qs)) ; and parse them up. + (s (? ((assoc "program" q) => cdr) + (else (error "No program in entity body."))))) + (http-log "Seval sexp:~%~s~%" s) + (read (make-string-input-port s))))) + (else (http-error http-reply/bad-request req + "No Content-length: field in POST request.")))) diff --git a/smtp.scm b/smtp.scm new file mode 100644 index 0000000..917a06d --- /dev/null +++ b/smtp.scm @@ -0,0 +1,606 @@ +;;; SMTP client code -*- Scheme -*- +;;; Copyright (c) 1995 by Brian D. Carlstrom and Olin Shivers. +;;; , +;;; +;;; See rfc821: /ftp@ftp.internic.net:/rfc/rfc821.txt + +;;; External dependencies and non-R4RS'isms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; system-name user-login-name (for high-level SENDMAIL proc) +;;; receive values (MV return) +;;; write-string read-string/partial (scsh I/O procs) +;;; force-output +;;; scsh's socket module +;;; :optional +;;; error +;;; switchq (Conditional macro) +;;; ? (COND) +;;; read-crlf-line write-crlf +;;; \n \r in strings (Not R4RS) + +;;; SMTP protocol procedures tend to return two values: +;;; - CODE The integer SMTP reply code returned by server for the transaction. +;;; - TEXT A list of strings -- the text messages tagged by the code. +;;; The text strings have the initial code numerals and the terminating +;;; cr/lf's stripped. Codes in the range [1,399] are sucess codes; codes +;;; in the range [400,599] are error codes; codes >= 600 are not part +;;; of the official SMTP spec. This module uses codes >= 600 to indicate +;;; extra-protocol errors. There are two of these: +;;; - 600 Server reply could not be parsed. +;;; The server sent back some sort of incomprehensible garbage reply. +;;; - 621 Premature EOF while reading server reply. +;;; The server shut down in the middle of a reply. +;;; A list of the official protocol return codes is appended at the end of +;;; this file. + +;;; These little cover functions are trivial packagings of the protocol. +;;; You could write your own to handle, e.g., mailing a message to a list +;;; of addresses. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +;;; This is broken -- the (SYSTEM-NAME) proc returns a local name, not a +;;; useful Internet host name. How do we do that? + +;;; (sendmail to-list body [host]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Mail message to recipients in list TO-LIST. Message handed off to server +;;; running on HOST; default is the local host. Returns two values: code and +;;; text-list. However, if only problem with message is that some recipients +;;; were rejected, sendmail sends to the rest of the recipients, and the +;;; partial-success return is [700 loser-alist] where loser-alist +;;; is a list whose elements are of the form (loser-recipient code . text) -- +;;; that is, for each recipient refused by the server, you get the error +;;; data sent back for that guy. The success check is (< code 400). +;;; +;;; BODY is a string or an input port. + +(define (sendmail to-list body . maybe-host) + (call-with-current-continuation + (lambda (bailout) + (let ((local (system-name)) + (socket (smtp/open (:optional maybe-host "localhost")))) + (receive (code text) (smtp-transactions socket ; Do prologue. + (smtp/helo socket local) + (smtp/mail socket (string-append (user-login-name) + "@" local))) + (if (>= code 400) (values code text) ; error + + ;; Send over recipients and collect the losers. + (let ((losers (filter-map + (lambda (to) + (receive (code text) (smtp/rcpt socket to) + (and (>= code 400) ; Error + (? ((>= 600) + (smtp/quit socket) + (bailout code text)) + (else `(,to ,code ,@text)))))) + to-list))) + + ;; Send the message body and wrap things up. + (receive (code text) (smtp-transactions socket + (smtp/data socket body) + (smtp/quit socket)) + (if (and (< code 400) (null? losers)) + (values code text) + (values 700 losers)))))))))) + +;;; Trivial utility -- like map, but filter out #f's. + +(define (filter-map f lis) + (let lp ((ans '()) (lis lis)) + (if (pair? lis) + (lp (? ((f (car lis)) => (lambda (val) (cons val ans))) + (else ans)) + (cdr lis)) + (reverse ans)))) + +(define (%sendmail from local-host to dest-host message) + (let ((socket (smtp/open dest-host))) + (smtp-transactions socket + (smtp/helo socket local-host) + (smtp/mail socket from) + (smtp/rcpt socket to) + (smtp/data socket message) + (smtp/quit socket)))) + + +;;; EXPN, VRFY, MAIL-HELP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These three are simple queries of the server. + +(define (smtp-query socket query arg) + (receive (code text) + (smtp-transactions socket + (smtp/helo socket (system-name)) + (query socket arg)) + (if (not (or (= code 421) (= code 221))) + (smtp/quit socket)) + (values code text))) + +(define (expn name host) + (smtp-query (smtp/open host) smtp/expn name)) + +(define (vrfy name host) + (smtp-query (smtp/open host) smtp/vrfy name)) + +(define (mail-help host . details) + (smtp-query (smtp/open host) smtp/help (apply string-append details))) + + +;;; (smtp-transactions socket ?transaction1 ...) +;;; (smtp-transactions/no-close socket ?transaction1 ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These macros make it easy to do simple sequences of SMTP commands. +;;; +;;; Evaluate a series of expressions ?transaction1, ?transaction2, ... +;;; - Each expression should perform an SMTP transaction, +;;; and return two values: +;;; + CODE (the integer reply code) +;;; + TEXT (list of strings that came with the reply). +;;; +;;; - If the transaction's reply code is 221 or 421 (meaning the socket has +;;; been closed), then the transaction sequence is is aborted, and the +;;; SMTP-TRANSACTIONS form returns the CODE and TEXT values for the current +;;; transaction. +;;; +;;; - If the reply code is an error code (in the four- or five-hundred range), +;;; the transaction sequence is aborted, and the fatal transaction's CODE +;;; and TEXT values are returned. SMTP-TRANSACTIONS will additionally +;;; close the socket for you; SMTP-TRANSACTIONS/NO-CLOSE will not. +;;; +;;; - If the transaction is the last in the transaction sequence, +;;; its CODE and TEXT values are returned. +;;; +;;; - Otherwise, we throw away the current CODE and TEXT values, and +;;; proceed to the next transaction. +;;; +;;; Since SMTP-TRANSACTIONS closes the socket whenever it aborts a sequence, +;;; an SMTP-TRANSACTIONS form terminated with an (smtp/quit socket) transaction +;;; will always close the socket. +;;; +;;; If the socket should be kept open in the case of an abort, use +;;; SMTP-TRANSACTIONS/NO-CLOSE. +;;; +;;; We abort sequences if a transaction results in a 400-class error code. +;;; So, a sequence mailing a message to five people, with 5 RCPT's, would +;;; abort if the mailing address for one of these people was wrong, rather +;;; than proceeding to mail the other four. This may not be what you want; +;;; if so, you'll have to roll your own. + +(define-syntax smtp-transactions + (syntax-rules () + ((smtp-transactions socket ?T1 ?T2 ...) + (let ((s socket)) + (receive (code text) (smtp-transactions/no-close s ?T1 ?T2 ...) + (if (<= 400 code) (smtp/quit s)) + (values code text)))))) + +(define-syntax smtp-transactions/no-close + (syntax-rules () + ((smtp-transactions/no-close socket ?T1 ?T2 ...) + ;; %smtp-transactions/no-close replicates the socket argument, + ;; so we have to force it to be a variable. + (let ((s socket)) + (%smtp-transactions/no-close s ?T1 ?T2 ...))))) + +;;; SOCKET must be a variable, hence replicable. +(define-syntax %smtp-transactions/no-close + (syntax-rules () + ((%smtp-transactions/no-close socket ?T1 ?T2 ?T3 ...) + (receive (code text) ?T1 + (if (or (= code 221) + (= code 421) ; Redundant, I know. + (<= 400 code)) + (values code text) + (%smtp-transactions/no-close socket ?T2 ?T3 ...)))) + + ((%smtp-transactions/no-close socket ?T1) + ?T1))) + +;;; I can't make this nested definition work. I'm not enough of a macro stud. +;(define-syntax smtp-transactions/no-close +; (syntax-rules () +; ((smtp-transactions/no-close socket ?T1 ...) +; (letrec-syntax ((%smtp-transactions/no-close +; (syntax-rules () +; +; ((%smtp-transactions/no-close socket ?T1 ?T2 ...) +; (receive (code text) ?T1 +; (if (or (= code 221) +; (= code 421) ; Redundant, I know. +; (<= 400 code)) +; (values code text) +; (%smtp-transactions/no-close socket ?T2 ...)))) +; +; ((%smtp-transactions/no-close socket ?T1) +; ?T1)))) +; +; ;; %smtp-transactions/no-close replicates the socket argument, +; ;; so we have to force it to be a variable. +; (let ((s socket)) +; (%smtp-transactions/no-close s ?T1 ...)))))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The basics of the protocol + +(define (nullary-smtp-command command) + (lambda (socket) + (let ((port (socket:outport socket))) + (write-string command port) + (write-crlf port)) + (handle-smtp-reply socket))) + + +(define (unary-smtp-command command) + (lambda (socket data) + (let ((port (socket:outport socket))) + (write-string command port) + (display #\space port) + (write-string data port) + (write-crlf port)) + (handle-smtp-reply socket))) + + +(define (smtp/open host . maybe-port) + (let ((sock (socket-connect protocol-family/internet socket-type/stream host + (:optional maybe-port "smtp")))) + (receive (code text) (handle-smtp-reply sock) + (if (< code 400) sock + (error "SMTP socket-open server-reply error" sock code text))))) + +;; HELLO +(define smtp/helo (unary-smtp-command "HELO")) + +;; MAIL FROM: +(define smtp/mail (unary-smtp-command "MAIL FROM:")) + +;; RECIPIENT TO: +(define smtp/rcpt (unary-smtp-command "RCPT TO:")) + +;; DATA +(define smtp/data + (let ((send-DATA-msg (nullary-smtp-command "DATA"))) + (lambda (socket message) ; MESSAGE is a string or an input port. + (receive (code text) (send-DATA-msg socket) + (if (>= code 400) (values code text) ; Error. + + ;; We got a positive acknowledgement for the DATA msg, + ;; now send the message body. + (let ((p (socket:outport socket))) + (? ((string? message) + (receive (data last-char) (smtp-stuff message #f) + (write-string data p))) + + ((input-port? message) + (let lp ((last-char #f)) + (? ((read-string/partial 1024 message) => + (lambda (chunk) + (receive (data last-char) + (smtp-stuff chunk last-char) + (write-string data p) + (lp last-char))))))) + + (else (error "Message must be string or input-port."))) + + (write-string "\r\n.\r\n" p) + (force-output p) + (handle-smtp-reply socket))))))) + +;; SEND FROM: +(define smtp/send (unary-smtp-command "SEND FROM:")) + +;; SEND OR MAIL +(define smtp/soml (unary-smtp-command "SOML FROM:")) + +;; SEND AND MAIL +(define smtp/saml (unary-smtp-command "SOML SAML:")) + +;; RESET +(define smtp/rset (nullary-smtp-command "RSET")) + +;; VERIFY +(define smtp/vrfy (unary-smtp-command "VRFY")) + +;; EXPAND +(define smtp/expn (unary-smtp-command "EXPN")) + +;; HELP

+(define smtp/help + (let ((send-help (unary-smtp-command "HELP"))) + (lambda (socket . details) + (send-help socket (apply string-append details))))) + +;; NOOP +(define smtp/noop (nullary-smtp-command "NOOP")) + +;; QUIT +(define smtp/quit + (let ((quit (nullary-smtp-command "QUIT"))) + (lambda (socket) + (receive (code text) (quit socket) ; Quit & close socket gracefully. + (switchq = code + ((221 421)) + (else (close-socket socket))) ; But close in any event. + (values code text))))) + +;; TURN +(define smtp/turn (nullary-smtp-command "TURN")) + +;;; Read and handle the reply. Return an integer (the reply code), +;;; and a list of the text lines that came tagged by the reply code. +;;; The text lines have the reply-code prefix (first 4 chars) and the +;;; terminating cr/lf's stripped. +;;; +;;; In bdc's analog of this proc, he would read another reply if the code was +;;; in the one-hundred range (1xx). These codes aren't even used in smtp, +;;; according to the RFC. So why? + +(define (handle-smtp-reply socket) + (receive (code text) (read-smtp-reply (socket:inport socket)) + (switchq = code + ((221 421) (close-socket socket))) ; All done. + (values code text))) + +;;; Read a reply from the SMTP server. Returns two values: +;;; - CODE Integer. The reply code. +;;; - TEXT String list. A list of the text lines comprising the reply. +;;; Each line of text is stripped of the initial reply-code +;;; numerals (e.g., the first four chars of the reply), and +;;; the trailing cr/lf. We are in fact generous about what +;;; we take to be a line -- the protocol requires cr/lf +;;; terminators, but we'll accept just lf. This appears to +;;; true to the spirit of the "be strict in what you send, +;;; and generous in what you accept" Internet protocol philosphy. + +(define (read-smtp-reply port) + (let lp ((replies '())) + (let ((ln (read-crlf-line port))) + (if (eof-object? ln) + (values 621 (cons "Premature EOF during smtp reply." + (reverse replies))) + (receive (code line more?) (parse-smtp-reply ln) + (let ((replies (cons line replies))) + (if more? (lp replies) + (values code (reverse replies))))))))) + +;;; Parse a line of SMTP reply. Return three values: +;;; CODE integer - the reply code that prefixes the string. +;;; REST string - the rest of the line. +;;; MORE? boolean - is there more reply to read (i.e., was the numeric +;;; reply code terminated by a "-" character?) + +(define (parse-smtp-reply line) + (if (and (string? line) ; This is all checking + (> (string-length line) 3) ; to see if the line + (char-numeric? (string-ref line 0)) ; is properly formatted. + (char-numeric? (string-ref line 1)) + (char-numeric? (string-ref line 2)) + (let ((c (string-ref line 3))) + (or (char=? c #\space) (char=? c #\-)))) + + (values (string->number (substring line 0 3)) ; It is. + (substring line 4 (string-length line)) + (char=? (string-ref line 3) #\-)) + + (values 600 ; It isn't. + (string-append "Improperly-formatted smtp reply: " line) + #f))) + + +;;; The message body of a piece of email is terminated by the sequence +;;; +;;; If the message body contains this magic sequence, it has to be escaped. +;;; We do this by mapping the sequence to ; +;;; the SMTP receiver undoes this mapping. + +;;; S is a string to stuff, PCHAR was the character read just before S +;;; (which matters if it is a line-feed). If S is the first chunk of the entire +;;; msg, then PCHAR can be #f. Return two values: the stuffed string, and the +;;; last char in S (or PCHAR if S is empty). The last-char value returned can +;;; be used as the PCHAR arg for the following call to SMTP-STUFF. + +(define (smtp-stuff s pchar) + (let* ((slen (string-length s)) + (hits ; Count up all the seqs in the string. + (let lp ((count 0) + (nl? (eqv? pchar #\newline)) ; Was last char a newline? + (i 0)) + (if (< i slen) + (let ((c (string-ref s i))) + (lp (if (and nl? (char=? c #\.)) (+ count 1) count) + (eq? c #\newline) + (+ i 1))) + count)))) + + (values (if (zero? hits) s + ;; Make a new string, and do the dot-stuffing copy. + (let ((ns (make-string (+ hits slen)))) + (let lp ((nl? (eqv? pchar #\newline)) + (i 0) ; S index. + (j 0)) ; NS index. + (if (< i slen) + (let ((c (string-ref s i))) + (string-set! ns j c) + (? ((and nl? (char=? c #\.)) + (string-set! ns (+ j 1) #\.) + (lp #f (+ i 1) (+ j 2))) + (else (lp (char=? c #\newline) (+ i 1) (+ j 1))))))) + ns)) + + (if (zero? slen) pchar (string-ref s (- slen 1)))))) ; LAST-CHAR + +;;; Reply codes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This material taken from the RFC. +;;; +;;; 1yz Positive Preliminary reply +;;; +;;; The command has been accepted, but the requested action +;;; is being held in abeyance, pending confirmation of the +;;; information in this reply. The sender-SMTP should send +;;; another command specifying whether to continue or abort +;;; the action. +;;; +;;; [Note: SMTP does not have any commands that allow this +;;; type of reply, and so does not have the continue or +;;; abort commands.] +;;; +;;; 2yz Positive Completion reply +;;; +;;; The requested action has been successfully completed. A +;;; new request may be initiated. +;;; +;;; 3yz Positive Intermediate reply +;;; +;;; The command has been accepted, but the requested action +;;; is being held in abeyance, pending receipt of further +;;; information. The sender-SMTP should send another command +;;; specifying this information. This reply is used in +;;; command sequence groups. +;;; +;;; 4yz Transient Negative Completion reply +;;; +;;; The command was not accepted and the requested action did +;;; not occur. However, the error condition is temporary and +;;; the action may be requested again. The sender should +;;; return to the beginning of the command sequence (if any). +;;; It is difficult to assign a meaning to "transient" when +;;; two different sites (receiver- and sender- SMTPs) must +;;; agree on the interpretation. Each reply in this category +;;; might have a different time value, but the sender-SMTP is +;;; encouraged to try again. A rule of thumb to determine if +;;; a reply fits into the 4yz or the 5yz category (see below) +;;; is that replies are 4yz if they can be repeated without +;;; any change in command form or in properties of the sender +;;; or receiver. (E.g., the command is repeated identically +;;; and the receiver does not put up a new implementation.) +;;; +;;; 5yz Permanent Negative Completion reply +;;; +;;; The command was not accepted and the requested action did +;;; not occur. The sender-SMTP is discouraged from repeating +;;; the exact request (in the same sequence). Even some +;;; "permanent" error conditions can be corrected, so the +;;; human user may want to direct the sender-SMTP to +;;; reinitiate the command sequence by direct action at some +;;; point in the future (e.g., after the spelling has been +;;; changed, or the user has altered the account status). +;;; +;;;The second digit encodes responses in specific categories: +;;; +;;; x0z Syntax -- These replies refer to syntax errors, +;;; syntactically correct commands that don't fit any +;;; functional category, and unimplemented or superfluous +;;; commands. +;;; +;;; x1z Information -- These are replies to requests for +;;; information, such as status or help. +;;; +;;; x2z Connections -- These are replies referring to the +;;; transmission channel. +;;; +;;; x3z Unspecified as yet. +;;; +;;; x4z Unspecified as yet. +;;; +;;; x5z Mail system -- These replies indicate the status of +;;; the receiver mail system vis-a-vis the requested +;;; transfer or other mail system action. + +;;; Complete list (grouped by function) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; 500 Syntax error, command unrecognized +;;; [This may include errors such as command line too long] +;;; 501 Syntax error in parameters or arguments +;;; 502 Command not implemented +;;; 503 Bad sequence of commands +;;; 504 Command parameter not implemented +;;; +;;; 211 System status, or system help reply +;;; 214 Help message +;;; [Information on how to use the receiver or the meaning of a +;;; particular non-standard command; this reply is useful only +;;; to the human user] +;;; +;;; 220 Service ready +;;; 221 Service closing transmission channel +;;; 421 Service not available, +;;; closing transmission channel +;;; [This may be a reply to any command if the service knows it +;;; must shut down] +;;; +;;; 250 Requested mail action okay, completed +;;; 251 User not local; will forward to +;;; 450 Requested mail action not taken: mailbox unavailable +;;; [E.g., mailbox busy] +;;; 550 Requested action not taken: mailbox unavailable +;;; [E.g., mailbox not found, no access] +;;; 451 Requested action aborted: error in processing +;;; 551 User not local; please try +;;; 452 Requested action not taken: insufficient system storage +;;; 552 Requested mail action aborted: exceeded storage allocation +;;; 553 Requested action not taken: mailbox name not allowed +;;; [E.g., mailbox syntax incorrect] +;;; 354 Start mail input; end with . +;;; 554 Transaction failed +;;; + +;;; State diagram +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; CONNECTION ESTABLISHMENT +;;; S: 220 +;;; F: 421 +;;; HELO +;;; S: 250 +;;; E: 500, 501, 504, 421 +;;; MAIL +;;; S: 250 +;;; F: 552, 451, 452 +;;; E: 500, 501, 421 +;;; RCPT +;;; S: 250, 251 +;;; F: 550, 551, 552, 553, 450, 451, 452 +;;; E: 500, 501, 503, 421 +;;; DATA +;;; I: 354 -> data -> S: 250 +;;; F: 552, 554, 451, 452 +;;; F: 451, 554 +;;; E: 500, 501, 503, 421 +;;; RSET +;;; S: 250 +;;; E: 500, 501, 504, 421 +;;; SEND +;;; S: 250 +;;; F: 552, 451, 452 +;;; E: 500, 501, 502, 421 +;;; SOML +;;; S: 250 +;;; F: 552, 451, 452 +;;; E: 500, 501, 502, 421 +;;; SAML +;;; S: 250 +;;; F: 552, 451, 452 +;;; E: 500, 501, 502, 421 +;;; VRFY +;;; S: 250, 251 +;;; F: 550, 551, 553 +;;; E: 500, 501, 502, 504, 421 +;;; EXPN +;;; S: 250 +;;; F: 550 +;;; E: 500, 501, 502, 504, 421 +;;; HELP +;;; S: 211, 214 +;;; E: 500, 501, 502, 504, 421 +;;; NOOP +;;; S: 250 +;;; E: 500, 421 +;;; QUIT +;;; S: 221 +;;; E: 500 +;;; TURN +;;; S: 250 +;;; F: 502 +;;; E: 500, 503 diff --git a/stringhax.scm b/stringhax.scm new file mode 100644 index 0000000..4739651 --- /dev/null +++ b/stringhax.scm @@ -0,0 +1,63 @@ +;;; Random string-hacking procs -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +(define (string-map f s) + (let* ((slen (string-length s)) + (ns (make-string slen))) + (do ((i (- slen 1) (- i 1))) + ((< i 0) ns) + (string-set! ns i (f (string-ref s i)))))) + +(define (downcase-string s) + (string-map char-downcase s)) + +(define (upcase-string s) + (string-map char-upcase s)) + + +(define (char-set-index str cset . maybe-start) + (let-optionals maybe-start ((start 0)) + (let ((len (string-length str))) + (do ((i start (+ 1 i))) + ((or (>= i len) + (char-set-contains? cset (string-ref str i))) + (and (< i len) i)))))) + +(define (char-set-rindex str cset . maybe-start) + (let ((len (string-length str))) + (let-optionals maybe-start ((start len)) + (do ((i (- start 1) (- i 1))) + ((or (< i 0) + (char-set-contains? cset (string-ref str i))) + (and (>= i 0) i)))))) + +(define (string-reduce nil cons s) + (let ((slen (string-length s))) + (do ((ans nil (cons (string-ref s i) ans)) + (i 0 (+ i 1))) + ((= i slen) ans)))) + +(define (string-prefix? prefix string) + (let ((plen (string-length prefix)) + (slen (string-length string))) + (and (<= plen slen) + (let lp ((i 0)) + (or (= i plen) + (and (char=? (string-ref prefix i) + (string-ref string i)) + (lp (+ i 1)))))))) + +(define (string-suffix? suffix string) + (let ((slen (string-length suffix)) + (len (string-length string))) + (and (<= slen len) + (let lp ((i (- slen 1)) + (j (- len 1))) + (or (< i 0) + (and (char=? (string-ref suffix i) + (string-ref string j)) + (lp (- i 1) (- j 1)))))))) + +(define skip-whitespace + (let ((non-whitespace (char-set-invert char-set:whitespace))) + (lambda (s) (char-set-index s non-whitespace)))) diff --git a/su-httpd.txt b/su-httpd.txt new file mode 100644 index 0000000..9d7cfa9 --- /dev/null +++ b/su-httpd.txt @@ -0,0 +1,352 @@ +The Scheme Underground Web system +Olin Shivers +7/95 +Additions by Mike Sperber, 10/96 + +The Scheme underground Web system is a package of Scheme code that provides +utilities for interacting with the World-Wide Web. This includes: + + - A Web server. + - URI and URL parsers and un-parsers. + - RFC822-style header parsers. + - Code for performing structured html output + - Code to assist in writing CGI Scheme programs + that can be used by any CGI-compliant HTTP server + (such as NCSA's httpd, or the S.U. Web server). + +The code can be obtained via anonymous ftp and is implemented in Scheme 48, +using the system calls and support procedures of scsh, the Scheme Shell. The +code was written to be clear and modifiable -- it is voluminously commented +and all non-R4RS dependencies are described at the beginning of each source +file. + +I do not have the time to write detailed documentation for these packages. +However, they are very thoroughly commented, and I strongly recommend reading +the source files; they were written to be read, and the source code comments +should provide a clear description of the system. The remainder of this note +gives an overview of the server's basic architecture and interfaces. + + +* The Scheme Underground Web Server +The server was designed with three principle goals in mind: + + - Extensibility + The server is designed to make it easy to extend the basic + functionality. In fact, the server is nothing but extensions. There is + no distinction between the set of basic services provided by the server + implementation and user extensions -- they are both implemented in + Scheme, and have equal status. The design is "turtles all the way down." + + - Mobile code + Because the server is written in Scheme 48, it is simple to use the + Scheme 48 module system to upload programs to the server for safe + execution within a protected, server-chosen environment. The server + comes with a simple example upload service to demonstrate this + capability. + + - Clarity of implementation + Because the server is written in a high-level language, it should make + for a clearer exposition of the HTTP protocol and the associated URL + and URI notations than one written in a low-level language such as C. + This also should help to make the server easy to modify and adapt to + different uses. + + +** Basic server structure + +The Web server is started by calling the HTTPD procedure, which takes +one required and two optional arguments: + + (httpd path-handler [port working-directory]) + +The server accepts connections from the given port, which defaults to 80. +The server runs with the working directory set to the given value, +which defaults to + /usr/local/etc/httpd + +The server's basic loop is to wait on the port for a connection from an HTTP +client. When it receives a connection, it reads in and parses the request into +a special request data structure. Then the server forks a child process, who +binds the current I/O ports to the connection socket, and then hands off to +the top-level path handler (the first argument to httpd). The path-handler +procedure is responsible for actually serving the request -- it can be any +arbitrary computation. Its output goes directly back to the HTTP client that +sent the request. + +Before calling the path handler to service the request, the HTTP server +installs an error handler that fields any uncaught error, sends an +error reply to the client, and aborts the request transaction. Hence +any error caused by a path-handler will be handled in a reasonable and +robust fashion. + +The basic server loop, and the associated request data structure are the fixed +architecture of the S.U. Web server; its flexibility lies in the notion of +path handlers. + + +** Path handlers + +A path handler is a procedure taking two arguments: + + (path-handler path req) + +The REQ argument is a request record giving all the details of the +client's request; it has the following structure: + + (define-record request + method ; A string such as "GET", "PUT", etc. + uri ; The escaped URI string as read from request line. + url ; An http URL record (see url.scm). + version ; A (major . minor) integer pair. + headers ; An rfc822 header alist (see rfc822.scm). + socket) ; The socket connected to the client. + + +The PATH argument is the URL's path, parsed and split at slashes into a string +list. For example, if the Web client dereferences URL + + http://clark.lcs.mit.edu:8001/h/shivers/code/web.tar.gz + +then the server would pass the following path to the top-level handler: + + ("h" "shivers" "code" "web.tar.gz") + +The path argument's pre-parsed representation as a string list makes it easy +for the path handler to implement recursive operations dispatch on URL paths. + +Path handlers can do anything they like to respond to HTTP requests; they have +the full range of Scheme to implement the desired functionality. When +handling HTTP requests that have an associated entity body (such as POST), the +body should be read from the current input port. Path handlers should in all +cases write their reply to the current output port. Path handlers should *not* +perform I/O on the request record's socket. Path handlers are frequently +called recursively, and doing I/O directly to the socket might bypass a +filtering or other processing step interposed on the current I/O ports by some +superior path handler. + + +*** Basic path handlers + +Although the user can write any path-handler he likes, the S.U. server comes +with a useful toolbox of basic path handlers that can be used and built upon: + +(alist-path-dispatcher ph-alist default-ph) -> path-handler + This procedure takes a string->path-handler alist, and a default + path handler, and returns a handler that dispatches on its path argument. + When the new path handler is applied to a path ("foo" "bar" "baz"), + it uses the first element of the path -- "foo" -- to index into + the alist. If it finds an associated path handler in the alist, it + hands the request off to that handler, passing it the tail of the path, + ("bar" "baz"). On the other hand, if the path is empty, or the alist + search does not yield a hit, we hand off to the default path handler, + passing it the entire original path, ("foo" "bar" "baz"). + + This procedure is how you say: "If the first element of the URL's path + is `foo', do X; if it's `bar', do Y; otherwise, do Z." If one takes + an object-oriented view of the process, an alist path-handler does + method lookup on the requested operation, dispatching off to the + appropriate method defined for the URL. + + The slash-delimited URI path structure implies an associated + tree of names. The path-handler system and the alist dispatcher + allow you to procedurally define the server's response to any + arbitrary subtree of the path space. + + Example: + A typical top-level path handler is + + (define ph + (alist-path-dispatcher + `(("h" . ,(home-dir-handler "public_html")) + ("cgi-bin" . ,(cgi-handler "/usr/local/etc/httpd/cgi-bin")) + ("seval" . ,seval-handler)) + (rooted-file-handler "/usr/local/etc/httpd/htdocs"))) + + + This means: + - If the path looks like ("h" "shivers" "code" "web.tar.gz"), + pass the path ("shivers" "code" "web.tar.gz") to a + home-directory path handler. + + - If the path looks like ("cgi-bin" "calendar"), + pass ("calendar") off to the CGI path handler. + + - If the path looks like ("seval" ...), the tail of the path + is passed off to the code-uploading seval path handler. + + - Otherwise, the whole path is passed to a rooted file handler, who + will convert it into a filename, rooted at /usr/local/etc/httpd/htdocs, + and serve that file. + + +(home-dir-handler subdir) -> path-handler + This procedure builds a path handler that does basic file serving + out of home directories. If the resulting path handler is passed + a path of ( . ), then it serves the file + // + The path handler only handles GET requests; the filename is not + allowed to contain .. elements. + + +(tilde-home-dir-handler subdir default-path-handler) -> path-handler + This path handler examines the car of the path. If it is a string + beginning with a tilde, e.g., "~ziggy", then the string is taken to + mean a home directory, and the request is served similarly to a + HOME-DIR-HANDLER path handler. Otherwise, the request is passed off in + its entirety to the default path handler. + + This procedure is useful for implementing servers that provide the + semantics of the NCSA httpd server. + + +(cgi-handler cgi-directory) -> path-handler + This procedure returns a path-handler that passes the request off to some + program using the CGI interface. The script name is taken from the + car of the path; it is checked for occurrences of ..'s. If the path is + ("my-prog" "foo" "bar") + then the program executed is + /my-prog + + When the CGI path handler builds the process environment for the + CGI script, several elements (e.g., $PATH and $SERVER_SOFTWARE) + are request-invariant, and can be computed at server start-up time. + This can be done by calling + (initialise-request-invariant-cgi-env) + when the server starts up. This is *not* necessary, but will make CGI + requests a little faster. + + +(rooted-file-handler root-dir) -> path-handler + Returns a path handler that serves files from a particular root in the + file system. Only the GET operation is provided. The path argument + passed to the handler is converted into a filename, and appended to + ROOT-DIR. The file name is checked for .. components, and the + transaction is aborted if it does. Otherwise, the file is served to the + client. + + +(rooted-file-or-directory-handler root-dir icon-name) -> path-handler + The same as rooted-file-handler, except it can also serve + directory index listings for directories that do not contain a + file index.html. ICON-NAME is an object describing how to get at + the various icons required for generating directory listings. It + uses the icons provided by CERN httpd 3.0. ICON-NAME can either + be a string which is used as a prefix for generating the icon + URLs. If it is a procedure, it should accept an icon tag (read + httpd-handlers.scm for reference) and return an icon name. If it + is neither, it will just use the plain icon name, which is almost + guaranteed not to work. + + +(null-path-handler path req) + This path handler is useful as a default handler. It handles no requests, + always returning a "404 Not found" reply to the client. + + +** HTTP errors + +Authors of path-handlers need to be able to handle errors in a reasonably +simple fashion. The S.U. Web server provides a set of error conditions that +correspond to the error replies in the HTTP protocol. These errors can be +raised with the HTTP-ERROR procedure. When the server runs a path handler, +it runs it in the context of an error handler that catches these errors, +sends an error reply to the client, and closes the transaction. + +(http-error reply-code req [extra ...]) + This raises an http error condition. The reply code is one of the + numeric HTTP error reply codes, which are bound to the variables + HTTP-REPLY/OK, HTTP-REPLY/NOT-FOUND, HTTP-REPLY/BAD-REQUEST, and so + forth. The REQ argument is the request record that caused the error. + Any following EXTRA args are passed along for informational purposes. + Different HTTP errors take different types of extra arguments. For + example, the "301 moved permanently" and "302 moved temporarily" + replies use the first two extra values as the URI: and Location: fields + in the reply header, respectively. See the clauses of the + SEND-HTTP-ERROR-REPLY procedure for details. + +(send-http-error-reply reply-code request [extra ...]) + This procedure writes an error reply out to the current output + port. If an error occurs during this process, it is caught, and + the procedure silently returns. The http server's standard error + handler passes all http errors raised during path-handler execution + to this procedure to generate the error reply before aborting the + request transaction. + + +** Simple directory generation + +Most path-handlers that serve files to clients eventually call an internal +procedure named FILE-SERVE, which implements a simple directory-generation +service using the following rules: + + - If the filename has the *form* of a directory (i.e., it ends with a + slash), then FILE-SERVE actually looks for a file named "index.html" + in that directory. + + - If the filename names a directory, but is not in directory form + (i.e., it doesn't end in a slash, as in "/usr/include" or "/usr/raj"), + then FILE-SERVE sends back a "301 moved permanently" message, + redirecting the client to a slash-terminated version of the original + URL. For example, the URL + http://clark.lcs.mit.edu/~shivers + would be redirected to + http://clark.lcs.mit.edu/~shivers/ + + - If the filename names a regular file, it is served to the client. + + +** Support procs + +The source files contain a host of support procedures which will be of utility +to anyone writing a custom path-handler. Read the files first. + +** Local customization + + The http-core package exports a procedure: + + (set-server/admin! admin-name) + + which allows you to set the name of the site administrator. If you + don't set this, Olin may get unwanted mail and visit + disproportionate violence on you in return. + + There is a procedure exported from the httpd-core package: + + (set-my-fqdn! name) + + Call this to crow-bar the server's idea of its own Internet host + name before running the server, and all will be well. + + You may want this for one of several reasons. On NeXTSTEP and on + systems that do DNS via NIS/Yellow Pages, you only get an + unqualified hostname. Also, in case of aliased names, you just + might get the wrong one. Furthermore, you may get screwed in the + presence of a server accelerator such as Squid. + + There is a similar procedure in httpd-core: + + (set-my-port! portnum) + + Call this to set the local port of your server. This may be + important to get redirection right in the presence of a web server + accelerator. + +** Losing + +Be aware of certain Unix problems which may require workarounds: +1. NeXTSTEP's Posix implementation of the getpwnam() routine + will silently tell you that every user has uid 0. This means + that if your server, running as root, does a + (set-uid (user->uid "nobody")) + it will essentially do a + (set-uid 0) + and you will thus still be running as root. + + The fix is to manually find out who user nobody is (he's -2 on my + system), and to hard-wire this into the server: + (set-uid -2) + This problem is NeXTSTEP specific. If you are not using NeXTSTEP, + no problem. + + + diff --git a/toothless.scm b/toothless.scm new file mode 100644 index 0000000..95ff6c9 --- /dev/null +++ b/toothless.scm @@ -0,0 +1,58 @@ +;;; -*- Scheme -*- +;;; This file defines a Scheme 48 module that is R4RS without features that +;;; could examine or effect the file system. You can also use it +;;; as a model of how to execute code in other protected environments +;;; in S48. +;;; +;;; Copyright (c) 1995 by Olin Shivers. + +(define-structure loser-package (export loser) + (open scheme error-package) + (begin (define (loser name) + (lambda x (error "Illegal call" name))))) + +;;; The toothless structure is R4RS without the dangerous procedures. + +(define-structure toothless (interface-of scheme) + (open scheme loser-package) + (begin + (define call-with-input-file (loser "call-with-input-file")) + (define call-with-output-file (loser "call-with-output-file")) + (define load (loser "load")) + (define open-input-file (loser "open-input-file")) + (define open-output-file (loser "open-output-file")) + (define transcript-on (loser "transcript-on")) + (define with-input-from-file (loser "with-input-from-file")) + (define with-input-to-file (loser "with-input-to-file")) + (define eval (loser "eval")) + (define interaction-environment (loser "interaction-environment")) + (define scheme-report-environment (loser "scheme-report-environment")))) + +;;; (EVAL-SAFELEY exp) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Create a brand new package, import the TOOTHLESS structure, and +;;; evaluate EXP in it. When the evaluation is done, you throw away +;;; the environment, so EXP's side-effects don't persist from one +;;; EVAL-SAFELY call to the next. If EXP raises an error exception, +;;; we abort and return #f. + +(define-structure toothless-eval (export eval-safely) + (open evaluation ; eval + package-commands-internal ; config-package, get-reflective-tower + packages ; structure-package, make-simple-package + environments ; environment-ref + handle ; ignore-errors + scheme) + (access toothless) ; Force it to be loaded. + (begin + + (define toothless-struct (environment-ref (config-package) 'toothless)) + (define toothless-package (structure-package toothless-struct)) + + (define (new-safe-package) + (make-simple-package (list toothless-struct) #t + (get-reflective-tower toothless-package) ; ??? + 'safe-env)) + + (define (eval-safely exp) + (ignore-errors (lambda () (eval exp (new-safe-package))))))) diff --git a/uri.scm b/uri.scm new file mode 100644 index 0000000..0abb714 --- /dev/null +++ b/uri.scm @@ -0,0 +1,278 @@ +;;; -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; URI syntax -- [scheme] : path [? search ] [# fragmentid] + +;;; Imports and non-R4RS'isms +;;; let-optionals +;;; receive values (MV return) +;;; ascii->char char->ascii +;;; index rindex +;;; char-set-index char-set-rindex +;;; string-reduce +;;; char-set package +;;; bitwise logical funs and arithmetic-shift +;;; join-strings (scsh field-reader code.) + + +;;; References: +;;; - ftp://ftp.internic.net/rfc/rfc1630.txt +;;; Original RFC +;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/URI_Overview.html +;;; General Web page of URI pointers. + +;;; I wrote a URI parser that slavishly obeyed Tim Berners-Lee's +;;; spec (rfc 1630). This was a waste of time, as most URL's do not +;;; obey his spec, which is incomplete and inconsistent with the URL spec +;;; in any event. This parser is much simpler. It parses a URI into four +;;; fields: +;;; [ ] : [ ? ] [ # fragid ] +;;; The returned fields are *not* unescaped, as the rules for parsing the +;;; component in particular need unescaped text, and are dependent +;;; on . The URL parser is responsible for doing this. +;;; If the , or portions are not specified, +;;; they are #f. Otherwise, , , and are strings; +;;; is a non-empty string list. + +;;; The parsing technique is inwards from both ends. +;;; - First we search forwards for the first reserved char (= ; / # ? : space) +;;; If it's a colon, then that's the part, otw no part. +;;; Remove it. +;;; - Then we search backwards from the end for the last reserved char. +;;; If it's a sharp, then that's the part -- remove it. +;;; - Then we search backwards from the end for the last reserved char. +;;; If it's a question-mark, then that's the part -- remove it. +;;; - What's left is the path. Split at slashes. "" -> ("") +;;; +;;; This scheme is tolerant of the various ways people build broken URI's +;;; out there on the Net. It was given to me by Dan Connolly of the W3C. + +;;; Returns four values: scheme, path, search, frag-id. +;;; Each value is either #f or a string. + +(define uri-reserved (string->char-set "=;/#?: ")) + +(define (parse-uri s) + (let* ((slen (string-length s)) + ;; Search forwards for colon (or intervening reserved char). + (rs1 (char-set-index s uri-reserved)) ; 1st reserved char + (colon (and rs1 (char=? (string-ref s rs1) #\:) rs1)) + (path-start (if colon (+ colon 1) 0)) + + ;; Search backwards for # (or intervening reserved char). + (rs-last (char-set-rindex s uri-reserved)) + (sharp (and rs-last (char=? (string-ref s rs-last) #\#) rs-last)) + + ;; Search backwards for ? (or intervening reserved char). + (rs-penult (if sharp (char-set-rindex s uri-reserved sharp) rs-last)) + (ques (and rs-penult (char=? (string-ref s rs-penult) #\?) rs-penult)) + + (path-end (or ques sharp slen))) + + (values (and colon (substring s 0 colon)) + (split-uri-path s path-start path-end) + (and ques (substring s (+ ques 1) (or sharp slen))) + (and sharp (substring s (+ sharp 1) slen))))) + +;;; Caution: +;;; Don't use this proc until *after* you've parsed the URL -- unescaping +;;; might introduce reserved chars (like slashes and colons) that could +;;; blow your parse. + +(define (unescape-uri s . maybe-start/end) + (let-optionals maybe-start/end ((start 0) + (end (string-length s))) + (let* ((esc-seq? (lambda (i) (and (< (+ i 2) end) + (char=? (string-ref s i) #\%) + (hex-digit? (string-ref s (+ i 1))) + (hex-digit? (string-ref s (+ i 2)))))) + (hits (let lp ((i start) (hits 0)) ; count # of esc seqs. + (if (< i end) + (if (esc-seq? i) + (lp (+ i 3) (+ hits 1)) + (lp (+ i 1) hits)) + hits)))) + + (if (and (zero? hits) (zero? start) (= end (string-length s))) s + + (let* ((nlen (- (- end start) (* hits 2))) + (ns (make-string nlen))) + + (let lp ((i start) (j 0)) + (if (< j nlen) + (lp (? ((esc-seq? i) + (string-set! ns j + (let ((d1 (string-ref s (+ i 1))) + (d2 (string-ref s (+ i 2)))) + (ascii->char (+ (* 16 (hexchar->int d1)) + (hexchar->int d2))))) + (+ i 3)) + (else (string-set! ns j (string-ref s i)) + (+ i 1))) + (+ j 1)))) + ns))))) + +(define hex-digit? + (let ((hex-digits (string->char-set "0123456789abcdefABCDEF"))) + (lambda (c) (char-set-contains? hex-digits c)))) + +(define (hexchar->int c) + (- (char->ascii c) + (if (char-numeric? c) + (char->ascii #\0) + (- (if (char-upper-case? c) + (char->ascii #\A) + (char->ascii #\a)) + 10)))) + +(define int->hexchar + (let ((table '#(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 + #\A #\B #\C #\D #\E #\F))) + (lambda (i) (vector-ref table i)))) + + +;;; Caution: +;;; All reserved chars (e.g., slash, sharp, colon) get escaped: "=;/#?: " +;;; So don't apply this proc to chunks of text with syntactically meaningful +;;; reserved chars (e.g., paths with URI slashes or colons) -- they'll be +;;; escaped, and lose their special meaning. E.g. it would be a mistake +;;; to apply ESCAPE-URI to "//lcs.mit.edu:8001/foo/bar.html" because the +;;; slashes and colons would be escaped. + +(define uri-escaped-chars + (char-set-invert (char-set-union char-set:alphanumeric + (string->char-set "$-_@.&!*\"'(),+")))) + +;;; Takes a set of chars to escape. This is because we sometimes need to +;;; escape larger sets of chars for different parts of a URI. + +(define (escape-uri s . maybe-escaped-chars) + (let-optionals maybe-escaped-chars ((escaped-chars uri-escaped-chars)) + (let ((nlen (string-reduce 0 + (lambda (c i) + (+ i + (if (char-set-contains? uri-escaped-chars c) + 3 1))) + s))) + (if (= nlen (string-length s)) s + (let ((ns (make-string nlen))) + (string-reduce + 0 + (lambda (c i) + (+ i (? ((char-set-contains? uri-escaped-chars c) + (string-set! ns i #\%) + (let* ((d (char->ascii c)) + (dhi (bitwise-and (arithmetic-shift d -4) #xF)) + (dlo (bitwise-and d #xF))) + (string-set! ns (+ i 1) + (int->hexchar dhi)) + (string-set! ns (+ i 2) + (int->hexchar dlo))) + 3) + (else (string-set! ns i c) + 1)))) + s) + ns))))) + + +;;; Four args: context URI's : values, and +;;; main URI's : values. +;;; If the path cannot be resolved, return #f #f (this occurs if +;;; begins with n sequential slashes, and doesn't +;;; have that many sequential slashes anywhere). All paths are +;;; represented as non-empty lists. + +(define (resolve-uri cscheme cp scheme p) + (if scheme (values scheme p) ; If URI has own , it is absolute. + + (if (and (pair? p) (string=? (car p) "")) ; Path P begins with a slash. + + (receive (numsl p) ; Count and strip off initial + (do ((i 1 (+ i 1)) ; slashes (i.e., initial ""'s) + (q (cdr p) (cdr q))) + ((or (null? q) (not (string=? (car q) ""))) + (values i q))) + + ;; Skip through CP until we find that many sequential /'s. + (let lp ((cp-tail cp) + (rhead '()) ; CP prefix, reversed. + (j 0)) ; J counts sequential / + + (? ((and (pair? cp-tail) (string=? (car cp-tail) "")) ; More ""'s + (lp (cdr cp-tail) + (cons (car cp-tail) rhead) + (+ j 0))) + + ((= j numsl) ; Win + (values cscheme (simplify-uri-path (rev-append rhead p)))) + + ((pair? cp-tail) ; Keep looking. + (lp (cdr cp-tail) + (cons (car cp-tail) rhead) + 1)) + + (else (values #f #f))))) ; Lose. + + + ;; P doesn't begin with a slash. + (values cscheme (simplify-uri-path + (rev-append (cdr (reverse cp)) ; Drop non-dir part + p)))))) ; and append P. + + +(define (rev-append a b) ; (append (reverse a) b) + (let rev-app ((a a) (b b)) ; Should be defined in a list-proc + (if (pair? a) ; package, not here. + (rev-app (cdr a) (cons (car a) b)) + b))) + +;;; Cribbed from scsh's fname.scm + +(define (split-uri-path uri start end) ; Split at /'s (infix grammar). + (let split ((i start)) ; "" -> ("") + (? ((>= i end) '("")) + ((index uri #\/ i) => + (lambda (slash) + (cons (substring uri i slash) + (split (+ slash 1))))) + (else (list (substring uri i end)))))) + + +;;; The elements of PLIST must be escaped in case they contain slashes. +;;; This procedure doesn't escape them for you; you must do that yourself: +;;; (uri-path-list->path (map escape-uri pathlist)) + +(define (uri-path-list->path plist) + (join-strings plist "/")) ; Insert slashes between elts of PLIST. + + +;;; Remove . and foo/.. elts from path. After simplification, there are no +;;; . elements, and the only .. elements occur at the beginning of the path +;;; (i.e., they attempt to back up past root). One could argue that this is +;;; illegal, and we should error out in this case, reporting an unresolvable +;;; URL. The URI "spec" is not even slightly clear on this issue. +;;; +;;; URI's are pathetic. The case of /a/b//../c is ambiguous. Do we +;;; 1) not simplify across multi-slashes? +;;; 2) Flush the "empty" dir, giving /a/b//c +;;; 3) Flush across multi-slashes, giving /a/c +;;; What is the meaning of //../a ? /../b ? /../../c ? + +(define (simplify-uri-path p) ; P must be non-null. + (reverse (let lp ((path-list p) + (ans '())) + (let ((elt (car path-list)) + (path-list (cdr path-list))) + (? ((pair? path-list) + (? ((string=? "." elt) ; Kill . + (lp path-list ans)) + ((string=? ".." elt) + (if (pair? ans) + (lp path-list (cddr ans)) + (lp path-list (cons elt ans)))) + (else + (lp path-list (cons elt ans))))) + ;; Last element of list. + ((string=? ".." elt) + (if (null? ans) '("..") (cddr ans))) + (else (cons elt ans))))))) diff --git a/url.scm b/url.scm new file mode 100644 index 0000000..85ae438 --- /dev/null +++ b/url.scm @@ -0,0 +1,150 @@ +;;; URL parsing and unparsing -*- Scheme -*- +;;; Copyright (c) 1995 by Olin Shivers. + +;;; I'm only implementing http URL's right now. + +;;; References: +;;; - ftp://ftp.internic.net/rfc/rfc1738.txt +;;; Original RFC +;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/Overview.html +;;; General Web page of URI pointers. + + +;;; Unresolved issues: +;;; - The userhost parser shouldn't substitute default values -- +;;; that should happen in a separate step. + +;;; Imports and non-R4RS'isms +;;; define-record Record structures +;;; receive values MV return +;;; URI support +;;; index + +;;; The steps in hacking a URL are: +;;; - Take the UID, parse it, and resolve it with the context UID, if any. +;;; - Consult the UID's . Pick the appropriate URL parser and parse. + + +;;; Userhost strings: //:@:/ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; A USERHOST record describes path-prefixes of the form +;;; //:@:/ +;;; These are frequently used as the initial prefix of URL's describing +;;; Internet resources. + +(define-record userhost ; Each slot is a decoded string or #f. + user + password + host + port) + +;;; Parse a URI path into a userhost record. Default values are taken +;;; from the userhost record DEFAULT. Returns a userhost record if it +;;; wins, and #f if it cannot parse the path. CDDDR drops the userhost +;;; portion of the path. + +(define (parse-userhost path default) + (if (and (pair? path) ; The thing better begin + (string=? (car path) "") ; with // (i.e., have two + (pair? (cdr path)) ; initial "" elements). + (string=? (cadr path) "")) + + (let* ((uhs (caddr path)) ; Userhost string. + (uhs-len (string-length uhs)) + ; Usr:passwd at-sign, + (at (index uhs #\@)) ; if any. + + (colon1 (and at (index uhs #\:))) ; Usr:passwd colon, + (colon1 (and colon1 (< colon1 at) colon1)) ; if any. + + (colon2 (index uhs #\: (or at 0)))) ; Host:port colon, + ; if any. + (make-userhost (if at + (unescape-uri uhs 0 (or colon1 at)) + (userhost:user default)) + (if colon1 + (unescape-uri uhs (+ colon1 1) at) + (userhost:password default)) + (unescape-uri uhs (if at (+ at 1) 0) + (or colon2 uhs-len)) + (if colon2 + (unescape-uri uhs (+ colon2 1) uhs-len) + (userhost:port default)))) + + (fatal-syntax-error "URL must begin with //..." path))) + +;;; Unparser + +(define userhost-escaped-chars + (char-set-union uri-escaped-chars ; @ and : are also special + (string->char-set "@:"))) ; in UH strings. + +(define (userhost->string uh) + (let* ((us (userhost:user uh)) + (pw (userhost:password uh)) + (ho (userhost:host uh)) + (po (userhost:port uh)) + + ;; Encode before assembly in case pieces contain colons or at-signs. + (e (lambda (s) (escape-uri s userhost-escaped-chars))) + + (user/passwd (if us `(,(e us) . ,(if pw `(":" ,(e pw) "@") '("@"))) + '())) + (host/port (if ho `(,(e ho) . ,(if po `(":" ,(e po)) '())) + '()))) + + (apply string-append (append user/passwd host/port)))) + + +;;; HTTP URL parsing +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; The PATH slot of this record is the URL's path split at slashes, +;;; e.g., "foo/bar//baz/" => ("foo" "bar" "" "baz" "") +;;; These elements are in raw, unescaped format. To convert back to +;;; a string, use (uri-path-list->path (map escape-uri pathlist)). + +(define-record http-url + userhost ; Initial //anonymous@clark.lcs.mit.edu:80/ + path ; Rest of path, split at slashes & decoded. + search + frag-id) + +;;; The URI parser maps a string to four parts: +;;; : ? # +;;; , , and are strings; is a non-empty +;;; string list -- the URI's path split at slashes. Optional parts of the +;;; URI, when missing, are specified as #f. If is "http", then the +;;; other three parts can be passed to PARSE-HTTP-URL, which parses them +;;; into a HTTP-URL record (or #f if the string cannot be parsed). All strings +;;; come back from the URI parser encoded. SEARCH and FRAG-ID are left +;;; that way; this parser decodes the path elements. +;;; +;;; Return #f if the URL could not be parsed. + +(define (parse-http-url path search frag-id) + (let ((uh (parse-userhost path default-http-userhost))) + (if (or (userhost:user uh) (userhost:password uh)) + (fatal-syntax-error + "HTTP URL's may not specify a user or password field" path)) + + (make-http-url uh (map unescape-uri (cdddr path)) search frag-id))) + + +;;; Default http port is 80. +(define default-http-userhost (make-userhost #f #f #f "80")) + + +;;; Unparse. + +(define (http-url->string url) + (string-append "http://" + (userhost->string (http-url:userhost url)) + "/" + (uri-path-list->path (map escape-uri (http-url:path url))) + (? ((http-url:search url) => + (lambda (s) (string-append "?" s))) + (else "")) + (? ((http-url:frag-id url) => + (lambda (fi) (string-append "#" fi))) + (else ""))))