documentation added
uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package
This commit is contained in:
parent
2ff53cca7d
commit
5a15d24738
|
@ -253,7 +253,7 @@
|
||||||
;;; but I hand-coded it because it's short, and I didn't want invoke the
|
;;; but I hand-coded it because it's short, and I didn't want invoke the
|
||||||
;;; regexp machinery for something so simple.
|
;;; regexp machinery for something so simple.
|
||||||
|
|
||||||
(define non-whitespace (char-set-invert char-set:whitespace))
|
(define non-whitespace (char-set-complement char-set:whitespace))
|
||||||
|
|
||||||
(define (string->words s)
|
(define (string->words s)
|
||||||
(let recur ((start 0))
|
(let recur ((start 0))
|
||||||
|
|
|
@ -87,7 +87,7 @@
|
||||||
string-prefix?
|
string-prefix?
|
||||||
string-suffix?
|
string-suffix?
|
||||||
trim-spaces)
|
trim-spaces)
|
||||||
(open char-set-package let-opt scheme)
|
(open char-set-lib let-opt scheme)
|
||||||
(files stringhax))
|
(files stringhax))
|
||||||
|
|
||||||
(define-structure uri-package (export parse-uri
|
(define-structure uri-package (export parse-uri
|
||||||
|
@ -105,7 +105,7 @@
|
||||||
condhax
|
condhax
|
||||||
ascii
|
ascii
|
||||||
strings
|
strings
|
||||||
char-set-package
|
char-set-lib
|
||||||
bitwise
|
bitwise
|
||||||
field-reader-package
|
field-reader-package
|
||||||
scheme)
|
scheme)
|
||||||
|
@ -147,7 +147,7 @@
|
||||||
receiving
|
receiving
|
||||||
condhax
|
condhax
|
||||||
string-lib
|
string-lib
|
||||||
char-set-package
|
char-set-lib
|
||||||
uri-package
|
uri-package
|
||||||
scsh-utilities
|
scsh-utilities
|
||||||
httpd-error
|
httpd-error
|
||||||
|
@ -241,7 +241,7 @@
|
||||||
switch-syntax
|
switch-syntax
|
||||||
condhax
|
condhax
|
||||||
strings
|
strings
|
||||||
char-set-package
|
char-set-lib
|
||||||
defrec-package
|
defrec-package
|
||||||
define-record-types
|
define-record-types
|
||||||
handle
|
handle
|
||||||
|
|
19
rfc822.scm
19
rfc822.scm
|
@ -73,16 +73,20 @@
|
||||||
;;; printing these field names out, it looks best if you capitalise
|
;;; printing these field names out, it looks best if you capitalise
|
||||||
;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)).
|
;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)).
|
||||||
;;; - BODY List of strings which are the field's body, e.g.
|
;;; - BODY List of strings which are the field's body, e.g.
|
||||||
;;; ("shivers@lcs.mit.edu"). Each list element is one line from
|
;;; ("shivers@lcs.mit.edu"). Each list element is one line
|
||||||
;;; the field's body, so if the field spreads out over three lines,
|
;;; from the field's body, so if the field spreads out
|
||||||
;;; then the body is a list of three strings. The terminating
|
;;; over three lines, then the body is a list of three
|
||||||
;;; cr/lf's are trimmed from each string.
|
;;; strings. The terminating cr/lf's are trimmed from each
|
||||||
|
;;; string. A leading space or a leading horizontal tab
|
||||||
|
;;; is also trimmed, but one and only one.
|
||||||
;;; When there are no more fields -- EOF or a blank line has terminated the
|
;;; When there are no more fields -- EOF or a blank line has terminated the
|
||||||
;;; header section -- then the procedure returns [#f #f].
|
;;; header section -- then the procedure returns [#f #f].
|
||||||
;;;
|
;;;
|
||||||
;;; The %READ-RFC822-FIELD variant allows you to specify your own read-line
|
;;; The %READ-RFC822-FIELD variant allows you to specify your own
|
||||||
;;; procedure. The one used by READ-RFC822-FIELD terminates lines with either
|
;;; read-line procedure. The one used by READ-RFC822-FIELD terminates
|
||||||
;;; cr/lf or just lf, and it trims the terminator from the line.
|
;;; lines with either cr/lf or just lf, and it trims the terminator
|
||||||
|
;;; from the line. Your read-line procedure should trim the terminator
|
||||||
|
;;; of a line so an empty line is returned just as an empty string.
|
||||||
|
|
||||||
(define htab (ascii->char 9))
|
(define htab (ascii->char 9))
|
||||||
|
|
||||||
|
@ -113,6 +117,7 @@
|
||||||
(+ colon 1)
|
(+ colon 1)
|
||||||
(string-length line1)))))
|
(string-length line1)))))
|
||||||
(let ((c (peek-char port))) ; Could return EOF.
|
(let ((c (peek-char port))) ; Could return EOF.
|
||||||
|
;;; RFC822: continuous lines has to start with a space or a htab
|
||||||
(if (or (eqv? c #\space) (eqv? c htab))
|
(if (or (eqv? c #\space) (eqv? c htab))
|
||||||
(lp (cons (read-line port) lines))
|
(lp (cons (read-line port) lines))
|
||||||
(values name (reverse lines))))))))
|
(values name (reverse lines))))))))
|
||||||
|
|
41
uri.scm
41
uri.scm
|
@ -47,8 +47,9 @@
|
||||||
;;; This scheme is tolerant of the various ways people build broken URI's
|
;;; This scheme is tolerant of the various ways people build broken URI's
|
||||||
;;; out there on the Net. It was given to me by Dan Connolly of the W3C.
|
;;; out there on the Net. It was given to me by Dan Connolly of the W3C.
|
||||||
|
|
||||||
;;; Returns four values: scheme, path, search, frag-id.
|
;;; Returns four values: scheme, path, search, frag-id. Each value is
|
||||||
;;; Each value is either #f or a string.
|
;;; either #f or a string except of the path, which is a nonempty list
|
||||||
|
;;; of string (as mentioned above).
|
||||||
|
|
||||||
|
|
||||||
;;; MG: I think including = here will break up things, since it may be
|
;;; MG: I think including = here will break up things, since it may be
|
||||||
|
@ -98,19 +99,23 @@
|
||||||
|
|
||||||
(if (and (zero? hits) (zero? start) (= end (string-length s))) s
|
(if (and (zero? hits) (zero? start) (= end (string-length s))) s
|
||||||
|
|
||||||
(let* ((nlen (- (- end start) (* hits 2)))
|
(let* ((nlen (- (- end start) (* hits 2))) ; the new
|
||||||
(ns (make-string nlen)))
|
; length of the
|
||||||
|
; unescaped
|
||||||
|
; string
|
||||||
|
(ns (make-string nlen))) ; the result
|
||||||
|
|
||||||
(let lp ((i start) (j 0))
|
(let lp ((i start) (j 0)) ; sweap over the string
|
||||||
(if (< j nlen)
|
(if (< j nlen)
|
||||||
(lp (? ((esc-seq? i)
|
(lp (? ((esc-seq? i) ; unescape
|
||||||
|
; escape-sequence
|
||||||
(string-set! ns j
|
(string-set! ns j
|
||||||
(let ((d1 (string-ref s (+ i 1)))
|
(let ((d1 (string-ref s (+ i 1)))
|
||||||
(d2 (string-ref s (+ i 2))))
|
(d2 (string-ref s (+ i 2))))
|
||||||
(ascii->char (+ (* 16 (hexchar->int d1))
|
(ascii->char (+ (* 16 (hexchar->int d1))
|
||||||
(hexchar->int d2)))))
|
(hexchar->int d2)))))
|
||||||
(+ i 3))
|
(+ i 3))
|
||||||
(else (string-set! ns j (string-ref s i))
|
(else (string-set! ns j (string-ref s i))
|
||||||
(+ i 1)))
|
(+ i 1)))
|
||||||
(+ j 1))))
|
(+ j 1))))
|
||||||
ns)))))
|
ns)))))
|
||||||
|
@ -119,10 +124,11 @@
|
||||||
(let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
|
(let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
|
||||||
(lambda (c) (char-set-contains? hex-digits c))))
|
(lambda (c) (char-set-contains? hex-digits c))))
|
||||||
|
|
||||||
(define (hexchar->int c)
|
; make use of the fact that numbers and characters are in order in the ascii table
|
||||||
|
(define (hexchar->int c)
|
||||||
(- (char->ascii c)
|
(- (char->ascii c)
|
||||||
(if (char-numeric? c)
|
(if (char-numeric? c)
|
||||||
(char->ascii #\0)
|
(char->ascii #\0)
|
||||||
(- (if (char-upper-case? c)
|
(- (if (char-upper-case? c)
|
||||||
(char->ascii #\A)
|
(char->ascii #\A)
|
||||||
(char->ascii #\a))
|
(char->ascii #\a))
|
||||||
|
@ -143,8 +149,8 @@
|
||||||
;;; slashes and colons would be escaped.
|
;;; slashes and colons would be escaped.
|
||||||
|
|
||||||
(define uri-escaped-chars
|
(define uri-escaped-chars
|
||||||
(char-set-invert (char-set-union char-set:alphanumeric
|
(char-set-union (char-set-complement char-set:letter+digit)
|
||||||
(string->char-set "$-_@.&!*\"'(),+"))))
|
(string->char-set "$-_@.&!*\"'(),+")))
|
||||||
|
|
||||||
;;; Takes a set of chars to escape. This is because we sometimes need to
|
;;; Takes a set of chars to escape. This is because we sometimes need to
|
||||||
;;; escape larger sets of chars for different parts of a URI.
|
;;; escape larger sets of chars for different parts of a URI.
|
||||||
|
@ -154,15 +160,18 @@
|
||||||
(let ((nlen (string-reduce 0
|
(let ((nlen (string-reduce 0
|
||||||
(lambda (c i)
|
(lambda (c i)
|
||||||
(+ i
|
(+ i
|
||||||
(if (char-set-contains? uri-escaped-chars c)
|
(if (char-set-contains? escaped-chars c)
|
||||||
3 1)))
|
3 1)))
|
||||||
s)))
|
s))) ; new length of escaped string
|
||||||
(if (= nlen (string-length s)) s
|
(if (= nlen (string-length s)) s
|
||||||
(let ((ns (make-string nlen)))
|
(let ((ns (make-string nlen)))
|
||||||
(string-reduce
|
(string-reduce
|
||||||
0
|
0
|
||||||
(lambda (c i)
|
(lambda (c i) ; replace each occurance of an
|
||||||
(+ i (? ((char-set-contains? uri-escaped-chars c)
|
; character to escape with %ff where ff
|
||||||
|
; is the ascii-code in hexadecimal
|
||||||
|
; notation
|
||||||
|
(+ i (? ((char-set-contains? escaped-chars c)
|
||||||
(string-set! ns i #\%)
|
(string-set! ns i #\%)
|
||||||
(let* ((d (char->ascii c))
|
(let* ((d (char->ascii c))
|
||||||
(dhi (bitwise-and (arithmetic-shift d -4) #xF))
|
(dhi (bitwise-and (arithmetic-shift d -4) #xF))
|
||||||
|
|
12
url.scm
12
url.scm
|
@ -38,10 +38,12 @@
|
||||||
host
|
host
|
||||||
port)
|
port)
|
||||||
|
|
||||||
;;; Parse a URI path into a userhost record. Default values are taken
|
;;; Parse a URI path (a list representing a path, not a string!) into
|
||||||
;;; from the userhost record DEFAULT. Returns a userhost record if it
|
;;; a userhost record. Default values are taken from the userhost
|
||||||
;;; wins, and #f if it cannot parse the path. CDDDR drops the userhost
|
;;; record DEFAULT except for the host. Returns a userhost record if
|
||||||
;;; portion of the path.
|
;;; it wins, and #f if it cannot parse the path. CADDR drops the
|
||||||
|
;;; userhost portion of the path. In fact, fatal-syntax-error is
|
||||||
|
;;; called, if the path doesn't start with '//'.
|
||||||
|
|
||||||
(define (parse-userhost path default)
|
(define (parse-userhost path default)
|
||||||
(if (and (pair? path) ; The thing better begin
|
(if (and (pair? path) ; The thing better begin
|
||||||
|
@ -110,7 +112,7 @@
|
||||||
search
|
search
|
||||||
frag-id)
|
frag-id)
|
||||||
|
|
||||||
;;; The URI parser maps a string to four parts:
|
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
|
||||||
;;; <scheme> : <path> ? <search> # <frag-id>
|
;;; <scheme> : <path> ? <search> # <frag-id>
|
||||||
;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty
|
;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty
|
||||||
;;; string list -- the URI's path split at slashes. Optional parts of the
|
;;; string list -- the URI's path split at slashes. Optional parts of the
|
||||||
|
|
Loading…
Reference in New Issue