2000-09-26 10:35:26 -04:00
|
|
|
;;; Code to parse information submitted from HTML forms. -*- Scheme -*-
|
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
|
|
|
|
|
|
|
;;; See http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
|
|
|
|
|
|
|
|
;;; Imports and non-R4RS'isms
|
2001-04-27 12:19:34 -04:00
|
|
|
;;; string-index (string srfi)
|
2000-09-26 10:35:26 -04:00
|
|
|
;;; let-optionals (let-opt package)
|
|
|
|
;;; receive (Multiple-value return)
|
|
|
|
;;; unescape-uri
|
|
|
|
;;; string-map (strings package)
|
|
|
|
;;; ? (cond)
|
|
|
|
|
|
|
|
;;; About HTML forms
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; The form's field data are turned into a single string, of the form
|
|
|
|
;;; The form's field data are turned into a single string, of the form
|
|
|
|
;;; name=val&name=val
|
|
|
|
;;; where the <name> and <val> parts are URI encoded to hide their
|
|
|
|
;;; &, =, and + chars, among other things. After URI encoding, the
|
|
|
|
;;; space chars are converted to + chars, just for fun. It is important
|
|
|
|
;;; to encode the spaces this way, because the perfectly general %xx escape
|
|
|
|
;;; mechanism might be insufficiently confusing. This variant encoding is
|
|
|
|
;;; called "form-url encoding."
|
|
|
|
;;;
|
|
|
|
;;; If the form's method is POST,
|
|
|
|
;;; Browser sends the form's field data in the entity block, e.g.,
|
|
|
|
;;; "button=on&ans=yes". The request's Content-type: is application/
|
|
|
|
;;; x-www-form-urlencoded, and the request's Content-length: is the
|
|
|
|
;;; number of bytes in the form data.
|
|
|
|
;;;
|
|
|
|
;;; If the form's method is GET,
|
|
|
|
;;; Browser sends the form's field data in the URL's <search> part.
|
|
|
|
;;; (So the server will pass it to the CGI script as $QUERY_STRING,
|
|
|
|
;;; and perhaps also in argv[]).
|
|
|
|
;;;
|
|
|
|
;;; In either case, the data is "form-url encoded" (as described above).
|
|
|
|
|
|
|
|
;;; Form-query parsing
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Parse "foo=x&bar=y" into (("foo" . "x") ("bar" . "y"))
;;; The query string Q is split into fields at each & character, and each
;;; field is split into a name and a value at its first = character. The
;;; name and value substrings are then plus-decoded and URI-decoded (see
;;; UNESCAPE-URI+). Splitting happens before decoding, so escaped & and =
;;; characters inside a name or value are not mistaken for delimiters.
;;;
;;; A field that contains no = character is bogus. It is silently dropped
;;; and parsing continues with the next field, so "a&b=c&d=f" parses to
;;; (("b" . "c") ("d" . "f")). Empty fields (as in "a=b&&c=d", or a
;;; trailing &) are dropped the same way. Returns the empty list for an
;;; empty query string.

(define (parse-html-form-query q)
  (let ((qlen (string-length q)))
    (let recur ((i 0))
      (if (>= i qlen)
          '()
          ;; K is the end of the field starting at I: the index of the next
          ;; & character, or the end of the string. J is the index of the
          ;; first = character inside [I,K), or #f if the field has none.
          ;; Bounding the = search by K is what keeps a field from
          ;; swallowing its neighbours.
          (let* ((k (or (string-index q #\& i) qlen))
                 (j (string-index q #\= i k)))
            (if j
                (cons (cons (unescape-uri+ q i j)
                            (unescape-uri+ q (+ j 1) k))
                      (recur (+ k 1)))
                ;; BOGUS FIELD (no =) -- skip it. Issue a warning?
                (recur (+ k 1))))))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Decode a form-url encoded string: every + character becomes a space,
;;; and the result is then handed to UNESCAPE-URI for URI decoding.
;;; The optional START and END arguments select the substring of S to
;;; decode; they default to 0 and the length of S.

(define (unescape-uri+ s . maybe-start/end)
  (let-optionals maybe-start/end ((start 0)
                                  (end (string-length s)))
    (let* ((whole-string? (and (zero? start)
                               (= end (string-length s))))
           ;; Only copy out a substring when the caller asked for a proper
           ;; slice; the whole string can be mapped over directly.
           (piece (if whole-string?
                      s
                      (substring s start end)))
           (plus->space (lambda (c)
                          (if (char=? c #\+) #\space c))))
      (unescape-uri (string-map plus->space piece)))))
|