Complete and up-to-date documentation for the RFC822 library.
This commit is contained in:
		
							parent
							
								
									9b11ac1572
								
							
						
					
					
						commit
						1b31924b80
					
				|  | @ -1,104 +1,73 @@ | |||
| \chapter{Handle RFC822 headers}\label{cha:rfc822} | ||||
| \chapter{RFC~822 Library}\label{cha:rfc822} | ||||
| % | ||||
| \begin{description} | ||||
| \item[Used files:] rfc822.scm | ||||
| \item[Name of the package:] rfc822 | ||||
| \end{description} | ||||
| % | ||||
| \section{What users want to know} | ||||
| The \ex{rfc822} structure provides rudimentary support for parsing | ||||
| headers according to RFC 822 \textit{Standard for the format of ARPA | ||||
|   Internet text messages}.  These headers show up in SMTP messages, | ||||
| HTTP headers, etc. | ||||
| 
 | ||||
| \section{A note on line-terminators} | ||||
| Line-terminating sequences are always a drag, because there's no | ||||
| agreement on them -- the Net protocols and DOS use | ||||
| carriage-return/line-feed (\ex{cr}/\ex{lf}); Unix uses \ex{lf}; the | ||||
| Mac uses \ex{cr}. On the one hand, you'd like to use the code for all of | ||||
| the above; on the other, you'd also like to use the code for strict | ||||
| applications that must not recognise bare \ex{cr}'s or | ||||
| \ex{lf}'s as terminators. | ||||
| 
 | ||||
| RFC 822 requires a \ex{cr}/\ex{lf} (carriage-return/line-feed) pair to | ||||
| terminate lines of text. On the other hand, careful perusal of the | ||||
| text shows up some ambiguities (there are maybe three or four of | ||||
| these, and I'm too lazy to write them all down). Furthermore, it is an | ||||
| unfortunate fact that many Unix apps separate lines of RFC~822 text | ||||
| with simple linefeeds (e.g., messages kept in \ex{/usr/spool/mail}). | ||||
| As a result, this code takes a broad-minded view of line-terminators: | ||||
| lines can be terminated by either \ex{cr}/\ex{lf} or just \ex{lf}, and | ||||
| either terminating sequence is trimmed. | ||||
| 
 | ||||
| If you need stricter parsing, you can call the lower-level procedures | ||||
| \ex{\%read\=rfc822\=field} and \ex{\%read\=rfc822\=headers}. They take | ||||
| the read-line procedure as an extra parameter. This means that you can | ||||
| pass in a procedure that recognises only \ex{cr}/\ex{lf}'s, or only | ||||
| \ex{cr}'s (for a Mac app, perhaps), and you can determine whether or | ||||
| not the terminators get trimmed. However, your read-line procedure | ||||
| must indicate the header-terminating empty line by returning \emph{either} | ||||
| the empty string or the two-char string \ex{cr}/\ex{lf} (or the EOF object). | ||||
| 
 | ||||
| \section{Description of the procedures} | ||||
| 
 | ||||
| \defun{read-rfc822-field} {\ovar{port}} {name body} | ||||
| \begin{defundescx}{\%read-rfc822-field } {read-line port} {name body} | ||||
| \defun{read-rfc822-field} {[port] [read-line]} {name body} | ||||
| \begin{desc} | ||||
|    | ||||
|   Read one field from the port, and return two values: | ||||
| 
 | ||||
|   \begin{description} | ||||
|   \item[\var{name}] Symbol such as \ex{'subject} or \ex{'to}. The | ||||
|     field name is converted to a symbol using the Scheme | ||||
|     implementation's preferred case. If the implementation reads | ||||
|     symbols in a case-sensitive fashion (e.g., scsh), lowercase is | ||||
|     used. This means you can compare these symbols to quoted constants | ||||
|     using \ex{eq?}. When printing these field names out, it looks best | ||||
|     if you capitalize them with \ex{(capitalize\=string (symbol->string field\=name))}. | ||||
|      | ||||
|   \item[\var{body}] List of strings which are the field's body, e.g. | ||||
|     (``shivers\discretionary{@}{}{@}lcs.mit.edu''). Each list element is one line from | ||||
|     the field's body, so if the field spreads out over three lines, | ||||
|     then the body is a list of three strings. The terminating | ||||
|     \ex{cr}/\ex{lf}'s are trimmed from each string. A leading space or | ||||
|     a leading horizontal tab is also trimmed, but one and only one. | ||||
|   \item[\var{name}] This is a symbol describing the RFC 822 field | ||||
|     name, such as \ex{subject} or \ex{to}.  The symbol consists of all | ||||
|     lower-case letters.\footnote{In fact, \ex{read-rfc822-field} | ||||
|       uses the preferred case for symbols of the underlying Scheme | ||||
|       implementation which, in the case of scsh, happens to be lower-case.} | ||||
|   \item[\var{body}] This is a list of strings which are the field's | ||||
|     body.  Each list element is one line from the field's body, | ||||
|     so if the field spreads out over three lines, then the body is a | ||||
|     list of three strings. The terminating \ex{cr}/\ex{lf}'s are | ||||
|     trimmed from each string.  Note that header bodies frequently contain | ||||
|     space after the colon like this: | ||||
|     % | ||||
| \begin{verbatim} | ||||
| Subject: RFC 822 can format itself in the ARPA | ||||
| \end{verbatim} | ||||
|     % | ||||
|     In this case, \var{body} will be | ||||
| \begin{verbatim} | ||||
| (" RFC 822 can format itself in the ARPA") | ||||
| \end{verbatim} | ||||
|   \end{description} | ||||
|      | ||||
|   When there are no more fields -- EOF or a blank line has terminated | ||||
|   the header section -- then the procedure returns [\sharpf\ \sharpf]. | ||||
|   | ||||
|   The \ex{\%read-rfc822-field} variant allows you to specify your own | ||||
|   read-line procedure. The one used by \ex{read-rfc822-field} | ||||
|   terminates lines with either \ex{cr}/\ex{lf} or just \ex{lf}, and it | ||||
|   trims the terminator from the line. Your read-line procedure should | ||||
|   trim the terminator of the line, so an empty line is returned as an | ||||
|   empty string. | ||||
|    | ||||
|   The procedures raise an error if the syntax of the read field (the | ||||
|   line returned by the read-line-function) is illegal (regarding | ||||
|   RFC~822). | ||||
| \end{defundescx} | ||||
|   % | ||||
|   When there are no more fields---EOF or a blank line has terminated | ||||
|   the header section---then \ex{read-rfc822-field} returns [\sharpf\ \sharpf]. | ||||
| 
 | ||||
| \defun{read-rfc822-headers} {\ovar{port}} {association list} | ||||
| \begin{defundescx}{\%read-rfc822-headers} {read-line port} | ||||
|   {association list} | ||||
|   \var{Port} is an optional input port to read from---it defaults to | ||||
|   the value of \ex{(current-input-port)}. | ||||
|    | ||||
|   Read in and parse up a section of text that looks like the header | ||||
|   portion of an RFC~822 message. Return an association list mapping a | ||||
|   field name (a symbol such as 'date or 'subject) to a list of field | ||||
|   bodies -- one for each occurrence of the field in the header. So if | ||||
|   there are five ``Received-by:'' fields in the header, the alist maps | ||||
|   'received-by to a five element list. Each body is in turn | ||||
|   represented by a list of strings -- one for each line of the field. | ||||
|   So a field spread across three lines would produce a three element | ||||
|   body. | ||||
|   \var{Read-line} is an optional parameter specifying a procedure of | ||||
|   one argument (the input port) used to read the raw header lines. | ||||
|   The default used by \ex{read-rfc822-field} terminates lines with | ||||
|   either \ex{cr}/\ex{lf} or just \ex{lf}, and it trims the terminator | ||||
|   from the line.  A custom \var{read-line} procedure should likewise | ||||
|   trim the terminator of the line, so an empty line is returned as an | ||||
|   empty string. | ||||
|    | ||||
|   The \ex{\%read-rfc822-headers} variant allows you to specify your | ||||
|   own read-line procedure. See \emph{A note on line-terminators} above | ||||
|   for reasons why. | ||||
|   The procedure raises an error if the syntax of the read field (the | ||||
|   line returned by the read-line-function) is illegal according to | ||||
|   RFC~822. | ||||
| \end{desc} | ||||
| 
 | ||||
|   Hint: If you want to get familiar with these procedures, you might | ||||
|   find \ex{make\=string\=input\=port}, that makes a port out of a | ||||
|   string, helpful. | ||||
| \end{defundescx} | ||||
| \defun{read-rfc822-headers} {[port] [read-line]} {association-list} | ||||
| \begin{desc} | ||||
|   This procedure reads in and parses a section of text that looks like | ||||
|   the header portion of an RFC~822 message.  It returns an association | ||||
|   list mapping a field name (a symbol such as 'date or 'subject) to a | ||||
|   list of field bodies---one for each occurrence of the field in the | ||||
|   header. So if there are five \ex{Received-by} fields in the header, | ||||
|   the alist maps \ex{received-by} to a five-element list. Each body is | ||||
|   in turn represented by a list of strings---one for each line of the | ||||
|   field.  So a field spread across three lines would produce a | ||||
|   three-element body. | ||||
|    | ||||
|   \var{Port} and \var{read-line} are as with \ex{read-rfc822-field}. | ||||
| \end{desc} | ||||
| 
 | ||||
| \begin{defundesc}{rejoin-header-lines} {alist \ovar{separator}} | ||||
|   {association list} | ||||
| \defun{rejoin-header-lines} {alist [separator]}  {association list} | ||||
| \begin{desc} | ||||
|    | ||||
|   Takes a field \var{alist} such as is returned by | ||||
|   \ex{read-rfc822-headers} and returns an equivalent association list. | ||||
|  | @ -110,8 +79,8 @@ the empty string or the two-char string \ex{cr}/\ex{lf} (or the EOF object). | |||
|    | ||||
|   To rejoin a single body list, use scsh's \ex{join-strings} | ||||
|   procedure. | ||||
| \end{defundesc} | ||||
| 
 | ||||
| \end{desc} | ||||
| % | ||||
| For the following definitions' examples, let's use this set of | ||||
| RFC~822 headers: | ||||
| \begin{alltt} | ||||
|  | @ -122,51 +91,37 @@ RFC~822 headers: | |||
| \end{alltt} | ||||
| % | ||||
| 
 | ||||
| \begin{defundesc}{get-header-all} {headers name} {string list list} | ||||
| \defun{get-header-all} {headers name} {string list list} | ||||
| \begin{desc} | ||||
|   Returns all entries or \sharpf, e.g.\ | ||||
|   \codex{(get-header-all hdrs 'to)} | ||||
|   results to | ||||
|   returns | ||||
|   \codex{'((" ziggy," "  newts") (" gjs, tk"))} | ||||
| \end{defundesc} | ||||
| \end{desc} | ||||
| 
 | ||||
| \begin{defundesc}{get-header-lines} {headers name} {string list} | ||||
| \defun{get-header-lines} {headers name} {string list} | ||||
| \begin{desc} | ||||
|   Returns all lines of the first entry or \sharpf, e.g.\ | ||||
|   \codex{(get-header-lines hdrs 'to)} | ||||
|   results to | ||||
|   \codex{'(" ziggy," "  newts")} | ||||
| \end{defundesc} | ||||
|   returns | ||||
|   \codex{(" ziggy," "  newts")} | ||||
| \end{desc} | ||||
| 
 | ||||
| \begin{defundesc}{get-headers} {headers name \ovar{separator}} {string} | ||||
| \defun{get-header} {headers name [separator]} {string} | ||||
| \begin{desc} | ||||
|   Returns the first entry with the lines joined together by \var{separator} | ||||
|   (newline by default), e.g.\ | ||||
|   \codex{(get-header hdrs 'to)} | ||||
|   results to | ||||
|   returns | ||||
| \begin{alltt} | ||||
| " ziggy, | ||||
|   newts" | ||||
| \end{alltt} | ||||
| % | ||||
|   Note that \ex{newts} is preceded by two spaces. | ||||
| \end{defundesc} | ||||
| 
 | ||||
| 
 | ||||
| \begin{defundesc}{string->symbol-pref}{string}{symbol} | ||||
|   Takes a \var{string} and converts it to a symbol using the Scheme | ||||
|   implementation's preferred case. (The preferred case is determined by | ||||
|   doing a one-time \ex{symbol->string} conversion of \ex{'a}.) | ||||
| \end{defundesc} | ||||
| 
 | ||||
| \section{Desirable functionalities} | ||||
| 
 | ||||
| \begin{itemize} | ||||
| \item Unfolding long lines. | ||||
| \item Lexing structured fields. | ||||
| \item Unlexing structured fields into canonical form. | ||||
| \item Parsing and unparsing dates. | ||||
| \item Parsing and unparsing addresses. | ||||
| \end{itemize} | ||||
| \end{desc} | ||||
| 
 | ||||
| %%% Local Variables:  | ||||
| %%% mode: latex | ||||
| %%% TeX-master: man.tex | ||||
| %%% TeX-master: "man" | ||||
| %%% End:  | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 sperber
						sperber