569 lines
20 KiB
Scheme
569 lines
20 KiB
Scheme
|
;;; Field and record parsing utilities for scsh.
|
||
|
;;; Copyright (c) 1994 by Olin Shivers.
|
||
|
|
||
|
;;; Notes:
|
||
|
;;; - Comment on the dependencies here...
|
||
|
;;; - Redefine READ-LINE using READ-DELIMITED.
|
||
|
;;; - Awk should deal with case-insensitivity.
|
||
|
;;; - Should I change the field-splitters to return lists? It's the
|
||
|
;;; right thing, and costs nothing in terms of efficiency.
|
||
|
|
||
|
;;; Looping primitives:
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; It is nicer for loops that loop over a bunch of different things
|
||
|
;;; if you can encapsulate the idea of iterating over a data structure
|
||
|
;;; with a
|
||
|
;;; (next-element state) -> elt next-state
|
||
|
;;; (more-elements? state) -? #t/#f
|
||
|
;;; generator/termination-test pair. You can use the generator with REDUCE
|
||
|
;;; to make a list; you can stick it into a loop macro to loop over the
|
||
|
;;; elements. For example, if we had an extensible Yale-loop style loop macro,
|
||
|
;;; we could have a loop clause like
|
||
|
;;;
|
||
|
;;; (loop (for field in-infix-delimited-string ":" path)
|
||
|
;;; (do (display field) (newline)))
|
||
|
;;;
|
||
|
;;; and it would be simple to expand this into code using the generator.
|
||
|
;;; With procedural inlining, you can get pretty optimal loops over data
|
||
|
;;; structures this way.
|
||
|
;;;
|
||
|
;;; As of now, you are forced to parse fields into a buffer, and loop
|
||
|
;;; over that. This is inefficient of time and space. If I ever manage to do
|
||
|
;;; an extensible loop macro for Scheme 48, I'll have to come back to this
|
||
|
;;; package and rethink how to provide this functionality.
|
||
|
|
||
|
;;; Forward-progress guarantees and empty string matches.
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; A loop that pulls text off a string by matching a regexp against
|
||
|
;;; that string can conceivably get stuck in an infinite loop if the
|
||
|
;;; regexp matches the empty string. For example, the regexps
|
||
|
;;; ^, $, .*, foo|[^f]* can all match the empty string.
|
||
|
;;;
|
||
|
;;; The regexp-loop routines in this code are careful to handle this case.
|
||
|
;;; If a regexp matches the empty string, the next search starts, not from
|
||
|
;;; the end of the match (which in the empty string case is also the
|
||
|
;;; beginning -- there's the rub), but from the next character over.
|
||
|
;;; This is the correct behaviour. Regexps match the longest possible
|
||
|
;;; string at a given location, so if the regexp matched the empty string
|
||
|
;;; at location i, then it is guaranteed they could not have matched
|
||
|
;;; a longer pattern starting with character #i. So we can safely begin
|
||
|
;;; our search for the next match at char i+1.
|
||
|
;;;
|
||
|
;;; So every iteration through the loop makes some forward progress,
|
||
|
;;; and the loop is guaranteed to terminate.
|
||
|
;;;
|
||
|
;;; This has the effect you want with field parsing. For example, if you split
|
||
|
;;; a string with the empty pattern, you will explode the string into its
|
||
|
;;; individual characters:
|
||
|
;;; ((suffix-splitter "") "foo") -> #("" "f" "o" "o")
|
||
|
;;; However, even though this boundary case is handled correctly, we don't
|
||
|
;;; recommend using it. Say what you mean -- just use a field splitter:
|
||
|
;;; ((field-splitter ".") "foo") -> #("f" "o" "o")
|
||
|
|
||
|
|
||
|
|
||
|
;;; (join-strings string-list [delimiter grammar]) => string
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; Paste strings together using the delimiter string.
|
||
|
;;;
|
||
|
;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
|
||
|
;;;
|
||
|
;;; DELIMITER defaults to a single space " "
|
||
|
;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix.
|
||
|
|
||
|
;;; (join-strings strings [delim grammar])
|
||
|
|
||
|
(define (join-strings strings . args)
|
||
|
(if (pair? strings)
|
||
|
(receive (delim grammar) (parse-optionals args " " 'infix)
|
||
|
(check-arg string? delim join-strings)
|
||
|
(let ((strings (reverse strings)))
|
||
|
(let lp ((strings (cdr strings))
|
||
|
(ans (case grammar
|
||
|
((infix) (list (car strings)))
|
||
|
((suffix) (list (car strings) delim))
|
||
|
(else (error "Illegal grammar" grammar)))))
|
||
|
(if (pair? strings)
|
||
|
(lp (cdr strings)
|
||
|
(cons (car strings) (cons delim ans)))
|
||
|
|
||
|
; All done
|
||
|
(apply string-append ans)))))
|
||
|
|
||
|
"")) ; Special-cased for infix grammar.
|
||
|
|
||
|
;;; FIELD PARSERS
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; This section defines routines to split a string into fields.
|
||
|
;;; You can parse by specifying a pattern that *separates* fields,
|
||
|
;;; a pattern that *terminates* fields, or a pattern that *matches*
|
||
|
;;; fields.
|
||
|
|
||
|
(define (->delim-matcher x)
|
||
|
(if (procedure? x) x ; matcher proc
|
||
|
(let ((re (cond ((regexp? x) x) ; regexp pattern
|
||
|
((string? x) (make-regexp x)) ; regexp string
|
||
|
(else (error "Illegal pattern/parser" x)))))
|
||
|
|
||
|
;; The matcher proc.
|
||
|
(lambda (s i)
|
||
|
(cond ((regexp-exec re s i) =>
|
||
|
(lambda (m) (values (match:start m 0) (match:end m 0))))
|
||
|
(else (values #f #f)))))))
|
||
|
|
||
|
;;; (infix-splitter [re num-fields handle-delim]) -> parser
|
||
|
;;; (suffix-splitter [re num-fields handle-delim]) -> parser
|
||
|
;;; (sloppy-suffix-splitter [re num-fields handle-delim]) -> parser
|
||
|
;;; (field-splitter [re num-fields]) -> parser
|
||
|
;;;
|
||
|
;;; (parser string [start]) -> string-list
|
||
|
|
||
|
(define (make-field-parser-generator default-delim-matcher loop-proc)
|
||
|
;; This is the parser-generator
|
||
|
(lambda args
|
||
|
(receive (delim-spec num-fields handle-delim)
|
||
|
(parse-optionals args default-delim-matcher #f 'trim)
|
||
|
|
||
|
;; Process and error-check the args
|
||
|
(let ((match-delim (->delim-matcher delim-spec))
|
||
|
(cons-field (case handle-delim ; Field is s[i,j).
|
||
|
((trim) ; Delimiter is s[j,k).
|
||
|
(lambda (s i j k fields)
|
||
|
(cons (substring s i j) fields)))
|
||
|
((split)
|
||
|
(lambda (s i j k fields)
|
||
|
(cons (substring s j k)
|
||
|
(cons (substring s i j) fields))))
|
||
|
((concat)
|
||
|
(lambda (s i j k fields)
|
||
|
(cons (substring s i k)
|
||
|
fields)))
|
||
|
(else
|
||
|
(error "Illegal handle-delim spec"
|
||
|
handle-delim)))))
|
||
|
|
||
|
(receive (num-fields nfields-exact?)
|
||
|
(cond ((not num-fields) (values #f #f))
|
||
|
((not (integer? num-fields))
|
||
|
(error "Illegal NUM-FIELDS value" num-fields))
|
||
|
((<= num-fields 0) (values (- num-fields) #f))
|
||
|
(else (values num-fields #t)))
|
||
|
|
||
|
;; This is the parser.
|
||
|
(lambda (s . maybe-start)
|
||
|
(reverse (loop-proc s (optional-arg maybe-start 0)
|
||
|
match-delim cons-field
|
||
|
num-fields nfields-exact?))))))))
|
||
|
|
||
|
(define default-field-matcher (->delim-matcher "[^ \t\n]+"))
|
||
|
|
||
|
;;; (field-splitter [field-spec num-fields])
|
||
|
|
||
|
(define (field-splitter . args)
|
||
|
(receive (field-spec num-fields)
|
||
|
(parse-optionals args default-field-matcher #f)
|
||
|
|
||
|
;; Process and error-check the args
|
||
|
(let ((match-field (->delim-matcher field-spec)))
|
||
|
(receive (num-fields nfields-exact?)
|
||
|
(cond ((not num-fields) (values #f #f))
|
||
|
((not (integer? num-fields))
|
||
|
(error "Illegal NUM-FIELDS value"
|
||
|
field-splitter num-fields))
|
||
|
((<= num-fields 0) (values (- num-fields) #f))
|
||
|
(else (values num-fields #t)))
|
||
|
|
||
|
;; This is the parser procedure.
|
||
|
(lambda (s . maybe-start)
|
||
|
(reverse (fieldspec-field-loop s (optional-arg maybe-start 0)
|
||
|
match-field num-fields nfields-exact?)))))))
|
||
|
|
||
|
|
||
|
;;; These four procedures implement the guts of each parser
|
||
|
;;; (field, infix, suffix, and sloppy-suffix).
|
||
|
;;;
|
||
|
;;; The CONS-FIELD argument is a procedure that parameterises the
|
||
|
;;; HANDLE-DELIM action for the field parser.
|
||
|
;;;
|
||
|
;;; The MATCH-DELIM argument is used to match a delimiter.
|
||
|
;;; (MATCH-DELIM S I) returns two integers [start, end] marking
|
||
|
;;; the next delimiter after index I in string S. If no delimiter is
|
||
|
;;; found, it returns [#f #f].
|
||
|
|
||
|
;;; In the main loop of each parser, the loop variable LAST-NULL? tells if the
|
||
|
;;; previous delimiter-match matched the empty string. If it did, we start our
|
||
|
;;; next delimiter search one character to the right of the match, so we won't
|
||
|
;;; loop forever. This means that an empty delimiter regexp "" simply splits
|
||
|
;;; the string at each character, which is the correct thing to do.
|
||
|
;;;
|
||
|
;;; These routines return the answer as a reversed list.
|
||
|
|
||
|
|
||
|
(define (fieldspec-field-loop s start match-field num-fields nfields-exact?)
|
||
|
(let ((end (string-length s)))
|
||
|
(let lp ((i start) (nfields 0) (fields '()) (last-null? #f))
|
||
|
(let ((j (if last-null? (+ i 1) i)) ; Where to start next delim search.
|
||
|
|
||
|
;; Check to see if we made our quota before returning answer.
|
||
|
(finish-up (lambda ()
|
||
|
(if (and num-fields (< nfields num-fields))
|
||
|
(error "Too few fields in record." num-fields s)
|
||
|
fields))))
|
||
|
|
||
|
(cond ((> j end) (finish-up)) ; We are done. Finish up.
|
||
|
|
||
|
;; Read too many fields. Bomb out.
|
||
|
((and nfields-exact? (> nfields num-fields))
|
||
|
(error "Too many fields in record." num-fields s))
|
||
|
|
||
|
;; Made our lower-bound quota. Quit early.
|
||
|
((and num-fields (= nfields num-fields) (not nfields-exact?))
|
||
|
(if (= i end) fields ; Special case hackery.
|
||
|
(cons (substring s i end) fields)))
|
||
|
|
||
|
;; Match off another field & loop.
|
||
|
(else (receive (m0 m1) (match-field s j)
|
||
|
(if m0 (lp m1 (+ nfields 1)
|
||
|
(cons (substring s m0 m1) fields)
|
||
|
(= m0 m1))
|
||
|
(finish-up))))))))) ; No more matches. Finish up.
|
||
|
|
||
|
|
||
|
(define (infix-field-loop s start match-delim cons-field
|
||
|
num-fields nfields-exact?)
|
||
|
(let ((end (string-length s)))
|
||
|
(if (= start end) '() ; Specially hack empty string.
|
||
|
|
||
|
(let lp ((i start) (nfields 0) (fields '()) (last-null? #f))
|
||
|
(let ((finish-up (lambda ()
|
||
|
;; s[i,end) is the last field. Terminate the loop.
|
||
|
(cond ((and num-fields (< (+ nfields 1) num-fields))
|
||
|
(error "Too few fields in record."
|
||
|
num-fields s))
|
||
|
|
||
|
((and nfields-exact?
|
||
|
(>= nfields num-fields))
|
||
|
(error "Too many fields in record."
|
||
|
num-fields s))
|
||
|
|
||
|
(else
|
||
|
(cons (substring s i end) fields)))))
|
||
|
|
||
|
(j (if last-null? (+ i 1) i))) ; Where to start next search.
|
||
|
|
||
|
(cond
|
||
|
;; If we've read NUM-FIELDS fields, quit early .
|
||
|
((and num-fields (= nfields num-fields))
|
||
|
(if nfields-exact?
|
||
|
(error "Too many fields in record." num-fields s)
|
||
|
(cons (substring s i end) fields)))
|
||
|
|
||
|
|
||
|
((<= j end) ; Match off another field.
|
||
|
(receive (m0 m1) (match-delim s j)
|
||
|
(if m0
|
||
|
(lp m1 (+ nfields 1)
|
||
|
(cons-field s i m0 m1 fields)
|
||
|
(= m0 m1))
|
||
|
(finish-up)))) ; No more delimiters - finish up.
|
||
|
|
||
|
;; We've run off the end of the string. This is a weird
|
||
|
;; boundary case occuring with empty-string delimiters.
|
||
|
(else (finish-up))))))))
|
||
|
|
||
|
|
||
|
|
||
|
;;; Match off an optional initial delimiter,
|
||
|
;;; then jump off to the suffix parser.
|
||
|
|
||
|
(define (sloppy-suffix-field-loop s start match-delim cons-field
|
||
|
num-fields nfields-exact?)
|
||
|
;; If sloppy-suffix, skip an initial delimiter if it's there.
|
||
|
(let ((start (receive (i j) (match-delim s start)
|
||
|
(if (and i (zero? i)) j start))))
|
||
|
(suffix-field-loop s start match-delim cons-field
|
||
|
num-fields nfields-exact?)))
|
||
|
|
||
|
|
||
|
(define (suffix-field-loop s start match-delim cons-field
|
||
|
num-fields nfields-exact?)
|
||
|
(let ((end (string-length s)))
|
||
|
|
||
|
(let lp ((i start) (nfields 0) (fields '()) (last-null? #f))
|
||
|
(let ((j (if last-null? (+ i 1) i))) ; Where to start next delim search.
|
||
|
(cond ((= i end) ; We are done.
|
||
|
(if (and num-fields (< nfields num-fields)) ; Didn't make quota.
|
||
|
(error "Too few fields in record." num-fields s)
|
||
|
fields))
|
||
|
|
||
|
;; Read too many fields. Bomb out.
|
||
|
((and nfields-exact? (= nfields num-fields))
|
||
|
(error "Too many fields in record." num-fields s))
|
||
|
|
||
|
;; Made our lower-bound quota. Quit early.
|
||
|
((and num-fields (= nfields num-fields) (not nfields-exact?))
|
||
|
(cons (substring s i end) fields))
|
||
|
|
||
|
(else ; Match off another field.
|
||
|
(receive (m0 m1) (match-delim s j)
|
||
|
(if m0 (lp m1 (+ nfields 1)
|
||
|
(cons-field s i m0 m1 fields)
|
||
|
(= m0 m1))
|
||
|
(error "Missing field terminator" s)))))))))
|
||
|
|
||
|
|
||
|
;;; Now, build the exported procedures: {infix,suffix,sloppy-suffix}-splitter.
|
||
|
|
||
|
(define default-suffix-matcher (->delim-matcher "[ \t\n]+|$"))
|
||
|
(define default-infix-matcher (->delim-matcher "[ \t\n]+"))
|
||
|
|
||
|
(define infix-splitter
|
||
|
(make-field-parser-generator default-infix-matcher infix-field-loop))
|
||
|
(define suffix-splitter
|
||
|
(make-field-parser-generator default-suffix-matcher suffix-field-loop))
|
||
|
(define sloppy-suffix-splitter
|
||
|
(make-field-parser-generator default-suffix-matcher sloppy-suffix-field-loop))
|
||
|
|
||
|
|
||
|
|
||
|
;;; Delimited readers
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; We repeatedly allocate a buffer and fill it with READ-DELIMITED!
|
||
|
;;; until we hit a delimiter or EOF. Each time through the loop, we
|
||
|
;;; double the total buffer space, so the loop terminates with a log
|
||
|
;;; number of reads, but uses at most double the optimal buffer space.
|
||
|
|
||
|
(define (read-delimited delims . maybe-port)
|
||
|
(let ((smart-substring (lambda (s end)
|
||
|
(if (= end (string-length s)) s
|
||
|
(substring s 0 end))))
|
||
|
(delims (->char-set delims)))
|
||
|
|
||
|
;; BUFLEN is total amount of buffer space allocated to date.
|
||
|
(let lp ((strs '()) (buflen 80) (buf (make-string 80)))
|
||
|
(cond ((apply read-delimited! delims buf maybe-port) =>
|
||
|
(lambda (i)
|
||
|
(if (null? strs) ; Gratuitous optimisation.
|
||
|
(smart-substring buf i)
|
||
|
(apply string-append
|
||
|
(reverse (if (eof-object? i)
|
||
|
strs
|
||
|
(cons (smart-substring buf i)
|
||
|
strs)))))))
|
||
|
|
||
|
(else (lp (cons buf strs)
|
||
|
(+ buflen buflen)
|
||
|
(make-string buflen)))))))
|
||
|
|
||
|
|
||
|
;;; (read-delimited! delims buf [port start end])
|
||
|
|
||
|
(define (read-delimited! delims buf . args) ; [port start end]
|
||
|
(receive (port start end)
|
||
|
(parse-optionals args (current-input-port) 0 (string-length buf))
|
||
|
(check-arg input-port? port read-delimited!)
|
||
|
(let ((delims (->char-set delims)))
|
||
|
; (if (fd-inport? port) ; ???
|
||
|
;
|
||
|
; ;; Handle fdports in C code for speed.
|
||
|
; (receive (err val)
|
||
|
; (%read-delimited-fdport!/errno delims buf port start end)
|
||
|
; (if err
|
||
|
; (errno-error err read-delimited!)
|
||
|
; val))
|
||
|
|
||
|
;; This is the code for other kinds of ports.
|
||
|
(let lp ((i start))
|
||
|
(and (< i end)
|
||
|
(let ((c (peek-char port)))
|
||
|
(if (or (eof-object? c)
|
||
|
(char-set-contains? delims c))
|
||
|
(- i start)
|
||
|
(begin (string-set! buf i (read-char port))
|
||
|
(lp (+ i 1))))))))))
|
||
|
;)
|
||
|
|
||
|
;(define-foreign %read-delimited-fdport!/errno (read_delim (string-desc delims)
|
||
|
; (string-desc buf)
|
||
|
; (desc port) ;???
|
||
|
; (fixnum start)
|
||
|
; (fixnum end))
|
||
|
; desc ; errno or #f
|
||
|
; desc) ; nread or #f or eof-object
|
||
|
|
||
|
(define (skip-char-set cset . maybe-port)
|
||
|
(let ((port (optional-arg maybe-port (current-input-port))))
|
||
|
(let lp ()
|
||
|
(let ((c (peek-char port)))
|
||
|
(cond ((and (char? c) (char-set-contains? cset c))
|
||
|
(read-char port)
|
||
|
(lp))
|
||
|
(else c))))))
|
||
|
|
||
|
|
||
|
|
||
|
;;; Reading records
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(define default-record-delims (char-set #\newline))
|
||
|
|
||
|
;;; (record-reader [delims elide? handle-delim]) -> reader
|
||
|
;;; (reader [port]) -> string or eof
|
||
|
|
||
|
(define (record-reader . args)
|
||
|
(receive (delims elide? handle-delim)
|
||
|
(parse-optionals args default-record-delims #f 'trim)
|
||
|
(let ((delims (->char-set delims)))
|
||
|
|
||
|
(case handle-delim
|
||
|
((trim) ; TRIM-delimiter reader.
|
||
|
(lambda maybe-port
|
||
|
(let ((s (apply read-delimited delims maybe-port)))
|
||
|
(if (not (eof-object? s))
|
||
|
(if elide?
|
||
|
(apply skip-char-set delims maybe-port) ; Snarf delims.
|
||
|
(apply read-char maybe-port))) ; Just snarf one.
|
||
|
s)))
|
||
|
|
||
|
((concat split) ; CONCAT-delimiter & SPLIT-delimiter reader.
|
||
|
(let ((not-delims (char-set-invert delims)))
|
||
|
(lambda maybe-port
|
||
|
(let ((s (apply read-delimited delims maybe-port)))
|
||
|
(if (eof-object? s) s
|
||
|
(let ((delim (if elide?
|
||
|
(apply read-delimited not-delims maybe-port)
|
||
|
(string (apply read-char maybe-port)))))
|
||
|
(if (eq? handle-delim 'split)
|
||
|
(values s delim)
|
||
|
(if (eof-object? delim) s
|
||
|
(string-append s delim)))))))))
|
||
|
|
||
|
(else
|
||
|
(error "Illegal delimiter-action" handle-delim))))))
|
||
|
|
||
|
|
||
|
;;; {string, char, char-set, char predicate} -> char-set
|
||
|
|
||
|
(define (->char-set x)
|
||
|
(cond ((char-set? x) x)
|
||
|
((string? x) (string->char-set x))
|
||
|
((char? x) (char-set x))
|
||
|
((procedure? x) (predicate->char-set x))
|
||
|
(else (error "->char-set: Not a charset, string, char, or predicate."
|
||
|
x))))
|
||
|
|
||
|
|
||
|
|
||
|
(define blank-line-regexp (make-regexp "^[ \t]*\n$"))
|
||
|
|
||
|
;;; (read-paragraph [port])
|
||
|
(define (read-paragraph . maybe-port)
|
||
|
(let ((port (optional-arg maybe-port (current-input-port))))
|
||
|
|
||
|
;; First, skip all blank lines.
|
||
|
(let lp ()
|
||
|
(let ((line (read-line port #t)))
|
||
|
(cond ((eof-object? line) line)
|
||
|
((regexp-exec blank-line-regexp line) (lp))
|
||
|
|
||
|
;; Then, read in non-blank lines.
|
||
|
(else (let ((lines (let lp ((lines (list line)))
|
||
|
(let ((line (read-line port #t)))
|
||
|
(cond ((or (eof-object? line)
|
||
|
(regexp-exec blank-line-regexp
|
||
|
line))
|
||
|
lines)
|
||
|
(else (lp (cons line lines))))))))
|
||
|
|
||
|
;; Return the paragraph
|
||
|
(apply string-append (reverse lines)))))))))
|
||
|
|
||
|
;;; Reading and parsing records
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; (field-reader [field-parser rec-reader]) -> reader
|
||
|
;;; (reader [port]) -> [raw-record parsed-record] or [eof #()]
|
||
|
;;;
|
||
|
;;; This is the field reader, which is basically just a composition of
|
||
|
;;; RECORD-READER and FIELD-PARSER.
|
||
|
|
||
|
(define default-field-parser (field-splitter))
|
||
|
|
||
|
(define (field-reader . args)
|
||
|
(receive (parser rec-reader)
|
||
|
(parse-optionals args default-field-parser read-line)
|
||
|
(lambda maybe-port
|
||
|
(let ((record (apply rec-reader maybe-port)))
|
||
|
(if (eof-object? record)
|
||
|
(values record '#())
|
||
|
(values record (parser record)))))))
|
||
|
|
||
|
|
||
|
|
||
|
;;; Parse fields by regexp
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; This code parses up a record into fields by matching a regexp specifying
|
||
|
;;; the field against the record. The regexp describes the *field*. In the
|
||
|
;;; other routines, the regexp describes the *delimiters*. They are
|
||
|
;;; complimentary.
|
||
|
|
||
|
;;; Repeatedly do (APPLY PROC M STATE) to generate new state values,
|
||
|
;;; where M is a regexp match structure made from matching against STRING.
|
||
|
|
||
|
;(define (regexp-reduce string start regexp proc . state)
|
||
|
; (let ((end (string-length string))
|
||
|
; (regexp (if (string? regexp)
|
||
|
; (make-regexp regexp)
|
||
|
; regexp)))
|
||
|
;
|
||
|
; (let lp ((i start) (state state) (last-null? #f))
|
||
|
; (let ((j (if last-null? (+ i 1) i)))
|
||
|
; (cond ((and (<= j end) (regexp-exec regexp string j)) =>
|
||
|
; (lambda (m)
|
||
|
; (receive state (apply proc m state)
|
||
|
; (lp (match:end m) state (= (match:start m) (match:end m))))))
|
||
|
; (else (apply values state)))))))
|
||
|
;
|
||
|
;(define (all-regexp-matches regexp string)
|
||
|
; (reverse (regexp-reduce string 0 regexp
|
||
|
; (lambda (m ans) (cons (match:substring m 0) ans))
|
||
|
; '())))
|
||
|
|
||
|
;;; Previously in newports.scm
|
||
|
|
||
|
;;; Read in a line of data. Input is terminated by either a newline or EOF.
|
||
|
;;; The newline is trimmed from the string.
|
||
|
|
||
|
(define (read-line . rest)
|
||
|
(let ((port (if (null? rest) (current-input-port) (car rest))) ; Optional arg
|
||
|
(retain-newline? (and (not (null? rest)) ; parsing.
|
||
|
(not (null? (cdr rest)))
|
||
|
(cadr rest)))
|
||
|
|
||
|
;; S[I] := C. If this overflows S, grow it.
|
||
|
(deposit (lambda (s i c)
|
||
|
(let ((s (if (< i (string-length s)) s
|
||
|
(string-append s s)))) ; doubling hack
|
||
|
(string-set! s i c)
|
||
|
s)))
|
||
|
|
||
|
;; Precisely resize S to size NUMCHARS.
|
||
|
(trim (lambda (s numchars)
|
||
|
(if (= numchars (string-length s)) s
|
||
|
(substring s 0 numchars)))))
|
||
|
|
||
|
(let lp ((s (make-string 81)) (numchars 0))
|
||
|
(let ((c (read-char port)))
|
||
|
(cond ((eof-object? c)
|
||
|
(if (zero? numchars) c
|
||
|
(trim s numchars)))
|
||
|
|
||
|
((char=? c #\newline)
|
||
|
(if retain-newline?
|
||
|
(trim (deposit s numchars c)
|
||
|
(+ numchars 1))
|
||
|
(trim s numchars)))
|
||
|
|
||
|
(else (lp (deposit s numchars c)
|
||
|
(+ numchars 1))))))))
|
||
|
|