scsh-0.5/scsh/glob.scm

269 lines
8.9 KiB
Scheme

;;; Code for processing file names with a glob pattern.
;;; Copyright (c) 1994 by David Albertz (dalbertz@clark.lcs.mit.edu).
;;; Copyright (c) 1994 by Olin Shivers (shivers@clark.lcs.mit.edu).
;;; This code is freely available for use by anyone for any purpose,
;;; so long as you don't charge money for it, remove this notice, or
;;; hold us liable for any results of its use. --enjoy.
;;; Usage: (glob pattern-list)
;;; pattern-list := a list of glob-pattern strings
;;; Return: list of file names (strings)
;;; The files "." and ".." are never returned by glob.
;;; Dot files will only be returned if the first character
;;; of a glob pattern is a ".".
;;; The empty pattern matches nothing.
;;; A pattern beginning with / starts at root; otherwise, we start at cwd.
;;; A pattern ending with / matches only directories, e.g., "/usr/man/man?/"
(define (glob . pattern-list)
;; Expand out braces, and apply GLOB-ONE-PATTERN to all the result patterns.
(apply append
(map glob-one-pattern
(apply append (map glob-remove-braces pattern-list)))))
(define (glob-one-pattern pattern)
(let ((plen (string-length pattern)))
(if (zero? plen) '()
(let ((directories-only? (char=? #\/ (string-ref pattern (- plen 1))))
(patterns (split-file-name pattern))) ; Must be non-null.
(if (equal? "" (car patterns))
(really-glob "" (cdr patterns) directories-only?) ; root
(really-glob "." patterns directories-only?)))))) ; cwd
(define (really-glob root-file patterns directories-only?)
;; This is the heart of the matcher.
(let recur ((file root-file)
(pats patterns)
(sure? #f)) ; True if we are sure this file exists.
(if (pair? pats)
(let ((pat (car pats))
(pats (cdr pats))
(dir (file-name-as-directory file)))
(receive (winners sure?) (glob-subpat file pat)
(apply append (map (lambda (f)
(recur (string-append dir f) pats sure?))
winners))))
;; All done.
(if directories-only?
(if (maybe-isdir? file)
(list (file-name-as-directory file))
'())
(if (or sure? (file-exists? file))
(list file)
'())))))
;;; Return the elts of directory FNAME that match pattern PAT.
;;; If PAT contains no wildcards, we cheat and do not match the
;;; constant pattern against every file in FNAME/; we just
;;; immediately return FNAME/PAT. In this case, we indicate that we
;;; aren't actually sure the file exists by returning a true SURE?
;;; value. Not only does this vastly speed up the matcher, it also
;;; allows us to match the constant patterns "." and "..".
(define (glob-subpat fname pat) ; PAT doesn't contain a slash.
(cond ((string=? pat "") (values '() #t))
((constant-glob? pat)
(values (cons pat '()) #f)) ; Don't check filesys.
(else (let* ((dots? (char=? #\. (string-ref pat 0))) ; Match dot files?
(candidates (maybe-directory-files fname dots?))
(re (make-regexp (glob->regexp pat))))
(values (filter (lambda (f) (regexp-exec re f)) candidates)
#t))))) ; These guys exist for sure.
;;; The initial special-case above isn't really for the fast-path; it's
;;; an obscure and unlikely case. But since we have to check pat[0] for an
;;; initial dot, we have to do the check anyway...
;;; Translate a brace-free glob pattern to a regular expression.
(define (glob->regexp pat)
(let ((pat-len (string-length pat)))
(let lp ((result '(#\^))
(i 0)
(state 'normal))
(if (= i pat-len)
(if (eq? state 'normal)
(list->string (reverse (cons #\$ result)))
(error "Illegal glob pattern" pat))
(let ((c (string-ref pat i))
(i (+ i 1)))
(case state
((char-set)
(lp (cons c result)
i
(if (char=? c #\]) 'normal 'char-set)))
((escape)
(lp (case c
((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[)
(cons c (cons #\\ result)))
(else (cons c result)))
i
'normal))
;; Normal
(else (case c
((#\\) (lp result i 'escape))
((#\*) (lp (cons #\* (cons #\. result)) i 'normal))
((#\?) (lp (cons #\. result) i 'normal))
((#\[) (lp (cons c result) i 'char-set))
((#\$ #\^ #\. #\+ #\| #\( #\))
(lp (cons c (cons #\\ result)) i 'normal))
(else (lp (cons c result) i 'normal))))))))))
;;; Is the glob pattern free of *'s, ?'s and [...]'s?
(define (constant-glob? pattern)
(let ((patlen (string-length pattern)))
(let lp ((i 0))
(or (= i patlen)
(let ((next-i (+ i 1)))
(case (string-ref pattern i)
((#\\) ; Escape char
(if (= next-i patlen)
(error "Ill-formed glob pattern -- ends in backslash"
pattern)
(lp (+ next-i 1))))
((#\* #\? #\[) #f)
(else (lp next-i))))))))
;;; Make an effort to get the files in the putative directory PATH.
;;; If PATH isn't a directory, or some filesys error happens (such
;;; as a broken symlink, or a permissions problem), don't error out,
;;; just quietly return the empty list.
(define (maybe-directory-files path dotfiles?)
(with-errno-handler ((errno data)
(else '())) ; On any error, return ().
(directory-files path dotfiles?)))
;;; Make an effort to find out if the file is a directory. If there's
;;; any error, return #f.
(define (maybe-isdir? path)
(with-errno-handler ((errno data)
(else #f)) ; On any error, return #f.
(file-directory? path)))
;;; This section of code is responsible for processing the braces in glob
;;; patterns. I.e., "{foo,bar}/*.c" -> ("foo/*.c" "bar/*.c")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (append-suffix strs suffix)
(map (lambda (s) (string-append s suffix)) strs))
(define (cross-append prefixes suffixes)
(apply append (map (lambda (sfx) (append-suffix prefixes sfx)) suffixes)))
;;; Parse a glob pattern into an equivalent series of brace-free patterns.
;;; The pattern starts at START and is terminated by (1) end of string,
;;; (2) an unmatched close brace, or (3) a comma (if COMMA-TERMINATES? is set).
;;; Returns two values:
;;; - the list of patterns
;;; - the string index after the pattern terminates. This points at
;;; the comma or brace if they terminated the scan, since they are
;;; not part of the pattern.
(define (parse-glob-braces pattern start comma-terminates?)
(let ((pattern-len (string-length pattern))
(finish (lambda (prefixes pat)
(append-suffix prefixes (list->string (reverse pat))))))
(let lp ((i start)
(prefixes '(""))
(pat '()))
(if (= i pattern-len)
(values (finish prefixes pat) i)
(let ((c (string-ref pattern i)))
(case c
((#\{)
(let ((prefixes (append-suffix prefixes
(list->string (reverse pat)))))
(receive (pats i)
(parse-comma-sequence pattern (+ i 1))
(lp i (cross-append prefixes pats) '()))))
((#\\)
(let ((i (+ i 1)))
(if (= i pattern-len)
(error "Dangling escape char in glob pattern" pattern)
(lp (+ i 1)
prefixes
(cons (string-ref pattern i) pat)))))
((#\,)
(if comma-terminates?
(values (finish prefixes pat) i)
(lp (+ i 1) prefixes (cons c pat))))
((#\})
(values (finish prefixes pat) i))
(else
(lp (+ i 1) prefixes (cons c pat)))))))))
;;; Parse the internals of a {foo,bar,baz} brace list from a glob pattern.
;;; START is the index of the char following the open brace.
;;; Returns two values:
;;; - an equivalent list of brace-free glob patterns
;;; - the index of the char after the terminating brace
(define (parse-comma-sequence pattern start)
(let ((pattern-len (string-length pattern)))
(let lp ((i start)
(patterns '())) ; The list of comma-separated patterns read.
(if (= i pattern-len)
(error "Glob brace-expression pattern not terminated" pattern)
(receive (pats i) (parse-glob-braces pattern i #t)
(let ((patterns (append patterns pats)))
(if (= i pattern-len)
(error "Unterminated brace in glob pattern" pattern)
(let ((c (string-ref pattern i)))
(case c
((#\})
(values patterns (+ i 1)))
((#\,)
(lp (+ i 1) patterns))
(else
(error "glob parser internal error" pattern i)))))))))))
(define (glob-remove-braces pattern)
(receive (pats i) (parse-glob-braces pattern 0 #f)
(if (= i (string-length pattern)) pats
(error "Unmatched close brace in glob pattern" pattern i))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Convert a string into a glob pattern that matches that string exactly --
;;; in other words, quote the \ * ? [] and {} chars with backslashes.
(define (glob-quote string)
(let lp ((i (- (string-length string) 1))
(result '()))
(if (< i 0) (list->string result)
(lp (- i 1)
(let* ((c (string-ref string i))
(result (cons c result)))
(if (memv c '(#\[ #\] #\* #\? #\{ #\} #\\))
(cons #\\ result)
result))))))