;;; Code for processing file names with a glob pattern. ;;; Copyright (c) 1994 by David Albertz (dalbertz@clark.lcs.mit.edu). ;;; Copyright (c) 1994 by Olin Shivers (shivers@clark.lcs.mit.edu). ;;; This code is freely available for use by anyone for any purpose, ;;; so long as you don't charge money for it, remove this notice, or ;;; hold us liable for any results of its use. --enjoy. ;;; Usage: (glob pattern-list) ;;; pattern-list := a list of glob-pattern strings ;;; Return: list of file names (strings) ;;; The files "." and ".." are never returned by glob. ;;; Dot files will only be returned if the first character ;;; of a glob pattern is a ".". ;;; The empty pattern matches nothing. ;;; A pattern beginning with / starts at root; otherwise, we start at cwd. ;;; A pattern ending with / matches only directories, e.g., "/usr/man/man?/" (define (glob . pattern-list) ;; Expand out braces, and apply GLOB-ONE-PATTERN to all the result patterns. (apply append (map glob-one-pattern (apply append (map glob-remove-braces pattern-list))))) (define (glob-one-pattern pattern) (let ((plen (string-length pattern))) (if (zero? plen) '() (let ((directories-only? (char=? #\/ (string-ref pattern (- plen 1)))) (patterns (split-file-name pattern))) ; Must be non-null. (if (equal? "" (car patterns)) (really-glob "" (cdr patterns) directories-only?) ; root (really-glob "." patterns directories-only?)))))) ; cwd (define (really-glob root-file patterns directories-only?) ;; This is the heart of the matcher. (let recur ((file root-file) (pats patterns) (sure? #f)) ; True if we are sure this file exists. (if (pair? pats) (let ((pat (car pats)) (pats (cdr pats)) (dir (file-name-as-directory file))) (receive (winners sure?) (glob-subpat file pat) (apply append (map (lambda (f) (recur (string-append dir f) pats sure?)) winners)))) ;; All done. (if directories-only? (if (maybe-isdir? file) (list (file-name-as-directory file)) '()) (if (or sure? (file-exists? file)) (list file) '()))))) ;;; Return the elts of directory FNAME that match pattern PAT. ;;; If PAT contains no wildcards, we cheat and do not match the ;;; constant pattern against every file in FNAME/; we just ;;; immediately return FNAME/PAT. In this case, we indicate that we ;;; aren't actually sure the file exists by returning a true SURE? ;;; value. Not only does this vastly speed up the matcher, it also ;;; allows us to match the constant patterns "." and "..". (define (glob-subpat fname pat) ; PAT doesn't contain a slash. (cond ((string=? pat "") (values '() #t)) ((constant-glob? pat) (values (cons pat '()) #f)) ; Don't check filesys. (else (let* ((dots? (char=? #\. (string-ref pat 0))) ; Match dot files? (candidates (maybe-directory-files fname dots?)) (re (make-regexp (glob->regexp pat)))) (values (filter (lambda (f) (regexp-exec re f)) candidates) #t))))) ; These guys exist for sure. ;;; The initial special-case above isn't really for the fast-path; it's ;;; an obscure and unlikely case. But since we have to check pat[0] for an ;;; initial dot, we have to do the check anyway... ;;; Translate a brace-free glob pattern to a regular expression. (define (glob->regexp pat) (let ((pat-len (string-length pat))) (let lp ((result '(#\^)) (i 0) (state 'normal)) (if (= i pat-len) (if (eq? state 'normal) (list->string (reverse (cons #\$ result))) (error "Illegal glob pattern" pat)) (let ((c (string-ref pat i)) (i (+ i 1))) (case state ((char-set) (lp (cons c result) i (if (char=? c #\]) 'normal 'char-set))) ((escape) (lp (case c ((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[) (cons c (cons #\\ result))) (else (cons c result))) i 'normal)) ;; Normal (else (case c ((#\\) (lp result i 'escape)) ((#\*) (lp (cons #\* (cons #\. result)) i 'normal)) ((#\?) (lp (cons #\. result) i 'normal)) ((#\[) (lp (cons c result) i 'char-set)) ((#\$ #\^ #\. #\+ #\| #\( #\)) (lp (cons c (cons #\\ result)) i 'normal)) (else (lp (cons c result) i 'normal)))))))))) ;;; Is the glob pattern free of *'s, ?'s and [...]'s? (define (constant-glob? pattern) (let ((patlen (string-length pattern))) (let lp ((i 0) (escape? #f)) ; Was last char an escape char (backslash)? (if (= i patlen) (if escape? (error "Ill-formed glob pattern" pattern) #t) (let ((next-i (+ i 1))) (if escape? (lp next-i #f) (case (string-ref pattern i) ((#\* #\? #\[) #f) ((#\\) (lp next-i #t)) (else (lp next-i #f))) (lp next-i #f))))))) ;;; Make an effort to get the files in the putative directory PATH. ;;; If PATH isn't a directory, or some filesys error happens (such ;;; as a broken symlink, or a permissions problem), don't error out, ;;; just quietly return the empty list. (define (maybe-directory-files path dotfiles?) (with-errno-handler ((errno data) (else '())) ; On any error, return (). (directory-files path dotfiles?))) ;;; Make an effort to find out if the file is a directory. If there's ;;; any error, return #f. (define (maybe-isdir? path) (with-errno-handler ((errno data) (else #f)) ; On any error, return #f. (file-directory? path))) ;;; This section of code is responsible for processing the braces in glob ;;; patterns. I.e., "{foo,bar}/*.c" -> ("foo/*.c" "bar/*.c") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (append-suffix strs suffix) (map (lambda (s) (string-append s suffix)) strs)) (define (cross-append prefixes suffixes) (apply append (map (lambda (sfx) (append-suffix prefixes sfx)) suffixes))) ;;; Parse a glob pattern into an equivalent series of brace-free patterns. ;;; The pattern starts at START and is terminated by (1) end of string, ;;; (2) an unmatched close brace, or (3) a comma (if COMMA-TERMINATES? is set). ;;; Returns two values: ;;; - the list of patterns ;;; - the string index after the pattern terminates. This points at ;;; the comma or brace if they terminated the scan, since they are ;;; not part of the pattern. (define (parse-glob-braces pattern start comma-terminates?) (let ((pattern-len (string-length pattern)) (finish (lambda (prefixes pat) (append-suffix prefixes (list->string (reverse pat)))))) (let lp ((i start) (prefixes '("")) (pat '())) (if (= i pattern-len) (values (finish prefixes pat) i) (let ((c (string-ref pattern i))) (case c ((#\{) (let ((prefixes (append-suffix prefixes (list->string (reverse pat))))) (receive (pats i) (parse-comma-sequence pattern (+ i 1)) (lp i (cross-append prefixes pats) '())))) ((#\\) (let ((i (+ i 1))) (if (= i pattern-len) (error "Dangling escape char in glob pattern" pattern) (lp (+ i 1) prefixes (cons (string-ref pattern i) pat))))) ((#\,) (if comma-terminates? (values (finish prefixes pat) i) (lp (+ i 1) prefixes (cons c pat)))) ((#\}) (values (finish prefixes pat) i)) (else (lp (+ i 1) prefixes (cons c pat))))))))) ;;; Parse the internals of a {foo,bar,baz} brace list from a glob pattern. ;;; START is the index of the char following the open brace. ;;; Returns two values: ;;; - an equivalent list of brace-free glob patterns ;;; - the index of the char after the terminating brace (define (parse-comma-sequence pattern start) (let ((pattern-len (string-length pattern))) (let lp ((i start) (patterns '())) ; The list of comma-separated patterns read. (if (= i pattern-len) (error "Glob brace-expression pattern not terminated" pattern) (receive (pats i) (parse-glob-braces pattern i #t) (let ((patterns (append patterns pats))) (if (= i pattern-len) (error "Unterminated brace in glob pattern" pattern) (let ((c (string-ref pattern i))) (case c ((#\}) (values patterns (+ i 1))) ((#\,) (lp (+ i 1) patterns)) (else (error "glob parser internal error" pattern i))))))))))) (define (glob-remove-braces pattern) (receive (pats i) (parse-glob-braces pattern 0 #f) (if (= i (string-length pattern)) pats (error "Unmatched close brace in glob pattern" pattern i)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Convert a string into a glob pattern that matches that string exactly -- ;;; in other words, quote the \ * ? [] and {} chars with backslashes. (define (glob-quote string) (let lp ((i (- (string-length string) 1)) (result '())) (if (< i 0) (list->string result) (lp (- i 1) (let* ((c (string-ref string i)) (result (cons c result))) (if (memv c '(#\[ #\] #\* #\? #\{ #\} #\\)) (cons #\\ result) result))))))