scsh-0.6/scsh/test/pattern-matching-test.scm

598 lines
19 KiB
Scheme

;;; Test for the function in section 6 of the scsh-manual "Pattern-matching strings with regular expressions"
;;; Author: Christoph Hetz
;; for testing: (certainly the path will be an other on other systems...)
;; ,open define-record-types handle
;; ,config ,load C:/cygwin/home/mephisto/cvs-scsh/scsh/scsh/test/test-packages.scm
;; ,load C:/cygwin/home/mephisto/cvs-scsh/scsh/scsh/test/test-base.scm
;; load this file
;; (test-all)
;; *** basic help-functions ***
(define eq-match?
(lambda (m1 m2)
(if (equal? m1 #f)
(not m2)
(and (= (match:start m1)
(match:start m2))
(= (match:end m1)
(match:end m2))
(equal? (match:substring m1)
(match:substring m2))))))
(define matches-same-signs?
(lambda (re1 re2)
(let loop ((i 0))
(if (= 256 i)
#t
(let ((str (list->string (list (ascii->char i)))))
(if (eq-match? (string-match re1 str)
(string-match re2 str))
(loop (+ i 1))
#f))))))
;; *** help-strings ***
(define all-signs-string
(let loop ((i 0))
(if (= i 256)
""
(string-append (list->string (list (ascii->char i)))
(loop (+ i 1))))))
(define test-string "Dieser Test-String wurde am 29.07.2004 um 5:23PM erstellt.\na aa aaa aaaa\nab aabb aaabbb\naba abba abbba\n1 12 123 1234\nyyyyyyyyyy\n")
;; *** tests ***
(add-test! 'no-match-test 'pattern-matching
(lambda ()
(not (string-match (rx "xxx") test-string))))
(add-test! 'various-forms-of-non-vowels 'pattern-matching
(lambda ()
(and (eq-match? (string-match (rx (- alpha ("aeiouAEIOU"))) test-string)
(string-match (rx (- (/"azAZ") ("aeiouAEIOU"))) test-string))
(eq-match? (string-match (rx (- (/"azAZ") ("aeiouAEIOU"))) test-string)
(string-match (rx (- alpha ("aeiou") ("AEIOU"))) test-string))
(eq-match? (string-match (rx (- alpha ("aeiou") ("AEIOU"))) test-string)
(string-match (rx (w/nocase (- alpha ("aeiou")))) test-string))
(eq-match? (string-match (rx (w/nocase (- alpha ("aeiou")))) test-string)
(string-match (rx (w/nocase (- (/ "az") ("aeiou")))) test-string)))))
(add-test! '|-test 'pattern-matching
(lambda ()
(eq-match? (string-match (rx (| upper ("aeiou") digit)) "xxx A yyy")
(string-match (rx (| (/ "AZ09") ("aeiou"))) "xxx A yyy"))
(eq-match? (string-match (rx (| upper ("aeiou") digit)) "xxx a yyy")
(string-match (rx (| (/ "AZ09") ("aeiou"))) "xxx a yyy"))
(eq-match? (string-match (rx (| upper ("aeiou") digit)) "xxx 6 yyy")
(string-match (rx (| (/ "AZ09") ("aeiou"))) "xxx 6 yyy"))))
(add-test! 'comma-seperated-list-of-REs 'pattern-matching
(lambda ()
(let ((csl (lambda (re)
(rx (| ""
(: ,re
(* ", " ,re)))))))
(string-match (csl (rx (| "John" "Paul" "George" "Ringo")))
"George, Ringo, Paul, John"))))
(add-test! 'repetition-test 'pattern-matching
(lambda ()
(and (equal? "caaadadr"
(match:substring (string-match (rx (: "c" (+ (| "a" "d")) "r"))
"(caaadadr ...")))
(equal? "caaadadr"
(match:substring (string-match (rx (: "c" (+ ("ad")) "r"))
"(caaadadr ...")))
(equal? "caaadadr"
(match:substring (string-match (rx (: "c" (** 1 6 ("ad")) "r"))
"(caaadadr ...")))
(not (string-match (rx (: "c" (** 1 4 ("ad")) "r"))
"(caaadadr ...")))))
(add-test! 'special-cases-test 'pattern-matching
(lambda ()
(and (matches-same-signs? (rx any) (rx (~)))
(not (string-match (rx (|)) all-signs-string)))))
;XXX something is wrong with this
;(add-test! 're-vs-@re-submatch-test 'pattern-matching
; (lambda ()
; (let* ((f (lambda ()
; (rx (submatch "sub-f1")
; (submatch "sub-f2"))))
; (re (rx (submatch (* "foo"))
; (submatch (? "bar"))
; ,(f)
; (submatch "baz")))
; (match1 (string-match ,re "foofoobarsub-f1sub-f2baz"))
; (match2 (string-match ,@re "foofoobarsub-f1sub-f2baz")))
; (and (...)))))
(add-test! 'posix-string-test 'pattern-matching
(lambda ()
(and (string-match (rx (posix-string "[aeiou]+|x*|y{3,5}"))
"a")
(string-match (rx (posix-string "[aeiou]+|x*|y{3,5}"))
"x")
(string-match (rx (posix-string "[aeiou]+|x*|y{3,5}"))
"")
(string-match (rx (posix-string "[aeiou]+|x*|y{3,5}"))
"yyyy"))))
(add-test! 'dsm-test 'pattern-matching
(lambda ()
(and (equal? "hello"
(match:substring (string-match (rx (dsm 1 0 (submatch "hello")))
"bla hello bla")
2))
(not (match:substring (string-match (rx (dsm 1 0 (submatch "hello")))
"bla hello bla")
1))
(equal? "hello"
(match:substring (string-match (rx (dsm 2 0 (submatch "hello")))
"bla hello bla")
3))
(not (match:substring (string-match (rx (dsm 2 0 (submatch "hello")))
"bla hello bla")
1))
(not (match:substring (string-match (rx (dsm 2 0 (submatch "hello")))
"bla hello bla")
2)))))
(add-test! 'string-regexp 'pattern-matching
(lambda ()
(and (equal? "erstellt."
(match:substring (string-match (rx "erstellt.") test-string)))
(not (string-match (rx "Erstellt.") test-string)))))
(add-test! 'character-set 'pattern-matching
(lambda ()
(eq-match? (string-match (rx ("abcde")) test-string)
(string-match (rx ("edcba")) test-string))))
(add-test! 'any-test 'pattern-matching ;; fails only because of the case i = 0
(lambda ()
(let loop ((i 0))
(if (= 256 i)
#t
(if (string-match (rx any) (list->string (list (ascii->char i))))
(loop (+ i 1))
#f)))))
(add-test! 'sequences-test 'pattern-matching
(lambda ()
(equal? "1234"
(match:substring (string-match (rx (: "1" any any "4")) test-string)))))
(add-test! 'choices 'pattern-matching
(lambda ()
(let ((m1 (string-match (rx (| "erstellt." "xxx")) test-string))
(m2 (string-match (rx (| "xxx" "erstellt.")) test-string)))
(and m1
m2
(eq-match? m1 m2)))))
(add-test! '*-test 'pattern-matching
(lambda ()
(and (equal? ""
(match:substring (string-match (rx (* "y")) test-string)))
(equal? "D"
(match:substring (string-match (rx (* "D")) test-string))))))
(add-test! '+-test 'pattern-matching
(lambda ()
(and (equal? "yyyyyyyyyy"
(match:substring (string-match (rx (+ "y")) test-string)))
(equal? "D"
(match:substring (string-match (rx (+ "D")) test-string))))))
(add-test! '?-test 'pattern-matching
(lambda ()
(and (equal? ""
(match:substring (string-match (rx (? "y")) test-string)))
(equal? "D"
(match:substring (string-match (rx (? "D")) test-string))))))
(add-test! '=-from-test 'pattern-matching
(lambda ()
(and (equal? "yyyyy"
(match:substring (string-match (rx (= 5 "y")) test-string)))
(not (string-match (rx (= 11 "y")) test-string)))))
(add-test! '>=-from-test 'pattern-matching
(lambda ()
(and (equal? "yyyyyyyyyy"
(match:substring (string-match (rx (>= 5 "y")) test-string)))
(equal? "yyyyyyyyyy"
(match:substring (string-match (rx (>= 10 "y")) test-string)))
(not (string-match (rx (>= 11 "y")) test-string)))))
(add-test! '**from-to-test 'pattern-matching
(lambda ()
(and (equal? "yyyyyyyyyy"
(match:substring (string-match (rx (** 1 30 "y")) test-string)))
(equal? "yyyyy"
(match:substring (string-match (rx (** 1 5 "y")) test-string)))
(not (string-match (rx (** 11 12 "y")) test-string))
(not (string-match (rx (** 12 11 any)) test-string))
(equal? ""
(match:substring (string-match (rx (** 0 0 any)) test-string))))))
(add-test! 'single-characters-test 'pattern-matching
(lambda ()
(and (eq-match? (string-match (rx ("abcd")) test-string)
(string-match (rx (| #\a #\b #\c #\d)) test-string))
(eq-match? (string-match (rx ("xy")) test-string)
(string-match (rx (| #\x #\y)) test-string)))))
(add-test! 'range-test 'pattern-matching
(lambda ()
(and (equal? "D"
(match:substring (string-match (rx (/ #\A #\Z #\a #\z #\0 #\9)) test-string)))
(equal? "D"
(match:substring (string-match (rx (/ #\A "Zaz0" #\9)) test-string)))
(equal? "i"
(match:substring (string-match (rx (/ #\a #\z #\0 #\9)) test-string)))
(equal? "i"
(match:substring (string-match (rx (/ #\a "z0" #\9)) test-string)))
(equal? "2"
(match:substring (string-match (rx (/ #\0 #\9)) test-string)))
(equal? "2"
(match:substring (string-match (rx (/ "0" #\9)) test-string))))))
(add-test! 'character-classes-test 'pattern-matching
(lambda ()
(and (eq-match? (string-match (rx lower-case) test-string)
(string-match (rx (- alphabetic upper-case)) test-string))
(eq-match? (string-match (rx upper-case) test-string)
(string-match (rx (- alphabetic lower-case)) test-string))
(equal? "2"
(match:substring (string-match (rx numeric) test-string)))
(equal? "-"
(match:substring (string-match (rx punctuation) test-string)))
(equal? " "
(match:substring (string-match (rx blank) test-string)))
(equal? " "
(match:substring (string-match (rx whitespace) test-string)))
(equal? "\n"
(match:substring (string-match (rx control) test-string)))
(equal? "D"
(match:substring (string-match (rx hex-digit) test-string)))
(equal? "D"
(match:substring (string-match (rx ascii) test-string))))))
(add-test! 'uncsae-w/case-w/nocase-test 'pattern-matching
(lambda ()
(and (equal? "foo"
(match:substring (string-match (rx (uncase "foo")) "bla foo bla")))
(equal? "FOO"
(match:substring (string-match (rx (uncase "foo")) "bla FOO bla")))
(equal? "FOo"
(match:substring (string-match (rx (uncase "foo")) "bla FOo bla")))
(equal? "fOo"
(match:substring (string-match (rx (uncase "foo")) "bla fOo bla")))
(equal? "FoO"
(match:substring (string-match (rx (uncase "foo")) "bla FoO bla")))
(equal? "a"
(match:substring (string-match (rx (uncase (~ "a"))) "a")))
(equal? "A"
(match:substring (string-match (rx (uncase (~ "a"))) "A")))
(not (string-match (rx (w/nocase (~ "a"))) "aA"))
(string-match (rx (w/nocase "abc"
(* "FOO" (w/case "Bar"))
("aeiou")))
"kabcfooBariou")
(not (string-match (rx (w/nocase "abc"
(* "FOO" (w/case "Bar"))
("aeiou")))
"kabcfooBARiou")))))
(add-test! 'dynamic-re-test-1 'pattern-matching
(lambda ()
(let ((str "I am feeding the goose, you are feeding the geese.")
(me 1)
(you 2))
(and (equal? "feeding the goose"
(match:substring (string-match (rx (: "feeding the "
,(if (> me 1)
"geese"
"goose")))
str)))
(equal? "feeding the geese"
(match:substring (string-match (rx (: "feeding the "
,(if (> you 1)
"geese"
"goose")))
str)))))))
(add-test! 'dynamic-re-test-2 'pattern-matching
(lambda ()
(let* ((ws (rx (+ whitespace)))
(date (rx (: (| "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul")
,ws
(| ("123456789")
(: ("12") digit)
"30"
"31")))))
(and (equal? "on Mar 14"
(match:substring (string-match (rx (: "on " ,date))
"it was on Mar 14 ...")))
(equal? "on May 31"
(match:substring (string-match (rx (: "on " ,date))
"it was on May 31 ...")))))))
(add-test! 'regexp?-test 'pattern-matching
(lambda ()
(and (not (regexp? "abc"))
(regexp? (rx "abc")))))
(add-test! 'regexp-search-test 'pattern-matching
(lambda ()
(and (equal? "abc"
(match:substring (regexp-search (rx "abc") "abcdefg")))
(not (regexp-search (rx "abc") "abcdefg" 3))
(not (regexp-search (rx "cba") "abcdefg")))))
(add-test! 'regexp-search?-test 'pattern-matching
(lambda ()
(and (regexp-search? (rx "abc") "abcdefg")
(not (regexp-search? (rx "abc") "abcdefg" 3))
(not (regexp-search? (rx "cba") "abcdefg")))))
(add-test! 'regexp-substitute/global-test-1 'pattern-matching
(lambda ()
(equal? "dry Jin"
(regexp-substitute/global #f (rx "Cotton") "dry Cotton"
'pre "Jin" 'post))))
(add-test! 'regexp-substitute/global-test-2 'pattern-matching
(lambda ()
(equal? "01/03/79"
(regexp-substitute/global #f (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"03/01/79"
'pre 2 "/" 1 "/" 3 'post))))
(add-test! 'regexp-substitute/global-test-3 'pattern-matching
(lambda ()
(equal? "Sep 29, 1961"
(regexp-substitute/global #f (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"9/29/61"
'pre
(lambda (m)
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
(- (string->number (match:substring m 1))
1)))
(day (match:substring m 2))
(year (match:substring m 3)))
(string-append mon " " day ", 19" year)))
'post))))
(add-test! 'regexp-substitute/global-test-4 'pattern-matching
(lambda ()
(let ((kill-matches (lambda (re s)
(regexp-substitute/global #f re s 'pre 'post))))
(equal? " will disappear, also and "
(kill-matches (rx (| "Windows" "tcl" "Intel"))
"Windows will disappear, also tcl and Intel")))))
(add-test! 'regexp-fold-right-test 'pattern-matching
(lambda ()
(equal? (list "1" "1" "2" "2")
(regexp-fold-right (rx digit)
(lambda (m i lis)
(cons (match:substring m 0) lis))
'() "a1 b1 a2 b2 ..."))))
(add-test! 'let-match-test 'pattern-matching
(lambda ()
(equal? "3/1/79\nmonth: 3\nday: 1\nyear: 79"
(let-match (regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 3/1/79")
(whole-date month day year)
(string-append whole-date "\nmonth: " month "\nday: " day "\nyear: " year)))))
(add-test! 'if-match-test 'pattern-matching
(lambda ()
(and (if-match (regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 3/1/79")
(whole-date month day year)
(and (equal? month "3")
(equal? day "1")
(equal? year "79"))
#f)
(if-match (regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 3|1|79")
(whole-date month day year)
(and (equal? month "3")
(equal? day "1")
(equal? year "79"))
#t))))
(add-test! 'match-cond-test 'pattern-matching
(lambda ()
(let ((m "")
(d "")
(y ""))
(and (match-cond ((regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 3/1/79")
(whole-date month day year)
(begin (set! m month)
(set! d day)
(set! y year)))
(test (equal? m "3")
#t)
(else #f))
(match-cond ((regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 4/1/79")
(whole-date month day year)
(begin (set! m month)
(set! d day)
(set! y year)))
(test (equal? m "3")
#t)
(test (if (equal? m "4")
m
d) =>
(lambda (month)
(equal? month "4")))
(else #f))
(match-cond ((regexp-search (rx (submatch (+ digit)) "/"
(submatch (+ digit)) "/"
(submatch (+ digit)))
"here comes a date: 5/1/79")
(whole-date month day year)
(begin (set! m month)
(set! d day)
(set! y year)))
(test (equal? m "3")
#t)
(test (if (equal? m "4")
m
d) =>
(lambda (month)
(equal? month "4")))
(else #t))))))
(add-test! 'flush-submatches-test 'pattern-matching
(lambda ()
(let ((re (rx (submatch "foo")
(submatch "bar"))))
(and (= 2
(re-seq:tsm re))
(= 0
(re-seq:tsm (flush-submatches re)))
(equal? "foobar"
(match:substring (string-match (flush-submatches re)
"foobar")))))))
(add-test! 'uncase-test 'pattern-matching
(lambda ()
(equal? "FoO"
(match:substring (string-match (uncase (rx "foo"))
"FoO")))))
(add-test! 'simplify-regexp-test 'pattern-matching
(lambda ()
(and (re-dsm? (rx (: (** 0 0 (submatch "apple"))
(submatch "bar"))))
(= 2
(re-dsm:tsm (rx (: (** 0 0 (submatch "apple"))
(submatch "bar"))))))))
;; XXX perhaps only a mistake in the manual - it says:
;; uncase-char-set was of the type: cset -> re
;; in fact it is of the type: cset -> cset
(add-test! 'uncase-char-set-test 'patern-matching
(lambda ()
(equal? "B"
(match:substring (string-match (uncase-char-set (list->char-set (list #\a #\b #\c)))
"dDBb")))))
(add-test! 'uncase-string-test 'pattern-matching
(lambda ()
(equal? "FoO"
(match:substring (string-match (uncase-string "foo")
"FoO")))))
(add-test! 'sre->regexp-test 'pattern-matching
(lambda ()
(regexp? (sre->regexp '(: "Christoph " (? "F. ") "Hetz")))))
;; XXX
;;Warning: wrong number of arguments
;; (re-seq (re-string "Pete ") (re-repeat 1 #f (re-string "Sz")) (re-string "ilagyi"))
;; (procedure wants: (:value))
;; (arguments are: (:value :value :value))
;;
;;
;(add-test! 'regexp->sre-test 'pattern-matching
; (lambda ()
; (equal? '(? "Pete" (+ "Sz") "ilagyi")
; (regexp->sre (re-repeat 0 1 (re-seq (re-string "Pete ")
; (re-repeat 1 #f (re-string "Sz"))
; (re-string "ilagyi")))))))
(add-test! 'char-classes+algebra-test 'pattern-matching
(lambda ()
(and (matches-same-signs? (rx (| lower-case upper-case))
(rx alphabetic))
(matches-same-signs? (rx (- alphabetic lower-case))
(rx upper-case))
(matches-same-signs? (rx (- alphabetic upper-case))
(rx lower-case))
(matches-same-signs? (rx (& upper-case alphanumeric))
(rx upper-case))
(matches-same-signs? (rx (| upper-case lower-case numeric))
(rx alphanumeric))
(matches-same-signs? (rx (~ (& alphanumeric lower-case)
graphic
(| upper-case numeric)))
(rx (- any
alphanumeric
graphic)))
(matches-same-signs? (rx (/ "azAZ09"))
(rx alphanumeric))
(matches-same-signs? (rx (~ (| (/ "az") (/ "AZ") (/ "09"))))
(rx (- any alphanumeric)))
(matches-same-signs? (rx (& (/ "az09")
(/ "AZ09")))
(rx numeric)))))
(add-test! 'different-ways-test 'pattern-matching
(lambda ()
(and (eq-match? (string-match (rx "abcde") "xxxabcdexxx")
(string-match (rx (: "a" "b" "c" "d" "e")) "xxxabcdexxx"))
(eq-match? (string-match (rx "abcde") "xxxabcdexxx")
(string-match (rx (: "a" (: "b" (: "c" (: "d" "e"))))) "xxxabcdexxx"))
(eq-match? (string-match (rx "abcde") "xxxabcdexxx")
(string-match (rx (: "ab" "c" (: "d" "e"))) "xxxabcdexxx"))
(eq-match? (string-match (rx "abcde") "xxxabcdexxx")
(string-match (rx (: "a" "b" "cde")) "xxxabcdexxx"))
(eq-match? (string-match (rx "abcde") "xxxabcdexxx")
(string-match (rx (: (: (: "a" "b") "c") (: "d" "e"))) "xxxabcdexxx"))
(eq-match? (string-match (rx "xxx" (* alphabetic) "xxx") "xxxabcdexxx")
(string-match (rx (+ "x") "abcde" (+ "x")) "xxxabcdexxx"))
(eq-match? (string-match (rx (: "x" (+ "x") (* "x"))
(: (? alphanumeric)
(? alphanumeric)
(? alphanumeric)
(? alphanumeric)
(? alphanumeric)
(? alphanumeric)
(? alphanumeric)
(? alphanumeric))
"xxx")
"xxxabcdexxx")
(string-match (rx (: "xxx" (* (/ "ae")) (+ "x"))) "xxxabcdexxx"))
(eq-match? (string-match (rx "xxxabcdexxx") "xxxabcdexxx")
(string-match (rx (* alphanumeric)) "xxxabcdexxx"))
(eq-match? (string-match (rx (: "xxx" (: "abcde" "x" "xx"))) "xxxabcdexxx")
(string-match (rx (* (| (/ "ae") "x"))) "xxxabcdexxx")))))