From e914b398af11f909e88a8bc85eeebb0768aacd54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Fri, 16 Mar 2018 23:35:07 +0100 Subject: [PATCH] glob: Support square brackets in patterns. * guix/glob.scm (wildcard-indices): Remove. (parse-bracket): New procedure. (compile-glob-pattern): Rewrite. Support square brackets for sets and ranges. (glob-match?): Support sets and ranges. * tests/glob.scm (test-compile-glob-pattern) (test-glob-match): New macros. Use them to rewrite the existing tests, and add new tests. --- guix/glob.scm | 95 +++++++++++++++++++++++++++++++------------------- tests/glob.scm | 67 ++++++++++++++++++++--------------- 2 files changed, 99 insertions(+), 63 deletions(-) diff --git a/guix/glob.scm b/guix/glob.scm index 4fc5173ac0..29c335ca1d 100644 --- a/guix/glob.scm +++ b/guix/glob.scm @@ -25,20 +25,17 @@ ;;; ;;; This is a minimal implementation of "glob patterns" (info "(libc) ;;; Globbbing"). It is currently limited to simple patterns and does not -;;; support braces and square brackets, for instance. +;;; support braces, for instance. ;;; ;;; Code: -(define (wildcard-indices str) - "Return the list of indices in STR where wildcards can be found." - (let loop ((index 0) - (result '())) - (if (= index (string-length str)) - (reverse result) - (loop (+ 1 index) - (case (string-ref str index) - ((#\? #\*) (cons index result)) - (else result)))))) +(define (parse-bracket chars) + "Parse CHARS, a list of characters that extracted from a '[...]' sequence." + (match chars + ((start #\- end) + `(range ,start ,end)) + (lst + `(set ,@lst)))) (define (compile-glob-pattern str) "Return an sexp that represents the compiled form of STR, a glob pattern @@ -48,29 +45,43 @@ such as \"foo*\" or \"foo??bar\"." (((? string? str)) str) (x x))) - (let loop ((index 0) - (indices (wildcard-indices str)) + (define (cons-string chars lst) + (match chars + (() lst) + (_ (cons (list->string (reverse chars)) lst)))) + + (let loop ((chars (string->list str)) + (pending '()) + (brackets 0) (result '())) - (match indices + (match chars (() - (flatten (cond ((zero? index) - (list str)) - ((= index (string-length str)) - (reverse result)) - (else - (reverse (cons (string-drop str index) - result)))))) - ((wildcard-index . rest) - (let ((wildcard (match (string-ref str wildcard-index) + (flatten (reverse (if (null? pending) + result + (cons-string pending result))))) + (((and chr (or #\? #\*)) . rest) + (let ((wildcard (match chr (#\? '?) (#\* '*)))) - (match (substring str index wildcard-index) - ("" (loop (+ 1 wildcard-index) - rest - (cons wildcard result))) - (str (loop (+ 1 wildcard-index) - rest - (cons* wildcard str result))))))))) + (if (zero? brackets) + (loop rest '() 0 + (cons* wildcard (cons-string pending result))) + (loop rest (cons chr pending) brackets result)))) + ((#\[ . rest) + (if (zero? brackets) + (loop rest '() (+ 1 brackets) + (cons-string pending result)) + (loop rest (cons #\[ pending) (+ 1 brackets) result))) + ((#\] . rest) + (cond ((zero? brackets) + (error "unexpected closing bracket" str)) + ((= 1 brackets) + (loop rest '() 0 + (cons (parse-bracket (reverse pending)) result))) + (else + (loop rest (cons #\] pending) (- brackets 1) result)))) + ((chr . rest) + (loop rest (cons chr pending) brackets result))))) (define (glob-match? pattern str) "Return true if STR matches PATTERN, a compiled glob pattern as returned by @@ -78,11 +89,12 @@ such as \"foo*\" or \"foo??bar\"." (let loop ((pattern pattern) (str str)) (match pattern - ((? string? literal) (string=? literal str)) - (((? string? one)) (string=? one str)) - (('*) #t) - (('?) (= 1 (string-length str))) - (() #t) + ((? string? literal) + (string=? literal str)) + (() + (string-null? str)) + (('*) + #t) (('* suffix . rest) (match (string-contains str suffix) (#f #f) @@ -92,6 +104,19 @@ such as \"foo*\" or \"foo??bar\"." (('? . rest) (and (>= (string-length str) 1) (loop rest (string-drop str 1)))) + ((('range start end) . rest) + (and (>= (string-length str) 1) + (let ((chr (string-ref str 0))) + (and (char-set-contains? (ucs-range->char-set + (char->integer start) + (+ 1 (char->integer end))) + chr) + (loop rest (string-drop str 1)))))) + ((('set . chars) . rest) + (and (>= (string-length str) 1) + (let ((chr (string-ref str 0))) + (and (char-set-contains? (list->char-set chars) chr) + (loop rest (string-drop str 1)))))) ((prefix . rest) (and (string-prefix? prefix str) (loop rest (string-drop str (string-length prefix)))))))) diff --git a/tests/glob.scm b/tests/glob.scm index 033eeb10fe..71e2d3fce0 100644 --- a/tests/glob.scm +++ b/tests/glob.scm @@ -23,36 +23,47 @@ (test-begin "glob") -(test-equal "compile-glob-pattern, no wildcards" - "foo" - (compile-glob-pattern "foo")) +(define-syntax test-compile-glob-pattern + (syntax-rules (=>) + ((_ pattern => result rest ...) + (begin + (test-equal (format #f "compile-glob-pattern, ~s" pattern) + result + (compile-glob-pattern pattern)) + (test-compile-glob-pattern rest ...))) + ((_) + #t))) -(test-equal "compile-glob-pattern, Kleene star" - '("foo" * "bar") - (compile-glob-pattern "foo*bar")) +(define-syntax test-glob-match + (syntax-rules (matches and not) + ((_ (pattern-string matches strings ... (and not others ...)) rest ...) + (begin + (test-assert (format #f "glob-match? ~s" pattern-string) + (let ((pattern (compile-glob-pattern pattern-string))) + (and (glob-match? pattern strings) ... + (not (glob-match? pattern others)) ...))) + (test-glob-match rest ...))) + ((_) + #t))) -(test-equal "compile-glob-pattern, question mark" - '(? "foo" *) - (compile-glob-pattern "?foo*")) +(test-compile-glob-pattern + "foo" => "foo" + "?foo*" => '(? "foo" *) + "foo[1-5]" => '("foo" (range #\1 #\5)) + "foo[abc]bar" => '("foo" (set #\a #\b #\c) "bar") + "foo[a[b]c]bar" => '("foo" (set #\a #\[ #\b #\] #\c) "bar") + "[123]x" => '((set #\1 #\2 #\3) "x") + "[a-z]" => '((range #\a #\z))) -(test-assert "literal match" - (let ((pattern (compile-glob-pattern "foo"))) - (and (glob-match? pattern "foo") - (not (glob-match? pattern "foobar")) - (not (glob-match? pattern "barfoo"))))) - -(test-assert "trailing star" - (let ((pattern (compile-glob-pattern "foo*"))) - (and (glob-match? pattern "foo") - (glob-match? pattern "foobar") - (not (glob-match? pattern "xfoo"))))) - -(test-assert "question marks" - (let ((pattern (compile-glob-pattern "foo??bar"))) - (and (glob-match? pattern "fooxxbar") - (glob-match? pattern "fooZZbar") - (not (glob-match? pattern "foobar")) - (not (glob-match? pattern "fooxxxbar")) - (not (glob-match? pattern "fooxxbarzz"))))) +(test-glob-match + ("foo" matches "foo" (and not "foobar" "barfoo")) + ("foo*" matches "foo" "foobar" (and not "xfoo")) + ("foo??bar" matches "fooxxbar" "fooZZbar" + (and not "foobar" "fooxxxbar" "fooxxbarzz")) + ("foo?" matches "foox" (and not "fooxx")) + ("ab[0-9]c" matches "ab0c" "ab7c" "ab9c" + (and not "ab-c" "ab00c" "ab3")) + ("ab[cdefg]" matches "abc" "abd" "abg" + (and not "abh" "abcd" "ab["))) (test-end "glob")