Improve glob matching

1. The * and ** operators now match a dot, except when the dot is the first character of a path component. E.g. ‘main*’ will match ‘main.cc’, while ‘*’ still does not match ‘.htaccess’.
2. When a glob is negated, the * and ** operators also match dot files. E.g. ‘!cache/**’ will match (and thereby reject) ‘cache/.DS_Store’.
3. The ** operator no longer needs to be followed by a slash. E.g. ‘src/**.cc’ will match both ‘src/main.cc’ and ‘src/sys/util.cc’.
This commit is contained in:
Allan Odgaard
2012-09-11 17:16:02 +02:00
parent d3c455d9e5
commit e8837fcf9f
3 changed files with 50 additions and 20 deletions

View File

@@ -8,20 +8,9 @@
OAK_DEBUG_VAR(Glob);
OAK_DEBUG_VAR(Glob_Parser);
/*
We do a simple transformation of the glob into a regexp using these rules:
\\. → $0
\*\* /? → ([^/.].*(?=/|$) /? )?
\* → ([^/.][^/]*)?
\? → .
\[.*?\] → $0
[\\|[().?*+{^$] → \\$0
*/
namespace path
{
void glob_t::setup (std::string const& glob)
void glob_t::setup (std::string const& glob, bool matchDotFiles)
{
static regexp::pattern_t const glob_matcher = "(?:"
"(\\\\.)" "|"
@@ -34,8 +23,17 @@ namespace path
static std::string const glob_formater = ""
"${1}"
"${2:+([^/.].*(?=/|$)$3)?}"
"${4:+([^/.][^/]*)?}"
"${2:+(((?!\\.)|(?<!^|/))[^/]*(/(?!\\.)[^/]*)*$3)?}"
"${4:+((?!\\.)|(?<!^|/))[^/]*}"
"${5:+.}"
"${6}"
"${7:+\\\\$7}"
;
static std::string const glob_formater_match_dot_files = ""
"${1}"
"${2:+(.*$3)?}"
"${4:+[^/]*}"
"${5:+.}"
"${6}"
"${7:+\\\\$7}"
@@ -45,7 +43,7 @@ namespace path
std::vector<std::string> expanded;
citerate(str, expand_braces(_negate ? glob.substr(1) : glob))
expanded.push_back(format_string::replace(*str, glob_matcher, glob_formater));
expanded.push_back(format_string::replace(*str, glob_matcher, (matchDotFiles || _negate) ? glob_formater_match_dot_files : glob_formater));
std::string ptrn = "^(.*/)?(" + text::join(expanded, "|") + ")$";
_compiled = regexp::pattern_t(ptrn);

View File

@@ -7,14 +7,14 @@ namespace path
{
struct PUBLIC glob_t
{
glob_t (const char* glob) { setup(glob); }
glob_t (std::string const& glob) { setup(glob); }
glob_t (const char* glob, bool matchDotFiles = false) { setup(glob, matchDotFiles); }
glob_t (std::string const& glob, bool matchDotFiles = false) { setup(glob, matchDotFiles); }
bool does_match (std::string const& filename) const;
private:
friend std::string to_s (glob_t const& glob);
void setup (std::string const& glob);
void setup (std::string const& glob, bool matchDotFiles);
bool _negate;
regexp::pattern_t _compiled;
};

View File

@@ -59,6 +59,32 @@ public:
TS_ASSERT(!path::glob_t("*" ).does_match(".htaccess"));
TS_ASSERT( path::glob_t("{,.}*").does_match("test"));
TS_ASSERT( path::glob_t("{,.}*").does_match(".htaccess"));
TS_ASSERT( path::glob_t("*" ).does_match("foo.txt"));
TS_ASSERT( path::glob_t("foo*" ).does_match("foo.txt"));
TS_ASSERT( path::glob_t("foo/*" ).does_match("foo/bar.txt"));
TS_ASSERT( path::glob_t("foo/bar*" ).does_match("foo/bar.txt"));
TS_ASSERT(!path::glob_t("*" ).does_match(".txt"));
TS_ASSERT(!path::glob_t("foo/*" ).does_match("foo/.txt"));
TS_ASSERT(!path::glob_t("foo/bar/*").does_match("foo/bar/.txt"));
TS_ASSERT( path::glob_t("cache/*" ).does_match("cache/test.cc"));
TS_ASSERT( path::glob_t("cache/**" ).does_match("cache/test.cc"));
TS_ASSERT( path::glob_t("cache/**" ).does_match("cache/foo/test.cc"));
TS_ASSERT( path::glob_t("cache/**/*").does_match("cache/foo/test.cc"));
TS_ASSERT(!path::glob_t("cache/*" ).does_match("cache/.htaccess"));
TS_ASSERT(!path::glob_t("cache/**" ).does_match("cache/.htaccess"));
TS_ASSERT(!path::glob_t("cache/**" ).does_match("cache/foo/.htaccess"));
TS_ASSERT(!path::glob_t("cache/**/*").does_match("cache/foo/.htaccess"));
TS_ASSERT(!path::glob_t("!cache/*" ).does_match("cache/test.cc"));
TS_ASSERT(!path::glob_t("!cache/**" ).does_match("cache/test.cc"));
TS_ASSERT(!path::glob_t("!cache/**" ).does_match("cache/foo/test.cc"));
TS_ASSERT(!path::glob_t("!cache/**/*").does_match("cache/foo/test.cc"));
TS_ASSERT(!path::glob_t("!cache/*" ).does_match("cache/.htaccess"));
TS_ASSERT(!path::glob_t("!cache/**" ).does_match("cache/.htaccess"));
TS_ASSERT(!path::glob_t("!cache/**" ).does_match("cache/foo/.htaccess"));
TS_ASSERT(!path::glob_t("!cache/**/*").does_match("cache/foo/.htaccess"));
}
void test_glob_anchoring ()
@@ -92,8 +118,14 @@ public:
TS_ASSERT( path::glob_t("foo/**" ).does_match("foo/bar/fud.txt"));
TS_ASSERT( path::glob_t("foo/**/*.txt").does_match("foo/fud.txt"));
TS_ASSERT( path::glob_t("foo/**/*.txt").does_match("foo/bar/fud.txt"));
TS_ASSERT(!path::glob_t("**.txt" ).does_match("foo/bar/fud.txt"));
TS_ASSERT(!path::glob_t("f**.txt" ).does_match("foo/bar/fud.txt"));
TS_ASSERT( path::glob_t("**.txt" ).does_match("fud.txt"));
TS_ASSERT( path::glob_t("**.txt" ).does_match("foo/bar/fud.txt"));
TS_ASSERT( path::glob_t("**/*.txt" ).does_match("fud.txt"));
TS_ASSERT( path::glob_t("f**.txt" ).does_match("foo/bar.txt"));
TS_ASSERT(!path::glob_t("f*.txt" ).does_match("foo/bar.txt"));
TS_ASSERT( path::glob_t("f**bar.txt" ).does_match("fbar.txt"));
TS_ASSERT( path::glob_t("f**bar.txt" ).does_match("foo/bar.txt"));
TS_ASSERT(!path::glob_t("f**bar.txt" ).does_match("foo/.bar.txt"));
}
void test_brace_expansion ()