From 4051bd900abbb47557dd8928532eedbc2bca0563 Mon Sep 17 00:00:00 2001 From: Benjamin Herr Date: Mon, 7 Apr 2014 14:47:04 +0200 Subject: [PATCH 1/2] libglob: allow "." and ".." to be matched ... also don't read the whole directory if the glob for that path component doesn't contain any metacharacters. Patterns like `../*.jpg` will work now, and `.*` will match both `.` and `..` to be consistent with shell expansion. As before: Just `*` still won't match `.` and `..`, while it will still match dotfiles like `.git` by default. --- src/libglob/lib.rs | 94 +++++++++++++++++++++++++++++++---- src/test/run-pass/glob-std.rs | 20 +++++--- 2 files changed, 98 insertions(+), 16 deletions(-) diff --git a/src/libglob/lib.rs b/src/libglob/lib.rs index 5065fed01292f..07b7c6604e76d 100644 --- a/src/libglob/lib.rs +++ b/src/libglob/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -51,7 +51,7 @@ pub struct Paths { /// Return an iterator that produces all the Paths that match the given pattern, /// which may be absolute or relative to the current working directory. /// -/// is method uses the default match options and is equivalent to calling +/// This method uses the default match options and is equivalent to calling /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you /// want to use non-default match options. /// @@ -117,9 +117,15 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths { let dir_patterns = pattern.slice_from(cmp::min(root_len, pattern.len())) .split_terminator(is_sep) .map(|s| Pattern::new(s)) - .collect(); + .collect::>(); - let todo = list_dir_sorted(&root).move_iter().map(|x|(x,0u)).collect(); + let mut todo = Vec::new(); + if dir_patterns.len() > 0 { + // Shouldn't happen, but we're using -1 as a special index. + assert!(dir_patterns.len() < -1 as uint); + + fill_todo(&mut todo, dir_patterns.as_slice(), 0, &root, options); + } Paths { root: root, @@ -138,6 +144,9 @@ impl Iterator for Paths { } let (path,idx) = self.todo.pop().unwrap(); + // idx -1: was already checked by fill_todo, maybe path was '.' or + // '..' that we can't match here because of normalization. + if idx == -1 as uint { return Some(path); } let ref pattern = *self.dir_patterns.get(idx); if pattern.matches_with(match path.filename_str() { @@ -154,7 +163,8 @@ impl Iterator for Paths { // so we don't need to check the children return Some(path); } else { - self.todo.extend(list_dir_sorted(&path).move_iter().map(|x|(x,idx+1))); + fill_todo(&mut self.todo, self.dir_patterns.as_slice(), + idx + 1, &path, self.options); } } } @@ -162,13 +172,13 @@ impl Iterator for Paths { } -fn list_dir_sorted(path: &Path) -> Vec { +fn list_dir_sorted(path: &Path) -> Option> { match fs::readdir(path) { Ok(mut children) => { children.sort_by(|p1, p2| p2.filename().cmp(&p1.filename())); - children.move_iter().collect() + Some(children.move_iter().collect()) } - Err(..) => Vec::new() + Err(..) => None } } @@ -435,6 +445,72 @@ impl Pattern { } +// Fills `todo` with paths under `path` to be matched by `patterns[idx]`, +// special-casing patterns to match `.` and `..`, and avoiding `readdir()` +// calls when there are no metacharacters in the pattern. +fn fill_todo(todo: &mut Vec<(Path, uint)>, patterns: &[Pattern], idx: uint, path: &Path, + options: MatchOptions) { + // convert a pattern that's just many Char(_) to a string + fn pattern_as_str(pattern: &Pattern) -> Option<~str> { + let mut s = ~""; + for token in pattern.tokens.iter() { + match *token { + Char(c) => s.push_char(c), + _ => return None + } + } + return Some(s); + } + + let add = |todo: &mut Vec<_>, next_path: Path| { + if idx + 1 == patterns.len() { + // We know it's good, so don't make the iterator match this path + // against the pattern again. In particular, it can't match + // . or .. globs since these never show up as path components. + todo.push((next_path, -1 as uint)); + } else { + fill_todo(todo, patterns, idx + 1, &next_path, options); + } + }; + + let pattern = &patterns[idx]; + + match pattern_as_str(pattern) { + Some(s) => { + // This pattern component doesn't have any metacharacters, so we + // don't need to read the current directory to know where to + // continue. So instead of passing control back to the iterator, + // we can just check for that one entry and potentially recurse + // right away. + let special = "." == s || ".." == s; + let next_path = path.join(s); + if (special && path.is_dir()) || (!special && next_path.exists()) { + add(todo, next_path); + } + }, + None => { + match list_dir_sorted(path) { + Some(entries) => { + todo.extend(entries.move_iter().map(|x|(x, idx))); + + // Matching the special directory entries . and .. that refer to + // the current and parent directory respectively requires that + // the pattern has a leading dot, even if the `MatchOptions` field + // `require_literal_leading_dot` is not set. + if pattern.tokens.len() > 0 && pattern.tokens.get(0) == &Char('.') { + for &special in [".", ".."].iter() { + if pattern.matches_with(special, options) { + add(todo, path.join(special)); + } + } + } + } + None => {} + } + } + } +} + fn parse_char_specifiers(s: &[char]) -> Vec { let mut cs = Vec::new(); let mut i = 0; @@ -567,7 +643,7 @@ mod test { fn test_absolute_pattern() { // assume that the filesystem is not empty! assert!(glob("/*").next().is_some()); - assert!(glob("//").next().is_none()); + assert!(glob("//").next().is_some()); // check windows absolute paths with host/device components let root_with_device = os::getcwd().root_path().unwrap().join("*"); diff --git a/src/test/run-pass/glob-std.rs b/src/test/run-pass/glob-std.rs index 0a12731fb4667..eec6d67529574 100644 --- a/src/test/run-pass/glob-std.rs +++ b/src/test/run-pass/glob-std.rs @@ -11,13 +11,12 @@ // ignore-fast check-fast doesn't like 'extern crate extra' // ignore-win32 TempDir may cause IoError on windows: #10462 -#[feature(macro_rules)]; +#![feature(macro_rules)] extern crate glob; use glob::glob; -use std::unstable::finally::Finally; -use std::{os, unstable}; +use std::os; use std::io; use std::io::TempDir; @@ -30,9 +29,9 @@ macro_rules! assert_eq ( ($e1:expr, $e2:expr) => ( pub fn main() { fn mk_file(path: &str, directory: bool) { if directory { - io::fs::mkdir(&Path::new(path), io::UserRWX); + io::fs::mkdir(&Path::new(path), io::UserRWX).unwrap(); } else { - io::File::create(&Path::new(path)); + io::File::create(&Path::new(path)).unwrap(); } } @@ -73,8 +72,8 @@ pub fn main() { mk_file("xyz/z", false); assert_eq!(glob_vec(""), Vec::new()); - assert_eq!(glob_vec("."), Vec::new()); - assert_eq!(glob_vec(".."), Vec::new()); + assert_eq!(glob_vec("."), vec!(os::getcwd())); + assert_eq!(glob_vec(".."), vec!(os::getcwd().join(".."))); assert_eq!(glob_vec("aaa"), vec!(abs_path("aaa"))); assert_eq!(glob_vec("aaa/"), vec!(abs_path("aaa"))); @@ -132,6 +131,13 @@ pub fn main() { abs_path("aaa/tomato/tomato.txt"), abs_path("aaa/tomato/tomoto.txt"))); + assert_eq!(glob_vec("./aaa"), vec!(abs_path("aaa"))); + assert_eq!(glob_vec("./*"), glob_vec("*")); + assert_eq!(glob_vec("*/..").pop().unwrap(), abs_path(".")); + assert_eq!(glob_vec("aaa/../bbb"), vec!(abs_path("bbb"))); + assert_eq!(glob_vec("nonexistent/../bbb"), Vec::new()); + assert_eq!(glob_vec("aaa/tomato/tomato.txt/.."), Vec::new()); + assert_eq!(glob_vec("aa[a]"), vec!(abs_path("aaa"))); assert_eq!(glob_vec("aa[abc]"), vec!(abs_path("aaa"))); assert_eq!(glob_vec("a[bca]a"), vec!(abs_path("aaa"))); From 1700f359bc5d6b8086194e3cc0f3698666dd41a4 Mon Sep 17 00:00:00 2001 From: Benjamin Herr Date: Mon, 7 Apr 2014 18:24:06 +0200 Subject: [PATCH 2/2] libglob: only return dirs for globs ending in / `foo.txt/` should not return `foo.txt` if `foo.txt` is in fact a text file and not a directory. --- src/libglob/lib.rs | 14 ++++++++++++-- src/test/run-pass/glob-std.rs | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/libglob/lib.rs b/src/libglob/lib.rs index 07b7c6604e76d..d19924da5beda 100644 --- a/src/libglob/lib.rs +++ b/src/libglob/lib.rs @@ -43,6 +43,7 @@ use std::path::is_sep; pub struct Paths { root: Path, dir_patterns: Vec, + require_dir: bool, options: MatchOptions, todo: Vec<(Path,uint)>, } @@ -106,6 +107,7 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths { return Paths { root: root, dir_patterns: Vec::new(), + require_dir: false, options: options, todo: Vec::new(), }; @@ -118,6 +120,7 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths { .split_terminator(is_sep) .map(|s| Pattern::new(s)) .collect::>(); + let require_dir = pattern.chars().next_back().map(is_sep) == Some(true); let mut todo = Vec::new(); if dir_patterns.len() > 0 { @@ -130,6 +133,7 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths { Paths { root: root, dir_patterns: dir_patterns, + require_dir: require_dir, options: options, todo: todo, } @@ -146,7 +150,10 @@ impl Iterator for Paths { let (path,idx) = self.todo.pop().unwrap(); // idx -1: was already checked by fill_todo, maybe path was '.' or // '..' that we can't match here because of normalization. - if idx == -1 as uint { return Some(path); } + if idx == -1 as uint { + if self.require_dir && !path.is_dir() { continue; } + return Some(path); + } let ref pattern = *self.dir_patterns.get(idx); if pattern.matches_with(match path.filename_str() { @@ -161,7 +168,10 @@ impl Iterator for Paths { if idx == self.dir_patterns.len() - 1 { // it is not possible for a pattern to match a directory *AND* its children // so we don't need to check the children - return Some(path); + + if !self.require_dir || path.is_dir() { + return Some(path); + } } else { fill_todo(&mut self.todo, self.dir_patterns.as_slice(), idx + 1, &path, self.options); diff --git a/src/test/run-pass/glob-std.rs b/src/test/run-pass/glob-std.rs index eec6d67529574..bd6161dd31a18 100644 --- a/src/test/run-pass/glob-std.rs +++ b/src/test/run-pass/glob-std.rs @@ -138,6 +138,8 @@ pub fn main() { assert_eq!(glob_vec("nonexistent/../bbb"), Vec::new()); assert_eq!(glob_vec("aaa/tomato/tomato.txt/.."), Vec::new()); + assert_eq!(glob_vec("aaa/tomato/tomato.txt/"), Vec::new()); + assert_eq!(glob_vec("aa[a]"), vec!(abs_path("aaa"))); assert_eq!(glob_vec("aa[abc]"), vec!(abs_path("aaa"))); assert_eq!(glob_vec("a[bca]a"), vec!(abs_path("aaa")));