From ed63ec08ac651d642123d5abd1d4589e67049ff2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 17 Feb 2023 11:06:10 +0100 Subject: [PATCH 01/14] update crate status (#301) Really just an excuse to start a new PR for additional attribute work without investing much time. --- crate-status.md | 14 ++++++++++---- src/plumbing/progress.rs | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/crate-status.md b/crate-status.md index d2aa181a633..694e4be9598 100644 --- a/crate-status.md +++ b/crate-status.md @@ -440,7 +440,7 @@ Make it the best-performing implementation and the most convenient one. - [ ] handle submodules - [ ] handle sparse directories - [ ] handle sparse index - - [ ] linear scaling with multi-threading up to IO saturation + - [x] linear scaling with multi-threading up to IO saturation - supported attributes to affect working tree and index contents - [ ] eol - [ ] working-tree-encoding @@ -450,8 +450,10 @@ Make it the best-performing implementation and the most convenient one. - [ ] `ident` - [ ] filter processes - [ ] single-invocation clean/smudge filters -* [x] access to all .gitignore/exclude information -* [ ] access to all attributes information +* manage multiple worktrees +* access to per-path information, like `.gitignore` and `.gitattributes` in a manner well suited for efficient lookups + * [x] _exclude_ information + * [ ] attributes ### gix-revision * [x] `describe()` (similar to `git name-rev`) @@ -602,6 +604,8 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README. * [x] tree with other tree * [ ] respect case-sensitivity of host filesystem. 
* [x] a way to access various diff related settings or use them + * [ ] respect `diff.*.textconv`, `diff.*.cachetextconv` and external diff viewers with `diff.*.command`, + [along with support for reading `diff` gitattributes](https://github.com/git/git/blob/73876f4861cd3d187a4682290ab75c9dccadbc56/Documentation/gitattributes.txt#L699:L699). * **rewrite tracking** * **deviation** - git keeps up to four candidates whereas we use the first-found candidate that matches the similarity percentage. This can lead to different sources being found. As such, we also don't consider the filename at all. @@ -614,7 +618,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README. * [x] renames * [x] copies * [x] 'find-copies-harder' - find copies with the source being the entire tree. - * [ ] tree with working tree + * [ ] tree or index with working tree * [x] diffs between modified blobs with various algorithms * [ ] tree with index * [x] initialize @@ -673,6 +677,8 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README. * [ ] obtain 'prunable' information * [x] proper handling of worktree related refs * [ ] create, move, remove, and repair + * [x] access exclude information + * [ ] access attribute information * [x] respect `core.worktree` configuration - **deviation** * The delicate interplay between `GIT_COMMON_DIR` and `GIT_WORK_TREE` isn't implemented. 
diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 4969fc18c31..807c49bebcc 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -116,6 +116,10 @@ static GIT_CONFIG: &[Record] = &[ config: "core.packedGitLimit", usage: NotApplicable { reason: "we target 32bit systems only and don't use a windowing mechanism" } }, + Record { + config: "core.checkRoundtripEncoding", + usage: Planned { note: Some("needed once working-tree-encoding attributes are supported") } + }, Record { config: "core.bigFileThreshold", usage: Planned { note: Some("unfortunately we can't stream packed files yet, even if not delta-compressed, but respecting the threshold for other operations is definitely a must") } @@ -458,6 +462,18 @@ static GIT_CONFIG: &[Record] = &[ config: "status.renames", usage: Planned { note: Some("the same as diff.renames") } }, + Record { + config: "diff.*.textconv", + usage: Planned { note: None } + }, + Record { + config: "diff.*.cachetextconv", + usage: Planned { note: None } + }, + Record { + config: "diff.*.command", + usage: Planned { note: None } + }, ]; /// A programmatic way to record and display progress. 
From 067740636b3ca24ce90db91923dfd4ee592fa7f6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 17 Mar 2023 09:38:45 +0100 Subject: [PATCH 02/14] refactor --- gix-attributes/src/match_group.rs | 8 +- gix-attributes/tests/match_group/ignore.rs | 116 ++++++++++++++++++++ gix-attributes/tests/match_group/mod.rs | 119 +-------------------- gix/src/worktree/mod.rs | 19 ++-- 4 files changed, 136 insertions(+), 126 deletions(-) create mode 100644 gix-attributes/tests/match_group/ignore.rs diff --git a/gix-attributes/src/match_group.rs b/gix-attributes/src/match_group.rs index 018bf2567e0..54f72a6d659 100644 --- a/gix-attributes/src/match_group.rs +++ b/gix-attributes/src/match_group.rs @@ -61,6 +61,10 @@ pub enum Value { #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] pub struct Attributes; +fn macro_mode() -> gix_glob::pattern::Mode { + gix_glob::pattern::Mode::all() +} + impl Pattern for Attributes { type Value = Value; @@ -72,7 +76,7 @@ impl Pattern for Attributes { crate::parse::Kind::Macro(macro_name) => ( gix_glob::Pattern { text: macro_name.as_str().into(), - mode: gix_glob::pattern::Mode::all(), + mode: macro_mode(), first_wildcard_pos: None, }, Value::MacroAttributes(into_owned_assignments(assignments).ok()?), @@ -93,7 +97,7 @@ impl Pattern for Attributes { } fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { - pattern.mode != gix_glob::pattern::Mode::all() + pattern.mode != macro_mode() } } diff --git a/gix-attributes/tests/match_group/ignore.rs b/gix-attributes/tests/match_group/ignore.rs new file mode 100644 index 00000000000..e278fdedc09 --- /dev/null +++ b/gix-attributes/tests/match_group/ignore.rs @@ -0,0 +1,116 @@ +use std::io::Read; + +use bstr::{BStr, ByteSlice}; +use gix_attributes::{Ignore, Match, MatchGroup}; +use gix_glob::pattern::Case; + +struct Expectations<'a> { + lines: bstr::Lines<'a>, +} + +impl<'a> Iterator for Expectations<'a> { + type Item = (&'a BStr, Option<(&'a BStr, usize, &'a BStr)>); + + fn 
next(&mut self) -> Option { + let line = self.lines.next()?; + let (left, value) = line.split_at(line.find_byte(b'\t').unwrap()); + let value = value[1..].as_bstr(); + + let source_and_line = if left == b"::" { + None + } else { + let mut tokens = left.split(|b| *b == b':'); + let source = tokens.next().unwrap().as_bstr(); + let line_number: usize = tokens.next().unwrap().to_str_lossy().parse().ok().unwrap(); + let pattern = tokens.next().unwrap().as_bstr(); + Some((source, line_number, pattern)) + }; + Some((value, source_and_line)) + } +} + +#[test] +fn from_git_dir() -> crate::Result { + let dir = gix_testtools::scripted_fixture_read_only("make_global_and_external_and_dir_ignores.sh")?; + let repo_dir = dir.join("repo"); + let git_dir = repo_dir.join(".git"); + let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline"))?; + let mut buf = Vec::new(); + let mut group = MatchGroup::from_git_dir(git_dir, Some(dir.join("user.exclude")), &mut buf)?; + + assert!( + !group.add_patterns_file("not-a-file", false, None, &mut buf)?, + "missing files are no problem and cause a negative response" + ); + assert!( + group.add_patterns_file(repo_dir.join(".gitignore"), true, repo_dir.as_path().into(), &mut buf)?, + "existing files return true" + ); + + buf.clear(); + let ignore_file = repo_dir.join("dir-with-ignore").join(".gitignore"); + std::fs::File::open(&ignore_file)?.read_to_end(&mut buf)?; + group.add_patterns_buffer(&buf, ignore_file, repo_dir.as_path().into()); + + for (path, source_and_line) in (Expectations { + lines: baseline.lines(), + }) { + let actual = group.pattern_matching_relative_path( + path, + repo_dir + .join(path.to_str_lossy().as_ref()) + .metadata() + .ok() + .map(|m| m.is_dir()), + Case::Sensitive, + ); + match (actual, source_and_line) { + ( + Some(Match { + sequence_number, + pattern: _, + source, + value: _, + }), + Some((expected_source, line, _expected_pattern)), + ) => { + assert_eq!(sequence_number, line, "our 
counting should match the one used in git"); + assert_eq!( + source.map(|p| p.canonicalize().unwrap()), + Some(repo_dir.join(expected_source.to_str_lossy().as_ref()).canonicalize()?) + ); + } + (None, None) => {} + (actual, expected) => panic!("actual {actual:?} should match {expected:?} with path '{path}'"), + } + } + Ok(()) +} + +#[test] +fn from_overrides() { + let input = ["simple", "pattern/"]; + let group = gix_attributes::MatchGroup::::from_overrides(input); + assert_eq!( + group.pattern_matching_relative_path("Simple", None, gix_glob::pattern::Case::Fold), + Some(pattern_to_match(&gix_glob::parse("simple").unwrap(), 0)) + ); + assert_eq!( + group.pattern_matching_relative_path("pattern", Some(true), gix_glob::pattern::Case::Sensitive), + Some(pattern_to_match(&gix_glob::parse("pattern/").unwrap(), 1)) + ); + assert_eq!(group.patterns.len(), 1); + assert_eq!( + gix_attributes::PatternList::::from_overrides(input), + group.patterns.into_iter().next().unwrap() + ); +} + +fn pattern_to_match(pattern: &gix_glob::Pattern, sequence_number: usize) -> Match<'_, ()> { + Match { + pattern, + value: &(), + source: None, + sequence_number, + } +} diff --git a/gix-attributes/tests/match_group/mod.rs b/gix-attributes/tests/match_group/mod.rs index 94ccf42b502..5ae31c89f66 100644 --- a/gix-attributes/tests/match_group/mod.rs +++ b/gix-attributes/tests/match_group/mod.rs @@ -1,118 +1 @@ -mod ignore { - use std::io::Read; - - use bstr::{BStr, ByteSlice}; - use gix_attributes::{Ignore, Match, MatchGroup}; - use gix_glob::pattern::Case; - - struct Expectations<'a> { - lines: bstr::Lines<'a>, - } - - impl<'a> Iterator for Expectations<'a> { - type Item = (&'a BStr, Option<(&'a BStr, usize, &'a BStr)>); - - fn next(&mut self) -> Option { - let line = self.lines.next()?; - let (left, value) = line.split_at(line.find_byte(b'\t').unwrap()); - let value = value[1..].as_bstr(); - - let source_and_line = if left == b"::" { - None - } else { - let mut tokens = left.split(|b| *b == 
b':'); - let source = tokens.next().unwrap().as_bstr(); - let line_number: usize = tokens.next().unwrap().to_str_lossy().parse().ok().unwrap(); - let pattern = tokens.next().unwrap().as_bstr(); - Some((source, line_number, pattern)) - }; - Some((value, source_and_line)) - } - } - - #[test] - fn from_git_dir() -> crate::Result { - let dir = gix_testtools::scripted_fixture_read_only("make_global_and_external_and_dir_ignores.sh")?; - let repo_dir = dir.join("repo"); - let git_dir = repo_dir.join(".git"); - let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline"))?; - let mut buf = Vec::new(); - let mut group = MatchGroup::from_git_dir(git_dir, Some(dir.join("user.exclude")), &mut buf)?; - - assert!( - !group.add_patterns_file("not-a-file", false, None, &mut buf)?, - "missing files are no problem and cause a negative response" - ); - assert!( - group.add_patterns_file(repo_dir.join(".gitignore"), true, repo_dir.as_path().into(), &mut buf)?, - "existing files return true" - ); - - buf.clear(); - let ignore_file = repo_dir.join("dir-with-ignore").join(".gitignore"); - std::fs::File::open(&ignore_file)?.read_to_end(&mut buf)?; - group.add_patterns_buffer(&buf, ignore_file, repo_dir.as_path().into()); - - for (path, source_and_line) in (Expectations { - lines: baseline.lines(), - }) { - let actual = group.pattern_matching_relative_path( - path, - repo_dir - .join(path.to_str_lossy().as_ref()) - .metadata() - .ok() - .map(|m| m.is_dir()), - Case::Sensitive, - ); - match (actual, source_and_line) { - ( - Some(Match { - sequence_number, - pattern: _, - source, - value: _, - }), - Some((expected_source, line, _expected_pattern)), - ) => { - assert_eq!(sequence_number, line, "our counting should match the one used in git"); - assert_eq!( - source.map(|p| p.canonicalize().unwrap()), - Some(repo_dir.join(expected_source.to_str_lossy().as_ref()).canonicalize()?) 
- ); - } - (None, None) => {} - (actual, expected) => panic!("actual {actual:?} should match {expected:?} with path '{path}'"), - } - } - Ok(()) - } - - #[test] - fn from_overrides() { - let input = ["simple", "pattern/"]; - let group = gix_attributes::MatchGroup::::from_overrides(input); - assert_eq!( - group.pattern_matching_relative_path("Simple", None, gix_glob::pattern::Case::Fold), - Some(pattern_to_match(&gix_glob::parse("simple").unwrap(), 0)) - ); - assert_eq!( - group.pattern_matching_relative_path("pattern", Some(true), gix_glob::pattern::Case::Sensitive), - Some(pattern_to_match(&gix_glob::parse("pattern/").unwrap(), 1)) - ); - assert_eq!(group.patterns.len(), 1); - assert_eq!( - gix_attributes::PatternList::::from_overrides(input), - group.patterns.into_iter().next().unwrap() - ); - } - - fn pattern_to_match(pattern: &gix_glob::Pattern, sequence_number: usize) -> Match<'_, ()> { - Match { - pattern, - value: &(), - source: None, - sequence_number, - } - } -} +mod ignore; diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 19a44a90067..965690401ac 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -120,18 +120,25 @@ pub mod excludes { /// Configure a file-system cache checking if files below the repository are excluded. /// /// This takes into consideration all the usual repository configuration. - // TODO: test, provide higher-level interface that is much easier to use and doesn't panic. + /// + /// `index` may be used to obtain `.gitignore` files directly from the index under certain conditions. + // TODO: test, provide higher-level interface that is much easier to use and doesn't panic when accessing entries + // by non-relative path. + // TODO: `index` might be so special (given the conditions we are talking about) that it's better obtained internally + // so the caller won't have to care. + // TODO: global files like `~/.gitignore` seem to be missing here, but we need a way to control if these should be loaded. 
+ // probably that needs another permission in the repo options or a custom config variable. The latter is easiest to manage. pub fn excludes( &self, index: &gix_index::State, overrides: Option>, ) -> Result { let repo = self.parent; - let case = repo - .config - .ignore_case - .then_some(gix_glob::pattern::Case::Fold) - .unwrap_or_default(); + let case = if repo.config.ignore_case { + gix_glob::pattern::Case::Fold + } else { + gix_glob::pattern::Case::Sensitive + }; let mut buf = Vec::with_capacity(512); let excludes_file = match repo.config.excludes_file().transpose()? { Some(user_path) => Some(user_path), From 0d340f4fdeff1576460d43ca2210b11f0641c5dd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 5 Apr 2023 08:22:25 +0200 Subject: [PATCH 03/14] feat: add `xdg_config()`, `installation_config()` and `installation_config_prefix()` functions. --- Cargo.lock | 1 + gix-path/Cargo.toml | 1 + gix-path/src/env/git.rs | 87 ++++++++++++++++++++++++++ gix-path/src/env/mod.rs | 132 ++++++++++++++++++++++++++++++++++++++++ gix-path/src/lib.rs | 42 +------------ gix-path/tests/path.rs | 28 ++++++++- 6 files changed, 250 insertions(+), 41 deletions(-) create mode 100644 gix-path/src/env/git.rs create mode 100644 gix-path/src/env/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 131d9b1f2b2..96dfae7aeb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1960,6 +1960,7 @@ name = "gix-path" version = "0.7.3" dependencies = [ "bstr", + "once_cell", "tempfile", "thiserror", ] diff --git a/gix-path/Cargo.toml b/gix-path/Cargo.toml index 7fcf6c52fb9..f75c922c084 100644 --- a/gix-path/Cargo.toml +++ b/gix-path/Cargo.toml @@ -15,6 +15,7 @@ doctest = false [dependencies] bstr = { version = "1.3.0", default-features = false, features = ["std"] } thiserror = "1.0.26" +once_cell = "1.17.1" [dev-dependencies] tempfile = "3.3.0" diff --git a/gix-path/src/env/git.rs b/gix-path/src/env/git.rs new file mode 100644 index 00000000000..f9ec73dc3eb --- /dev/null +++ b/gix-path/src/env/git.rs 
@@ -0,0 +1,87 @@ +use std::path::Path; +use std::process::{Command, Stdio}; + +use bstr::{BStr, BString, ByteSlice}; + +/// Returns the file that contains git configuration coming with the installation of the `git` file in the current `PATH`, or `None` +/// if no `git` executable was found or there were other errors during execution. +pub(crate) fn install_config_path() -> Option<&'static BStr> { + static PATH: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { + // Shortcut: in Msys shells this variable is set which allows to deduce the installation directory + // so we can save the `git` invocation. + #[cfg(windows)] + if let Some(mut exec_path) = std::env::var_os("EXEPATH").map(std::path::PathBuf::from) { + exec_path.push("etc"); + exec_path.push("gitconfig"); + return crate::os_string_into_bstring(exec_path.into()).ok(); + } + let mut cmd = Command::new(if cfg!(windows) { "git.exe" } else { "git" }); + cmd.args(["config", "-l", "--show-origin"]) + .stdin(Stdio::null()) + .stderr(Stdio::null()); + first_file_from_config_with_origin(cmd.output().ok()?.stdout.as_slice().into()).map(ToOwned::to_owned) + }); + PATH.as_ref().map(|b| b.as_ref()) +} + +fn first_file_from_config_with_origin(source: &BStr) -> Option<&BStr> { + let file = source.strip_prefix(b"file:")?; + let end_pos = file.find_byte(b'\t')?; + file[..end_pos].trim_with(|c| c == '"').as_bstr().into() +} + +/// Given `config_path` as obtained from `install_config_path()`, return the path of the git installation base. 
+pub(crate) fn config_to_base_path(config_path: &Path) -> &Path { + config_path + .parent() + .expect("config file paths always have a file name to pop") +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + #[test] + fn config_to_base_path() { + for (input, expected) in [ + ( + "/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig", + "/Applications/Xcode.app/Contents/Developer/usr/share/git-core", + ), + ("C:/git-sdk-64/etc/gitconfig", "C:/git-sdk-64/etc"), + ("C:\\ProgramData/Git/config", "C:\\ProgramData/Git"), + ("C:/Program Files/Git/etc/gitconfig", "C:/Program Files/Git/etc"), + ] { + assert_eq!(super::config_to_base_path(Path::new(input)), Path::new(expected)); + } + } + #[test] + fn first_file_from_config_with_origin() { + let macos = "file:/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig credential.helper=osxkeychain\nfile:/Users/byron/.gitconfig push.default=simple\n"; + let win_msys = + "file:C:/git-sdk-64/etc/gitconfig core.symlinks=false\r\nfile:C:/git-sdk-64/etc/gitconfig core.autocrlf=true"; + let win_cmd = "file:C:/Program Files/Git/etc/gitconfig diff.astextplain.textconv=astextplain\r\nfile:C:/Program Files/Git/etc/gitconfig filter.lfs.clean=gix-lfs clean -- %f\r\n"; + let win_msys_old = "file:\"C:\\ProgramData/Git/config\" diff.astextplain.textconv=astextplain\r\nfile:\"C:\\ProgramData/Git/config\" filter.lfs.clean=git-lfs clean -- %f\r\n"; + let linux = "file:/home/parallels/.gitconfig core.excludesfile=~/.gitignore\n"; + let bogus = "something unexpected"; + let empty = ""; + + for (source, expected) in [ + ( + macos, + Some("/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig"), + ), + (win_msys, Some("C:/git-sdk-64/etc/gitconfig")), + (win_msys_old, Some("C:\\ProgramData/Git/config")), + (win_cmd, Some("C:/Program Files/Git/etc/gitconfig")), + (linux, Some("/home/parallels/.gitconfig")), + (bogus, None), + (empty, None), + ] { + assert_eq!( + 
super::first_file_from_config_with_origin(source.into()), + expected.map(Into::into) + ); + } + } +} diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs new file mode 100644 index 00000000000..8763b268190 --- /dev/null +++ b/gix-path/src/env/mod.rs @@ -0,0 +1,132 @@ +use bstr::{BString, ByteSlice}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +mod git; + +/// Return the location at which installation specific git configuration file can be found, or `None` +/// if the binary could not be executed or its results could not be parsed. +/// +/// ### Performance +/// +/// This invokes the git binary which is slow on windows. +pub fn installation_config() -> Option<&'static Path> { + git::install_config_path().and_then(|p| crate::try_from_byte_slice(p).ok()) +} + +/// Return the location at which git installation specific configuration files are located, or `None` if the binary +/// could not be executed or its results could not be parsed. +/// +/// ### Performance +/// +/// This invokes the git binary which is slow on windows. +pub fn installation_config_prefix() -> Option<&'static Path> { + installation_config().map(git::config_to_base_path) +} + +/// Returns the fully qualified path in the *xdg-home* directory (or equivalent in the home dir) to `file`, +/// accessing `env_var()` to learn where these bases are. +/// +/// Note that the `HOME` directory should ultimately come from [`home_dir()`] as it handles windows correctly. +/// The same can be achieved by using [`var()`] as `env_var`. 
+pub fn xdg_config(file: &str, env_var: &mut dyn FnMut(&str) -> Option) -> Option { + env_var("XDG_CONFIG_HOME") + .map(|home| { + let mut p = PathBuf::from(home); + p.push("git"); + p.push(file); + p + }) + .or_else(|| { + env_var("HOME").map(|home| { + let mut p = PathBuf::from(home); + p.push(".config"); + p.push("git"); + p.push(file); + p + }) + }) +} + +/// Returns the platform dependent system prefix or `None` if it cannot be found (right now only on windows). +/// +/// ### Performance +/// +/// On windows, the slowest part is the launch of the `git.exe` executable in the PATH, which only happens when launched +/// from outside of the `msys2` shell. +/// +/// ### When `None` is returned +/// +/// This happens only windows if the git binary can't be found at all for obtaining its executable path, or if the git binary +/// wasn't built with a well-known directory structure or environment. +pub fn system_prefix() -> Option<&'static Path> { + if cfg!(windows) { + static PREFIX: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { + if let Some(root) = std::env::var_os("EXEPATH").map(PathBuf::from) { + for candidate in ["mingw64", "mingw32"] { + let candidate = root.join(candidate); + if candidate.is_dir() { + return Some(candidate); + } + } + } + + let path = std::process::Command::new("git.exe") + .arg("--exec-path") + .stderr(std::process::Stdio::null()) + .output() + .ok()? + .stdout; + let path = BString::new(path) + .trim_with(|b| b.is_ascii_whitespace()) + .to_path() + .ok()? + .to_owned(); + + let one_past_prefix = path.components().enumerate().find_map(|(idx, c)| { + matches!(c,std::path::Component::Normal(name) if name.to_str() == Some("libexec")).then_some(idx) + })?; + Some(path.components().take(one_past_prefix.checked_sub(1)?).collect()) + }); + PREFIX.as_deref() + } else { + Path::new("/").into() + } +} + +/// Returns a platform independent home directory. 
+/// +/// On unix this simply returns $HOME on windows this uses %HOMEDRIVE%\%HOMEPATH% or %USERPROFILE% +pub fn home_dir() -> Option { + if let Some(home) = std::env::var_os("HOME") { + return Some(home.into()); + } + + // NOTE: technically we should also check HOMESHARE in case HOME is a UNC path + // but git doesn't do this either so probably best to wait for an upstream fix. + #[cfg(windows)] + { + if let Some(homedrive) = std::env::var_os("HOMEDRIVE") { + if let Some(home_path) = std::env::var_os("HOMEPATH") { + let home = PathBuf::from(homedrive).join(home_path); + if home.metadata().map_or(false, |home| home.is_dir()) { + return Some(home); + } + } + } + if let Some(userprofile) = std::env::var_os("USERPROFILE") { + return Some(userprofile.into()); + } + } + None +} + +/// Returns the contents of an environment variable of `name` with some special handling +/// for certain environment variables (like `HOME`) for platform compatibility. +pub fn var(name: &str) -> Option { + if name == "HOME" { + home_dir().map(PathBuf::into_os_string) + } else { + std::env::var_os(name) + } +} diff --git a/gix-path/src/lib.rs b/gix-path/src/lib.rs index 9171c6b8faf..6895aca4657 100644 --- a/gix-path/src/lib.rs +++ b/gix-path/src/lib.rs @@ -56,10 +56,6 @@ pub struct Spec(bstr::BString); mod convert; -use std::env::var_os; -use std::ffi::OsString; -use std::path::PathBuf; - pub use convert::*; mod util; @@ -71,39 +67,5 @@ mod spec; pub mod realpath; pub use realpath::function::{realpath, realpath_opts}; -/// Returns a platform independent home directory. -/// -/// On unix this simply returns $HOME on windows this uses %HOMEDRIVE%\%HOMEPATH% or %USERPROFILE% -pub fn home_dir() -> Option { - if let Some(home) = var_os("HOME") { - return Some(home.into()); - } - - // NOTE: technically we should also check HOMESHARE in case HOME is a UNC path - // but git doesn't do this either so probably best to wait for an upstream fix. 
- #[cfg(windows)] - { - if let Some(homedrive) = var_os("HOMEDRIVE") { - if let Some(home_path) = var_os("HOMEPATH") { - let home = PathBuf::from(homedrive).join(home_path); - if home.metadata().map_or(false, |home| home.is_dir()) { - return Some(home); - } - } - } - if let Some(userprofile) = var_os("USERPROFILE") { - return Some(userprofile.into()); - } - } - None -} - -/// Returns the contents of an environment variable of `name` with some special handling -/// for certain environment variables (like `HOME`) for platform compatibility. -pub fn env_var(name: &str) -> Option { - if name == "HOME" { - home_dir().map(PathBuf::into_os_string) - } else { - std::env::var_os(name) - } -} +/// Information about the environment in terms of locations of resources. +pub mod env; diff --git a/gix-path/tests/path.rs b/gix-path/tests/path.rs index 0b2bc9e9974..0bf9398056a 100644 --- a/gix-path/tests/path.rs +++ b/gix-path/tests/path.rs @@ -5,7 +5,7 @@ mod realpath; mod home_dir { #[test] fn returns_existing_directory() { - if let Some(home) = gix_path::home_dir() { + if let Some(home) = gix_path::env::home_dir() { assert!( home.is_dir(), "the home directory would typically exist, even though on unix we don't test for that." 
@@ -13,4 +13,30 @@ mod home_dir { } } } + +mod xdg_config_path { + use std::ffi::OsStr; + + #[test] + fn prefers_xdg_config_bases() { + let actual = gix_path::env::xdg_config("test", &mut |n| { + (n == OsStr::new("XDG_CONFIG_HOME")).then(|| "marker".into()) + }) + .expect("set"); + #[cfg(unix)] + assert_eq!(actual.to_str(), Some("marker/git/test")); + #[cfg(windows)] + assert_eq!(actual.to_str(), Some("marker\\git\\test")); + } + + #[test] + fn falls_back_to_home() { + let actual = gix_path::env::xdg_config("test", &mut |n| (n == OsStr::new("HOME")).then(|| "marker".into())) + .expect("set"); + #[cfg(unix)] + assert_eq!(actual.to_str(), Some("marker/.config/git/test")); + #[cfg(windows)] + assert_eq!(actual.to_str(), Some("marker\\.config\\git\\test")); + } +} mod util; From da9009f807acfe50000724589853a112fc5ab9a4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 5 Apr 2023 08:27:00 +0200 Subject: [PATCH 04/14] fix: Use `gix-path` for obtaining some shared directories as base for path generation. This will make system-wide configuration available on windows at least when using the mingw builds. 
--- gix-config/src/file/init/comfort.rs | 12 ++-- gix-config/src/source.rs | 91 ++++------------------------- 2 files changed, 17 insertions(+), 86 deletions(-) diff --git a/gix-config/src/file/init/comfort.rs b/gix-config/src/file/init/comfort.rs index 6c97282cac0..aa77fb9c01c 100644 --- a/gix-config/src/file/init/comfort.rs +++ b/gix-config/src/file/init/comfort.rs @@ -28,7 +28,7 @@ impl File<'static> { .flat_map(|kind| kind.sources()) .filter_map(|source| { let path = source - .storage_location(&mut gix_path::env_var) + .storage_location(&mut gix_path::env::var) .and_then(|p| p.is_file().then_some(p)) .map(|p| p.into_owned()); @@ -41,7 +41,7 @@ impl File<'static> { .into() }); - let home = gix_path::home_dir(); + let home = gix_path::env::home_dir(); let options = init::Options { includes: init::includes::Options::follow_without_conditional(home.as_deref()), ..Default::default() @@ -57,7 +57,7 @@ impl File<'static> { /// /// [`gix-config`'s documentation]: https://git-scm.com/docs/gix-config#Documentation/gix-config.txt-GITCONFIGCOUNT pub fn from_environment_overrides() -> Result, init::from_env::Error> { - let home = gix_path::home_dir(); + let home = gix_path::env::home_dir(); let options = init::Options { includes: init::includes::Options::follow_without_conditional(home.as_deref()), ..Default::default() @@ -86,7 +86,7 @@ impl File<'static> { let mut path = dir.into(); path.push( source - .storage_location(&mut gix_path::env_var) + .storage_location(&mut gix_path::env::var) .expect("location available for local"), ); let local = Self::from_path_no_includes(&path, source)?; @@ -99,7 +99,7 @@ impl File<'static> { let source = Source::Worktree; let path = git_dir.join( source - .storage_location(&mut gix_path::env_var) + .storage_location(&mut gix_path::env::var) .expect("location available for worktree"), ); Self::from_path_no_includes(path, source) @@ -108,7 +108,7 @@ impl File<'static> { } .transpose()?; - let home = gix_path::home_dir(); + let home = 
gix_path::env::home_dir(); let options = init::Options { includes: init::includes::Options::follow( path::interpolate::Context { diff --git a/gix-config/src/source.rs b/gix-config/src/source.rs index a3ae4a0f7af..d8ca60db4ba 100644 --- a/gix-config/src/source.rs +++ b/gix-config/src/source.rs @@ -65,29 +65,19 @@ impl Source { pub fn storage_location(self, env_var: &mut dyn FnMut(&str) -> Option) -> Option> { use Source::*; match self { - GitInstallation => git::install_config_path().map(gix_path::from_bstr), - System => env_var("GIT_CONFIG_NO_SYSTEM") - .is_none() - .then(|| PathBuf::from(env_var("GIT_CONFIG_SYSTEM").unwrap_or_else(|| "/etc/gitconfig".into())).into()), + GitInstallation => gix_path::env::installation_config().map(Into::into), + System => { + if env_var("GIT_CONFIG_NO_SYSTEM").is_some() { + None + } else { + env_var("GIT_CONFIG_SYSTEM") + .map(|p| Cow::Owned(p.into())) + .or_else(|| gix_path::env::system_prefix().map(|p| p.join("etc/gitconfig").into())) + } + } Git => match env_var("GIT_CONFIG_GLOBAL") { Some(global_override) => Some(PathBuf::from(global_override).into()), - None => env_var("XDG_CONFIG_HOME") - .map(|home| { - let mut p = PathBuf::from(home); - p.push("git"); - p.push("config"); - p - }) - .or_else(|| { - env_var("HOME").map(|home| { - let mut p = PathBuf::from(home); - p.push(".config"); - p.push("git"); - p.push("config"); - p - }) - }) - .map(Cow::Owned), + None => gix_path::env::xdg_config("config", env_var).map(Cow::Owned), }, User => env_var("GIT_CONFIG_GLOBAL") .map(|global_override| PathBuf::from(global_override).into()) @@ -104,62 +94,3 @@ impl Source { } } } - -/// Environment information involving the `git` program itself. 
-mod git { - use std::process::{Command, Stdio}; - - use bstr::{BStr, BString, ByteSlice}; - - /// Returns the file that contains git configuration coming with the installation of the `git` file in the current `PATH`, or `None` - /// if no `git` executable was found or there were other errors during execution. - pub fn install_config_path() -> Option<&'static BStr> { - static PATH: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { - let mut cmd = Command::new(if cfg!(windows) { "git.exe" } else { "git" }); - cmd.args(["config", "-l", "--show-origin"]) - .stdin(Stdio::null()) - .stderr(Stdio::null()); - first_file_from_config_with_origin(cmd.output().ok()?.stdout.as_slice().into()).map(ToOwned::to_owned) - }); - PATH.as_ref().map(|b| b.as_ref()) - } - - fn first_file_from_config_with_origin(source: &BStr) -> Option<&BStr> { - let file = source.strip_prefix(b"file:")?; - let end_pos = file.find_byte(b'\t')?; - file[..end_pos].trim_with(|c| c == '"').as_bstr().into() - } - - #[cfg(test)] - mod tests { - #[test] - fn first_file_from_config_with_origin() { - let macos = "file:/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig credential.helper=osxkeychain\nfile:/Users/byron/.gitconfig push.default=simple\n"; - let win_msys = - "file:C:/git-sdk-64/etc/gitconfig core.symlinks=false\r\nfile:C:/git-sdk-64/etc/gitconfig core.autocrlf=true"; - let win_cmd = "file:C:/Program Files/Git/etc/gitconfig diff.astextplain.textconv=astextplain\r\nfile:C:/Program Files/Git/etc/gitconfig filter.lfs.clean=gix-lfs clean -- %f\r\n"; - let win_msys_old = "file:\"C:\\ProgramData/Git/config\" diff.astextplain.textconv=astextplain\r\nfile:\"C:\\ProgramData/Git/config\" filter.lfs.clean=git-lfs clean -- %f\r\n"; - let linux = "file:/home/parallels/.gitconfig core.excludesfile=~/.gitignore\n"; - let bogus = "something unexpected"; - let empty = ""; - - for (source, expected) in [ - ( - macos, - 
Some("/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig"), - ), - (win_msys, Some("C:/git-sdk-64/etc/gitconfig")), - (win_msys_old, Some("C:\\ProgramData/Git/config")), - (win_cmd, Some("C:/Program Files/Git/etc/gitconfig")), - (linux, Some("/home/parallels/.gitconfig")), - (bogus, None), - (empty, None), - ] { - assert_eq!( - super::first_file_from_config_with_origin(source.into()), - expected.map(Into::into) - ); - } - } - } -} From e3f8c4bcda263728a5c2d5171b2b9941297ba629 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 6 Apr 2023 12:37:25 +0200 Subject: [PATCH 05/14] fix!: remove whitespace related processing from glob parsing and don't copy parsed input. This kind of processing was only done for ignore patterns, which are quite different from how things are done in attribute files. --- gix-glob/src/parse.rs | 43 +++------------------------------ gix-glob/src/pattern.rs | 16 +++++++++--- gix-glob/src/wildmatch.rs | 2 +- gix-glob/tests/parse/mod.rs | 31 ++++++++++-------------- gix-glob/tests/wildmatch/mod.rs | 4 +-- 5 files changed, 33 insertions(+), 63 deletions(-) diff --git a/gix-glob/src/parse.rs b/gix-glob/src/parse.rs index 3693f88efcb..665f459b913 100644 --- a/gix-glob/src/parse.rs +++ b/gix-glob/src/parse.rs @@ -1,4 +1,4 @@ -use bstr::{BString, ByteSlice}; +use bstr::ByteSlice; use crate::{pattern, pattern::Mode}; @@ -7,7 +7,7 @@ use crate::{pattern, pattern::Mode}; /// using `pattern::Mode` flags. 
/// /// Returns `(pattern, mode, no_wildcard_len)` -pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option)> { +pub fn pattern(mut pat: &[u8]) -> Option<(&[u8], pattern::Mode, Option)> { let mut mode = Mode::empty(); if pat.is_empty() { return None; @@ -28,10 +28,9 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option) mode |= Mode::ABSOLUTE; pat = &pat[1..]; } - let mut pat = truncate_non_escaped_trailing_spaces(pat); if pat.last() == Some(&b'/') { mode |= Mode::MUST_BE_DIR; - pat.pop(); + pat = &pat[..pat.len() - 1]; } if !pat.contains(&b'/') { @@ -41,7 +40,7 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option) mode |= Mode::ENDS_WITH; } - let pos_of_first_wildcard = first_wildcard_pos(&pat); + let pos_of_first_wildcard = first_wildcard_pos(pat); Some((pat, mode, pos_of_first_wildcard)) } @@ -50,37 +49,3 @@ fn first_wildcard_pos(pat: &[u8]) -> Option { } pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\"; - -/// We always copy just because that's ultimately needed anyway, not because we always have to. -fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { - match buf.rfind_not_byteset(br"\ ") { - Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace - None => buf.into(), - Some(start_of_non_space) => { - // This seems a bit strange but attempts to recreate the git implementation while - // actually removing the escape characters before spaces. We leave other backslashes - // for escapes to be handled by `glob/globset`. - let mut res: BString = buf[..start_of_non_space + 1].into(); - - let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); - let mut bare_spaces = 0; - while let Some(b) = trailing_bytes.next() { - match b { - b' ' => { - bare_spaces += 1; - } - b'\\' => { - res.extend(std::iter::repeat(b' ').take(bare_spaces)); - bare_spaces = 0; - // Skip what follows, like git does, but keep spaces if possible. 
- if trailing_bytes.next() == Some(&b' ') { - res.push(b' '); - } - } - _ => unreachable!("BUG: this must be either backslash or space"), - } - } - res - } - } -} diff --git a/gix-glob/src/pattern.rs b/gix-glob/src/pattern.rs index c863e641d27..08022410a2c 100644 --- a/gix-glob/src/pattern.rs +++ b/gix-glob/src/pattern.rs @@ -45,16 +45,20 @@ impl Default for Case { } } +/// Instantiation impl Pattern { /// Parse the given `text` as pattern, or return `None` if `text` was empty. pub fn from_bytes(text: &[u8]) -> Option { crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { - text, + text: text.into(), mode, first_wildcard_pos, }) } +} +/// Access +impl Pattern { /// Return true if a match is negated. pub fn is_negative(&self) -> bool { self.mode.contains(Mode::NEGATIVE) @@ -67,6 +71,11 @@ impl Pattern { /// `basename_start_pos` is the index at which the `path`'s basename starts. /// /// Lastly, `case` folding can be configured as well. + /// + /// # Note + /// + /// This is specific to how exclude patterns match. + // TODO: then it should be in `gix-attributes` pub fn matches_repo_relative_path<'a>( &self, path: impl Into<&'a BStr>, @@ -105,8 +114,9 @@ impl Pattern { /// `mode` can identify `value` as path which won't match the slash character, and can match /// strings with cases ignored as well. Note that the case folding performed here is ASCII only. /// - /// Note that this method uses some shortcuts to accelerate simple patterns. - fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { + /// Note that this method uses some shortcuts to accelerate simple patterns, but falls back to + /// [wildmatch()][crate::wildmatch()] if these fail. 
+ pub fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { let value = value.into(); match self.first_wildcard_pos { // "*literal" case, overrides starts-with diff --git a/gix-glob/src/wildmatch.rs b/gix-glob/src/wildmatch.rs index c86e6e2518e..4d5944f6e92 100644 --- a/gix-glob/src/wildmatch.rs +++ b/gix-glob/src/wildmatch.rs @@ -2,7 +2,7 @@ use bitflags::bitflags; bitflags! { /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()]. #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] - #[derive(Copy, Clone)] + #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] pub struct Mode: u8 { /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths. const NO_MATCH_SLASH_LITERAL = 1 << 0; diff --git a/gix-glob/tests/parse/mod.rs b/gix-glob/tests/parse/mod.rs index d6be0df2429..fc668d1e6f2 100644 --- a/gix-glob/tests/parse/mod.rs +++ b/gix-glob/tests/parse/mod.rs @@ -106,11 +106,11 @@ fn trailing_slashes_are_marked_and_removed() { } #[test] -fn trailing_spaces_are_ignored() { - assert_eq!(gix_glob::parse(br"a "), pat("a", Mode::NO_SUB_DIR, None)); +fn trailing_spaces_are_taken_literally() { + assert_eq!(gix_glob::parse(br"a "), pat("a ", Mode::NO_SUB_DIR, None)); assert_eq!( gix_glob::parse(b"a\t\t "), - pat("a\t\t", Mode::NO_SUB_DIR, None), + pat("a\t\t ", Mode::NO_SUB_DIR, None), "trailing tabs are not ignored" ); } @@ -119,37 +119,32 @@ fn trailing_spaces_are_ignored() { fn trailing_spaces_can_be_escaped_to_be_literal() { assert_eq!( gix_glob::parse(br"a \ "), - pat("a ", Mode::NO_SUB_DIR, None), - "a single escape in front of the last desired space is enough" + pat("a \\ ", Mode::NO_SUB_DIR, Some(3)), + "there is no escaping" ); assert_eq!( gix_glob::parse(br"a b c "), - pat("a b c", Mode::NO_SUB_DIR, None), - "spaces in the middle are fine" + pat("a b c ", Mode::NO_SUB_DIR, None), + "spaces in the middle are fine and also at the end" ); assert_eq!( 
gix_glob::parse(br"a\ \ \ "), - pat("a ", Mode::NO_SUB_DIR, None), - "one can also escape every single one" - ); - assert_eq!( - gix_glob::parse(br"a \ "), - pat("a ", Mode::NO_SUB_DIR, None), - "or just the one in the middle, losing the last actual space" + pat(r"a\ \ \ ", Mode::NO_SUB_DIR, Some(1)), + "one can also escape every single space, but it's interpreted by the globbing engine" ); assert_eq!( gix_glob::parse(br"a \"), - pat("a ", Mode::NO_SUB_DIR, None), - "escaping nothing also works as a whitespace protection" + pat(r"a \", Mode::NO_SUB_DIR, Some(4)), + "escaping nothing also works" ); assert_eq!( gix_glob::parse(br"a \\\ "), - pat(r"a ", Mode::NO_SUB_DIR, None), + pat(r"a \\\ ", Mode::NO_SUB_DIR, Some(4)), "strange things like these work too" ); assert_eq!( gix_glob::parse(br"a \\ "), - pat(r"a ", Mode::NO_SUB_DIR, None), + pat(r"a \\ ", Mode::NO_SUB_DIR, Some(4)), "strange things like these work as well" ); } diff --git a/gix-glob/tests/wildmatch/mod.rs b/gix-glob/tests/wildmatch/mod.rs index 2e74dabf3f4..11fbd664b98 100644 --- a/gix-glob/tests/wildmatch/mod.rs +++ b/gix-glob/tests/wildmatch/mod.rs @@ -73,8 +73,8 @@ fn corpus() { (0,0,0,0, "]", "[!]-]"), (1,1,1,1, "a", "[!]-]"), (0,0,0,0, "", r"\"), - (0,0,1,1, r"XXX/\", r"*/\"), - (0,0,1,1, r"XXX/\", r"*/\\"), + (0,0,0,0, r"XXX/\", r"*/\"), + (1,1,1,1, r"XXX/\", r"*/\\"), (1,1,1,1, "foo", "foo"), (1,1,1,1, "@foo", "@foo"), (0,0,0,0, "foo", "@foo"), From 042154b9076dc44e4378339e16875208c8125213 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 9 Apr 2023 10:45:04 +0200 Subject: [PATCH 06/14] feat: add utilities for stack-based searching with pattern matching. 
--- Cargo.lock | 2 + gix-glob/Cargo.toml | 2 + gix-glob/src/lib.rs | 2 + gix-glob/src/pattern.rs | 5 -- gix-glob/src/search/mod.rs | 42 +++++++++ gix-glob/src/search/pattern.rs | 134 +++++++++++++++++++++++++++++ gix-glob/tests/glob.rs | 1 + gix-glob/tests/pattern/matching.rs | 2 +- gix-glob/tests/search/mod.rs | 1 + gix-glob/tests/search/pattern.rs | 39 +++++++++ 10 files changed, 224 insertions(+), 6 deletions(-) create mode 100644 gix-glob/src/search/mod.rs create mode 100644 gix-glob/src/search/pattern.rs create mode 100644 gix-glob/tests/search/mod.rs create mode 100644 gix-glob/tests/search/pattern.rs diff --git a/Cargo.lock b/Cargo.lock index 96dfae7aeb1..f144a4cd3b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1668,6 +1668,8 @@ dependencies = [ "bitflags 2.0.2", "bstr", "document-features", + "gix-features 0.28.1", + "gix-path 0.7.3", "gix-testtools", "serde", ] diff --git a/gix-glob/Cargo.toml b/gix-glob/Cargo.toml index f8f5add38fe..689a7b6aeaf 100644 --- a/gix-glob/Cargo.toml +++ b/gix-glob/Cargo.toml @@ -16,6 +16,8 @@ doctest = false serde1 = ["serde", "bstr/serde", "bitflags/serde"] [dependencies] +gix-path = { version = "^0.7.2", path = "../gix-path" } +gix-features = { version = "^0.28.0", path = "../gix-features" } bstr = { version = "1.3.0", default-features = false, features = ["std"]} bitflags = "2" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/gix-glob/src/lib.rs b/gix-glob/src/lib.rs index 48d011a52c5..fb732f7c89a 100644 --- a/gix-glob/src/lib.rs +++ b/gix-glob/src/lib.rs @@ -27,6 +27,8 @@ pub struct Pattern { /// pub mod pattern; +pub mod search; + /// pub mod wildmatch; pub use wildmatch::function::wildmatch; diff --git a/gix-glob/src/pattern.rs b/gix-glob/src/pattern.rs index 08022410a2c..c2708a351b8 100644 --- a/gix-glob/src/pattern.rs +++ b/gix-glob/src/pattern.rs @@ -71,11 +71,6 @@ impl Pattern { /// `basename_start_pos` is the index at which the `path`'s basename starts. 
/// /// Lastly, `case` folding can be configured as well. - /// - /// # Note - /// - /// This is specific to how exclude patterns match. - // TODO: then it should be in `gix-attributes` pub fn matches_repo_relative_path<'a>( &self, path: impl Into<&'a BStr>, diff --git a/gix-glob/src/search/mod.rs b/gix-glob/src/search/mod.rs new file mode 100644 index 00000000000..de0fde4f1af --- /dev/null +++ b/gix-glob/src/search/mod.rs @@ -0,0 +1,42 @@ +//! Utilities for searching matches of paths to patterns. +//! +//! Please note that these are specific to how both excludes and attributes are searched, and this is +//! merely a way to share code among them. +use std::path::{Path, PathBuf}; + +/// +pub mod pattern; + +/// A trait to convert bytes into patterns and their associated value. +/// +/// This is used for `gitattributes` which have a value, and `gitignore` which don't. +pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default { + /// The value associated with a pattern. + type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone; + + /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them. + fn bytes_to_patterns(bytes: &[u8], source: &Path) -> Vec>; + + /// Returns true if the given pattern may be used for matching. + fn may_use_glob_pattern(pattern: &crate::Pattern) -> bool; +} + +/// Add the given file at `source` if it exists, otherwise do nothing. +/// If a `root` is provided, it's not considered a global file anymore. +/// Returns true if the file was added, or false if it didn't exist. 
+pub fn add_patterns_file( + patterns: &mut Vec>, + source: impl Into, + follow_symlinks: bool, + root: Option<&Path>, + buf: &mut Vec, +) -> std::io::Result { + let previous_len = patterns.len(); + patterns.extend(pattern::List::::from_file( + source.into(), + root, + follow_symlinks, + buf, + )?); + Ok(patterns.len() != previous_len) +} diff --git a/gix-glob/src/search/pattern.rs b/gix-glob/src/search/pattern.rs new file mode 100644 index 00000000000..a661e6a167a --- /dev/null +++ b/gix-glob/src/search/pattern.rs @@ -0,0 +1,134 @@ +use crate::pattern::Case; +use crate::search::Pattern; +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// A list of patterns which optionally know where they were loaded from and what their base is. +/// +/// Knowing their base which is relative to a source directory, it will ignore all path to match against +/// that don't also start with said base. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct List { + /// Patterns and their associated data in the order they were loaded in or specified, + /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). + /// + /// During matching, this order is reversed. + pub patterns: Vec>, + + /// The path from which the patterns were read, or `None` if the patterns + /// don't originate in a file on disk. + pub source: Option, + + /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root. + /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root. + pub base: Option, +} + +/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers. 
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Mapping { + /// The pattern itself, like `/target/*` + pub pattern: crate::Pattern, + /// The value associated with the pattern. + pub value: T, + /// Typically the line number in the file the pattern was parsed from. + pub sequence_number: usize, +} + +fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec) -> std::io::Result { + buf.clear(); + let file = if follow_symlinks { + std::fs::File::open(path) + } else { + gix_features::fs::open_options_no_follow().read(true).open(path) + }; + Ok(match file { + Ok(mut file) => { + file.read_to_end(buf)?; + true + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => false, + Err(err) => return Err(err), + }) +} + +/// Instantiation +impl List +where + T: Pattern, +{ + /// `source` is the location of the `bytes` which represent a list of patterns line by line. + pub fn from_bytes(bytes: &[u8], source: impl Into, root: Option<&Path>) -> Self { + let source = source.into(); + let patterns = T::bytes_to_patterns(bytes, source.as_path()); + + let base = root + .and_then(|root| source.parent().expect("file").strip_prefix(root).ok()) + .and_then(|base| { + (!base.as_os_str().is_empty()).then(|| { + let mut base: BString = + gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned(); + + base.push_byte(b'/'); + base + }) + }); + List { + patterns, + source: Some(source), + base, + } + } + + /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally + /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file. 
+ pub fn from_file( + source: impl Into, + root: Option<&Path>, + follow_symlinks: bool, + buf: &mut Vec, + ) -> std::io::Result> { + let source = source.into(); + Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root))) + } +} + +/// Utilities +impl List +where + T: Pattern, +{ + /// If this list is anchored to a base path, return `relative_path` as being relative to our base and return + /// an updated `basename_pos` as well if it was set. + /// `case` is respected for the comparison. + /// + /// This is useful to turn repository-relative paths into paths relative to a particular search base. + pub fn strip_base_handle_recompute_basename_pos<'a>( + &self, + relative_path: &'a BStr, + basename_pos: Option, + case: Case, + ) -> Option<(&'a BStr, Option)> { + match self.base.as_deref() { + Some(base) => ( + match case { + Case::Sensitive => relative_path.strip_prefix(base.as_slice())?.as_bstr(), + Case::Fold => { + let rela_dir = relative_path.get(..base.len())?; + if !rela_dir.eq_ignore_ascii_case(base) { + return None; + } + &relative_path[base.len()..] 
+ } + }, + basename_pos.and_then(|pos| { + let pos = pos - base.len(); + (pos != 0).then_some(pos) + }), + ), + None => (relative_path, basename_pos), + } + .into() + } +} diff --git a/gix-glob/tests/glob.rs b/gix-glob/tests/glob.rs index 3a90f1d5101..256a74bc7db 100644 --- a/gix-glob/tests/glob.rs +++ b/gix-glob/tests/glob.rs @@ -1,3 +1,4 @@ mod parse; mod pattern; +mod search; mod wildmatch; diff --git a/gix-glob/tests/pattern/matching.rs b/gix-glob/tests/pattern/matching.rs index 3e757f8d6c4..8a5208d9a58 100644 --- a/gix-glob/tests/pattern/matching.rs +++ b/gix-glob/tests/pattern/matching.rs @@ -271,7 +271,7 @@ fn names_do_not_automatically_match_entire_directories() { #[test] fn directory_patterns_do_not_match_files_within_a_directory_as_well_like_slash_star_star() { - // this feature is implemented with the directory stack, which excludes entire directories + // this feature is implemented with the directory stack in `gix-ignore`, which excludes entire directories let pattern = &pat("dir/"); assert!(!match_path(pattern, "dir/file", None, Case::Sensitive)); assert!(!match_path(pattern, "base/dir/file", None, Case::Sensitive)); diff --git a/gix-glob/tests/search/mod.rs b/gix-glob/tests/search/mod.rs new file mode 100644 index 00000000000..e88c9a6052e --- /dev/null +++ b/gix-glob/tests/search/mod.rs @@ -0,0 +1 @@ +mod pattern; diff --git a/gix-glob/tests/search/pattern.rs b/gix-glob/tests/search/pattern.rs new file mode 100644 index 00000000000..6d89b9600b3 --- /dev/null +++ b/gix-glob/tests/search/pattern.rs @@ -0,0 +1,39 @@ +mod list { + use gix_glob::pattern::Case; + use gix_glob::search::pattern::{List, Mapping}; + use gix_glob::search::Pattern; + use std::path::Path; + + #[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Default)] + struct Dummy; + + impl Pattern for Dummy { + type Value = (); + + fn bytes_to_patterns(_bytes: &[u8], _source: &Path) -> Vec> { + vec![] + } + + fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { + 
unreachable!("won't be called") + } + } + + #[test] + fn strip_base_handle_recompute_basename_pos() { + let list = List::::from_bytes(&[], "a/b/source", Some(Path::new(""))); + let res = list.strip_base_handle_recompute_basename_pos("a/b/file".into(), Some(4), Case::Sensitive); + assert_eq!( + res, + Some(("file".into(), None)), + "files don't have a basename position anymore" + ); + + let res = list.strip_base_handle_recompute_basename_pos("a/B/c/File".into(), Some(6), Case::Fold); + assert_eq!( + res, + Some(("c/File".into(), Some(2))), + "otherwise the basename is recomputed, case folding is effective" + ); + } +} From ef8f405d06adf100bbe5f0b1fccbe8bb2c6fd650 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 13 Apr 2023 21:47:46 +0200 Subject: [PATCH 07/14] A new create for filesystem specific utilities --- Cargo.lock | 7 ++ Cargo.toml | 1 + README.md | 1 + crate-status.md | 7 +- gix-utils/CHANGELOG.md | 6 ++ gix-utils/Cargo.toml | 15 ++++ gix-utils/src/fs_capabilities.rs | 122 +++++++++++++++++++++++++++++ gix-utils/src/lib.rs | 32 ++++++++ gix-utils/src/snapshot.rs | 127 +++++++++++++++++++++++++++++++ gix-utils/src/symlink.rs | 54 +++++++++++++ 10 files changed, 371 insertions(+), 1 deletion(-) create mode 100644 gix-utils/CHANGELOG.md create mode 100644 gix-utils/Cargo.toml create mode 100644 gix-utils/src/fs_capabilities.rs create mode 100644 gix-utils/src/lib.rs create mode 100644 gix-utils/src/snapshot.rs create mode 100644 gix-utils/src/symlink.rs diff --git a/Cargo.lock b/Cargo.lock index f144a4cd3b8..a347266280a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2307,6 +2307,13 @@ dependencies = [ "url", ] +[[package]] +name = "gix-utils" +version = "0.1.0" +dependencies = [ + "gix-features 0.28.1", +] + [[package]] name = "gix-validate" version = "0.7.3" diff --git a/Cargo.toml b/Cargo.toml index 607c7eb8da5..fa9d1c79b58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -176,6 +176,7 @@ members = [ "gix-pathspec", "gix-refspec", "gix-path", + "gix-utils", 
"gix", "gitoxide-core", "gix-hashtable", diff --git a/README.md b/README.md index fc9d361efb6..9097547e92c 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ is usable to some extent. * [gix-refspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-refspec) * `gitoxide-core` * **very early** _(possibly without any documentation and many rough edges)_ + * [gix-utils](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-utils) * [gix-worktree](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree) * [gix-bitmap](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-bitmap) * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) diff --git a/crate-status.md b/crate-status.md index 694e4be9598..6a6e2c1e8fa 100644 --- a/crate-status.md +++ b/crate-status.md @@ -101,10 +101,15 @@ and itself relies on all `git-*` crates. It's not meant for consumption, for app * [x] write the table of contents ### gix-hashtable - * [x] hashmap * [x] hashset +### gix-utils + +* **filesystem** + * [x] probe capabilities + * [x] symlink creation and removal + * [x] file snapshots ### gix-object * *decode (zero-copy)* borrowed objects diff --git a/gix-utils/CHANGELOG.md b/gix-utils/CHANGELOG.md new file mode 100644 index 00000000000..1d013ff92fa --- /dev/null +++ b/gix-utils/CHANGELOG.md @@ -0,0 +1,6 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
diff --git a/gix-utils/Cargo.toml b/gix-utils/Cargo.toml new file mode 100644 index 00000000000..fecb89dbc04 --- /dev/null +++ b/gix-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "gix-utils" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A crate with `gitoxide` utilities that don't need feature toggles" +authors = ["Sebastian Thiel "] +edition = "2021" +rust-version = "1.64" + +[lib] +doctest = false + +[dependencies] +gix-features = { path = "../gix-features" } diff --git a/gix-utils/src/fs_capabilities.rs b/gix-utils/src/fs_capabilities.rs new file mode 100644 index 00000000000..0c7d2b6bfd7 --- /dev/null +++ b/gix-utils/src/fs_capabilities.rs @@ -0,0 +1,122 @@ +// TODO: tests +use crate::FilesystemCapabilities; +use std::path::Path; + +#[cfg(windows)] +impl Default for FilesystemCapabilities { + fn default() -> Self { + FilesystemCapabilities { + precompose_unicode: false, + ignore_case: true, + executable_bit: false, + symlink: false, + } + } +} + +#[cfg(target_os = "macos")] +impl Default for FilesystemCapabilities { + fn default() -> Self { + FilesystemCapabilities { + precompose_unicode: true, + ignore_case: true, + executable_bit: true, + symlink: true, + } + } +} + +#[cfg(all(unix, not(target_os = "macos")))] +impl Default for FilesystemCapabilities { + fn default() -> Self { + FilesystemCapabilities { + precompose_unicode: false, + ignore_case: false, + executable_bit: true, + symlink: true, + } + } +} + +impl FilesystemCapabilities { + /// try to determine all values in this context by probing them in the given `git_dir`, which + /// should be on the file system the git repository is located on. + /// `git_dir` is a typical git repository, expected to be populated with the typical files like `config`. + /// + /// All errors are ignored and interpreted on top of the default for the platform the binary is compiled for. 
+ pub fn probe(git_dir: impl AsRef) -> Self { + let root = git_dir.as_ref(); + let ctx = FilesystemCapabilities::default(); + FilesystemCapabilities { + symlink: Self::probe_symlink(root).unwrap_or(ctx.symlink), + ignore_case: Self::probe_ignore_case(root).unwrap_or(ctx.ignore_case), + precompose_unicode: Self::probe_precompose_unicode(root).unwrap_or(ctx.precompose_unicode), + executable_bit: Self::probe_file_mode(root).unwrap_or(ctx.executable_bit), + } + } + + #[cfg(unix)] + fn probe_file_mode(root: &Path) -> std::io::Result { + use std::os::unix::fs::{MetadataExt, OpenOptionsExt}; + + // test it exactly as we typically create executable files, not using chmod. + let test_path = root.join("_test_executable_bit"); + let res = std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .mode(0o777) + .open(&test_path) + .and_then(|f| f.metadata().map(|m| m.mode() & 0o100 == 0o100)); + std::fs::remove_file(test_path)?; + res + } + + #[cfg(not(unix))] + fn probe_file_mode(_root: &Path) -> std::io::Result { + Ok(false) + } + + fn probe_ignore_case(git_dir: &Path) -> std::io::Result { + std::fs::metadata(git_dir.join("cOnFiG")).map(|_| true).or_else(|err| { + if err.kind() == std::io::ErrorKind::NotFound { + Ok(false) + } else { + Err(err) + } + }) + } + + fn probe_precompose_unicode(root: &Path) -> std::io::Result { + let precomposed = "ä"; + let decomposed = "a\u{308}"; + + let precomposed = root.join(precomposed); + std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&precomposed)?; + let res = root.join(decomposed).symlink_metadata().map(|_| true); + std::fs::remove_file(precomposed)?; + res + } + + fn probe_symlink(root: &Path) -> std::io::Result { + let src_path = root.join("__link_src_file"); + std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&src_path)?; + let link_path = root.join("__file_link"); + if crate::symlink::create(&src_path, &link_path).is_err() { + std::fs::remove_file(&src_path)?; + return Ok(false); + 
} + + let res = std::fs::symlink_metadata(&link_path).map(|m| m.file_type().is_symlink()); + + let cleanup = crate::symlink::remove(&link_path).or_else(|_| std::fs::remove_file(&link_path)); + std::fs::remove_file(&src_path).and(cleanup)?; + + res + } +} diff --git a/gix-utils/src/lib.rs b/gix-utils/src/lib.rs new file mode 100644 index 00000000000..2bd9cf80cf8 --- /dev/null +++ b/gix-utils/src/lib.rs @@ -0,0 +1,32 @@ +//! A crate with utilities that don't need feature toggles. +//! +//! If they would need feature toggles, they should be in `gix-features` instead. +#![deny(rust_2018_idioms)] +#![forbid(unsafe_code)] + +/// Common knowledge about the worktree that is needed across most interactions with the work tree +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +pub struct FilesystemCapabilities { + /// If true, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that + /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally + /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and + /// perform conversions accordingly. + /// If false, no conversions will be performed. + pub precompose_unicode: bool, + /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`. + /// This is also called case-folding. + pub ignore_case: bool, + /// If true, we assume the executable bit is honored as part of the files mode. If false, we assume the file system + /// ignores the executable bit, hence it will be reported as 'off' even though we just tried to set it to be on. + pub executable_bit: bool, + /// If true, the file system supports symbolic links and we should try to create them. Otherwise symbolic links will be checked + /// out as files which contain the link as text. 
+ pub symlink: bool, +} + +mod snapshot; +pub use snapshot::{FileSnapshot, SharedFileSnapshot, SharedFileSnapshotMut}; + +mod fs_capabilities; +pub mod symlink; diff --git a/gix-utils/src/snapshot.rs b/gix-utils/src/snapshot.rs new file mode 100644 index 00000000000..02a0ec843c0 --- /dev/null +++ b/gix-utils/src/snapshot.rs @@ -0,0 +1,127 @@ +// TODO: tests +use std::ops::Deref; + +use gix_features::threading::{get_mut, get_ref, MutableOnDemand, OwnShared}; + +/// A structure holding enough information to reload a value if its on-disk representation changes as determined by its modified time. +#[derive(Debug)] +pub struct FileSnapshot { + value: T, + modified: std::time::SystemTime, +} + +impl Clone for FileSnapshot { + fn clone(&self) -> Self { + Self { + value: self.value.clone(), + modified: self.modified, + } + } +} + +/// A snapshot of a resource which is up-to-date in the moment it is retrieved. +pub type SharedFileSnapshot = OwnShared>; + +/// Use this type for fields in structs that are to store the [`FileSnapshot`], typically behind an [`OwnShared`]. +/// +/// Note that the resource itself is behind another [`OwnShared`] to allow it to be used without holding any kind of lock, hence +/// without blocking updates while it is used. +#[derive(Debug, Default)] +pub struct SharedFileSnapshotMut(pub MutableOnDemand>>); + +impl Deref for FileSnapshot { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl Deref for SharedFileSnapshotMut { + type Target = MutableOnDemand>>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl SharedFileSnapshotMut { + /// Create a new instance of this type. + /// + /// Useful in case `Default::default()` isn't working for some reason. + pub fn new() -> Self { + SharedFileSnapshotMut(MutableOnDemand::new(None)) + } + + /// Refresh `state` forcefully by re-`open`ing the resource. 
Note that `open()` returns `None` if the resource isn't + /// present on disk, and that it's critical that the modified time is obtained _before_ opening the resource. + pub fn force_refresh( + &self, + open: impl FnOnce() -> Result, E>, + ) -> Result<(), E> { + let mut state = get_mut(&self.0); + *state = open()?.map(|(modified, value)| OwnShared::new(FileSnapshot { value, modified })); + Ok(()) + } + + /// Assure that the resource in `state` is up-to-date by comparing the `current_modification_time` with the one we know in `state` + /// and by acting accordingly. + /// Returns the potentially updated/reloaded resource if it is still present on disk, which then represents a snapshot that is up-to-date + /// in that very moment, or `None` if the underlying file doesn't exist. + /// + /// Note that even though this is racy, each time a request is made there is a chance to see the actual state. + pub fn recent_snapshot( + &self, + mut current_modification_time: impl FnMut() -> Option, + open: impl FnOnce() -> Result, E>, + ) -> Result>, E> { + let state = get_ref(self); + let recent_modification = current_modification_time(); + let buffer = match (&*state, recent_modification) { + (None, None) => (*state).clone(), + (Some(_), None) => { + drop(state); + let mut state = get_mut(self); + *state = None; + (*state).clone() + } + (Some(snapshot), Some(modified_time)) => { + if snapshot.modified < modified_time { + drop(state); + let mut state = get_mut(self); + + if let (Some(_snapshot), Some(modified_time)) = (&*state, current_modification_time()) { + *state = open()?.map(|value| { + OwnShared::new(FileSnapshot { + value, + modified: modified_time, + }) + }); + } + + (*state).clone() + } else { + // Note that this relies on sub-section precision or else is a race when the packed file was just changed. + // It's nothing we can know though, so… up to the caller unfortunately. 
+ Some(snapshot.clone()) + } + } + (None, Some(_modified_time)) => { + drop(state); + let mut state = get_mut(self); + // Still in the same situation? If so, load the buffer. This compensates for the trampling herd + // during lazy-loading at the expense of another mtime check. + if let (None, Some(modified_time)) = (&*state, current_modification_time()) { + *state = open()?.map(|value| { + OwnShared::new(FileSnapshot { + value, + modified: modified_time, + }) + }); + } + (*state).clone() + } + }; + Ok(buffer) + } +} diff --git a/gix-utils/src/symlink.rs b/gix-utils/src/symlink.rs new file mode 100644 index 00000000000..d8590823bfc --- /dev/null +++ b/gix-utils/src/symlink.rs @@ -0,0 +1,54 @@ +use std::{io, path::Path}; + +#[cfg(not(windows))] +pub fn create(original: &Path, link: &Path) -> io::Result<()> { + std::os::unix::fs::symlink(original, link) +} + +#[cfg(not(windows))] +pub fn remove(path: &Path) -> io::Result<()> { + std::fs::remove_file(path) +} + +// TODO: use the `symlink` crate once it can delete directory symlinks +#[cfg(windows)] +pub fn remove(path: &Path) -> io::Result<()> { + if let Ok(meta) = std::fs::metadata(path) { + if meta.is_file() { + std::fs::remove_file(path) // this removes the link itself + } else { + std::fs::remove_dir(path) // however, this sees the destination directory, which isn't the right thing actually + } + } else { + std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path)) + } +} + +#[cfg(windows)] +pub fn create(original: &Path, link: &Path) -> io::Result<()> { + use std::os::windows::fs::{symlink_dir, symlink_file}; + // TODO: figure out if links to links count as files or whatever they point at + if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { + symlink_dir(original, link) + } else { + symlink_file(original, link) + } +} + +pub mod error { + use std::io::ErrorKind::AlreadyExists; + + #[cfg(not(windows))] + pub fn indicates_collision(err: &std::io::Error) -> bool { + // TODO: 
use ::IsDirectory as well when stabilized instead of raw_os_error(), and ::FileSystemLoop respectively + err.kind() == AlreadyExists + || err.raw_os_error() == Some(21) + || err.raw_os_error() == Some(62) // no-follow on symlnk on mac-os + || err.raw_os_error() == Some(40) // no-follow on symlnk on ubuntu + } + + #[cfg(windows)] + pub fn indicates_collision(err: &std::io::Error) -> bool { + err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied + } +} From b645d28f9641c6b4022e1e37ad9fe528922ec747 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 13 Apr 2023 22:05:31 +0200 Subject: [PATCH 08/14] feat!: remove types that are now available in `gix-os` --- Cargo.lock | 4 + gix-features/src/fs.rs | 130 ------------------ gix-ref/Cargo.toml | 2 + gix-ref/src/store/file/loose/mod.rs | 4 +- gix-ref/src/store/file/packed.rs | 4 +- gix-ref/tests/Cargo.toml | 1 + .../create_or_update/collisions.rs | 2 +- gix-worktree/Cargo.toml | 1 + gix-worktree/src/fs/cache/platform.rs | 2 +- gix-worktree/src/fs/capabilities.rs | 122 ---------------- gix-worktree/src/fs/mod.rs | 23 ---- gix-worktree/src/index/checkout.rs | 3 +- gix-worktree/src/index/entry.rs | 13 +- gix-worktree/src/index/mod.rs | 3 +- gix-worktree/src/lib.rs | 2 - gix-worktree/src/os.rs | 50 ------- gix-worktree/tests/worktree/fs/mod.rs | 2 +- gix-worktree/tests/worktree/index/checkout.rs | 7 +- gix/Cargo.toml | 1 + gix/src/config/cache/access.rs | 2 +- gix/src/create.rs | 4 +- gix/src/lib.rs | 1 + gix/src/open/repository.rs | 4 +- gix/src/shallow.rs | 4 +- gix/src/worktree/mod.rs | 4 +- 25 files changed, 41 insertions(+), 354 deletions(-) delete mode 100644 gix-worktree/src/fs/capabilities.rs delete mode 100644 gix-worktree/src/os.rs diff --git a/Cargo.lock b/Cargo.lock index a347266280a..636fdbe13e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1331,6 +1331,7 @@ dependencies = [ "gix-transport", "gix-traverse 0.24.0", "gix-url", + "gix-utils", "gix-validate 0.7.4", "gix-worktree 
0.15.2", "is_ci", @@ -2073,6 +2074,7 @@ dependencies = [ "gix-path 0.7.3", "gix-tempfile 5.0.2", "gix-testtools", + "gix-utils", "gix-validate 0.7.4", "memmap2", "nom", @@ -2094,6 +2096,7 @@ dependencies = [ "gix-odb", "gix-ref 0.27.2", "gix-testtools", + "gix-utils", "gix-validate 0.7.4", "gix-worktree 0.15.2", "tempfile", @@ -2366,6 +2369,7 @@ dependencies = [ "gix-odb", "gix-path 0.7.3", "gix-testtools", + "gix-utils", "io-close", "serde", "symlink", diff --git a/gix-features/src/fs.rs b/gix-features/src/fs.rs index f65779b926a..f07ac1f0f48 100644 --- a/gix-features/src/fs.rs +++ b/gix-features/src/fs.rs @@ -114,133 +114,3 @@ pub fn open_options_no_follow() -> std::fs::OpenOptions { } options } - -mod snapshot { - use std::ops::Deref; - - use crate::threading::{get_mut, get_ref, MutableOnDemand, OwnShared}; - - /// A structure holding enough information to reload a value if its on-disk representation changes as determined by its modified time. - #[derive(Debug)] - pub struct Snapshot { - value: T, - modified: std::time::SystemTime, - } - - impl Clone for Snapshot { - fn clone(&self) -> Self { - Self { - value: self.value.clone(), - modified: self.modified, - } - } - } - - /// A snapshot of a resource which is up-to-date in the moment it is retrieved. - pub type SharedSnapshot = OwnShared>; - - /// Use this type for fields in structs that are to store the [`Snapshot`], typically behind an [`OwnShared`]. - /// - /// Note that the resource itself is behind another [`OwnShared`] to allow it to be used without holding any kind of lock, hence - /// without blocking updates while it is used. 
- #[derive(Debug, Default)] - pub struct MutableSnapshot(pub MutableOnDemand>>); - - impl Deref for Snapshot { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.value - } - } - - impl Deref for MutableSnapshot { - type Target = MutableOnDemand>>; - - fn deref(&self) -> &Self::Target { - &self.0 - } - } - - impl MutableSnapshot { - /// Create a new instance of this type. - /// - /// Useful in case `Default::default()` isn't working for some reason. - pub fn new() -> Self { - MutableSnapshot(MutableOnDemand::new(None)) - } - - /// Refresh `state` forcefully by re-`open`ing the resource. Note that `open()` returns `None` if the resource isn't - /// present on disk, and that it's critical that the modified time is obtained _before_ opening the resource. - pub fn force_refresh( - &self, - open: impl FnOnce() -> Result, E>, - ) -> Result<(), E> { - let mut state = get_mut(&self.0); - *state = open()?.map(|(modified, value)| OwnShared::new(Snapshot { value, modified })); - Ok(()) - } - - /// Assure that the resource in `state` is up-to-date by comparing the `current_modification_time` with the one we know in `state` - /// and by acting accordingly. - /// Returns the potentially updated/reloaded resource if it is still present on disk, which then represents a snapshot that is up-to-date - /// in that very moment, or `None` if the underlying file doesn't exist. - /// - /// Note that even though this is racy, each time a request is made there is a chance to see the actual state. 
- pub fn recent_snapshot( - &self, - mut current_modification_time: impl FnMut() -> Option, - open: impl FnOnce() -> Result, E>, - ) -> Result>, E> { - let state = get_ref(self); - let recent_modification = current_modification_time(); - let buffer = match (&*state, recent_modification) { - (None, None) => (*state).clone(), - (Some(_), None) => { - drop(state); - let mut state = get_mut(self); - *state = None; - (*state).clone() - } - (Some(snapshot), Some(modified_time)) => { - if snapshot.modified < modified_time { - drop(state); - let mut state = get_mut(self); - - if let (Some(_snapshot), Some(modified_time)) = (&*state, current_modification_time()) { - *state = open()?.map(|value| { - OwnShared::new(Snapshot { - value, - modified: modified_time, - }) - }); - } - - (*state).clone() - } else { - // Note that this relies on sub-section precision or else is a race when the packed file was just changed. - // It's nothing we can know though, so… up to the caller unfortunately. - Some(snapshot.clone()) - } - } - (None, Some(_modified_time)) => { - drop(state); - let mut state = get_mut(self); - // Still in the same situation? If so, load the buffer. This compensates for the trampling herd - // during lazy-loading at the expense of another mtime check. 
- if let (None, Some(modified_time)) = (&*state, current_modification_time()) { - *state = open()?.map(|value| { - OwnShared::new(Snapshot { - value, - modified: modified_time, - }) - }); - } - (*state).clone() - } - }; - Ok(buffer) - } - } -} -pub use snapshot::{MutableSnapshot, SharedSnapshot, Snapshot}; diff --git a/gix-ref/Cargo.toml b/gix-ref/Cargo.toml index 35d1add57d5..24015a7de3d 100644 --- a/gix-ref/Cargo.toml +++ b/gix-ref/Cargo.toml @@ -20,6 +20,7 @@ serde1 = ["serde", "gix-hash/serde1", "gix-actor/serde1", "gix-object/serde1"] [dependencies] gix-features = { version = "^0.28.1", path = "../gix-features", features = ["walkdir"]} +gix-utils = { version = "0.1.0", path = "../gix-utils" } gix-path = { version = "^0.7.2", path = "../gix-path" } gix-hash = { version = "^0.10.4", path = "../gix-hash" } gix-object = { version = "^0.28.0", path = "../gix-object" } @@ -39,6 +40,7 @@ document-features = { version = "0.2.1", optional = true } [dev-dependencies] gix-testtools = { path = "../tests/tools" } +gix-utils = { path = "../gix-utils" } tempfile = "3.2.0" diff --git a/gix-ref/src/store/file/loose/mod.rs b/gix-ref/src/store/file/loose/mod.rs index 230641509b0..20e64ea4a09 100644 --- a/gix-ref/src/store/file/loose/mod.rs +++ b/gix-ref/src/store/file/loose/mod.rs @@ -39,7 +39,7 @@ mod init { common_dir: None, write_reflog, namespace: None, - packed: gix_features::fs::MutableSnapshot::new().into(), + packed: gix_utils::SharedFileSnapshotMut::new().into(), object_hash, } } @@ -57,7 +57,7 @@ mod init { common_dir: Some(common_dir.into()), write_reflog, namespace: None, - packed: gix_features::fs::MutableSnapshot::new().into(), + packed: gix_utils::SharedFileSnapshotMut::new().into(), object_hash, } } diff --git a/gix-ref/src/store/file/packed.rs b/gix-ref/src/store/file/packed.rs index 271ec7f5ae8..83bfbf5a9c6 100644 --- a/gix-ref/src/store/file/packed.rs +++ b/gix-ref/src/store/file/packed.rs @@ -69,14 +69,14 @@ pub mod transaction { } /// An up-to-date snapshot 
of the packed refs buffer. -pub type SharedBufferSnapshot = gix_features::fs::SharedSnapshot; +pub type SharedBufferSnapshot = gix_utils::SharedFileSnapshot; pub(crate) mod modifiable { use gix_features::threading::OwnShared; use crate::{file, packed}; - pub(crate) type MutableSharedBuffer = OwnShared>; + pub(crate) type MutableSharedBuffer = OwnShared>; impl file::Store { pub(crate) fn force_refresh_packed_buffer(&self) -> Result<(), packed::buffer::open::Error> { diff --git a/gix-ref/tests/Cargo.toml b/gix-ref/tests/Cargo.toml index 92be378b223..f60818a363f 100644 --- a/gix-ref/tests/Cargo.toml +++ b/gix-ref/tests/Cargo.toml @@ -27,6 +27,7 @@ required-features = ["internal-testing-gix-features-parallel"] [dev-dependencies] gix-ref = { path = ".." } +gix-utils = { path = "../../gix-utils" } gix-features = { path = "../../gix-features", features = ["walkdir"]} gix-testtools = { path = "../../tests/tools" } gix-discover = { path = "../../gix-discover" } diff --git a/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs b/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs index 1d9635684de..be88f19fa61 100644 --- a/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs +++ b/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs @@ -14,7 +14,7 @@ use crate::{ fn case_sensitive(tmp_dir: &std::path::Path) -> bool { std::fs::write(tmp_dir.join("config"), "").expect("can create file once"); - !gix_worktree::fs::Capabilities::probe(tmp_dir).ignore_case + !gix_utils::FilesystemCapabilities::probe(tmp_dir).ignore_case } #[test] diff --git a/gix-worktree/Cargo.toml b/gix-worktree/Cargo.toml index 023a6e6f8b1..c5f0446dc81 100644 --- a/gix-worktree/Cargo.toml +++ b/gix-worktree/Cargo.toml @@ -31,6 +31,7 @@ internal-testing-to-avoid-being-run-by-cargo-test-all = [] [dependencies] gix-index = { version = "^0.15.1", path = "../gix-index" } +gix-utils = { version = 
"^0.1.0", path = "../gix-utils" } gix-hash = { version = "^0.10.4", path = "../gix-hash" } gix-object = { version = "^0.28.0", path = "../gix-object" } gix-glob = { version = "^0.5.5", path = "../gix-glob" } diff --git a/gix-worktree/src/fs/cache/platform.rs b/gix-worktree/src/fs/cache/platform.rs index 90bbdbe3cee..0b377acec6a 100644 --- a/gix-worktree/src/fs/cache/platform.rs +++ b/gix-worktree/src/fs/cache/platform.rs @@ -153,7 +153,7 @@ fn create_leading_directory( Ok(()) } else if unlink_on_collision { if meta.file_type().is_symlink() { - crate::os::remove_symlink(stack.current())?; + gix_utils::symlink::remove(stack.current())?; } else { std::fs::remove_file(stack.current())?; } diff --git a/gix-worktree/src/fs/capabilities.rs b/gix-worktree/src/fs/capabilities.rs deleted file mode 100644 index 64daab9cea3..00000000000 --- a/gix-worktree/src/fs/capabilities.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::path::Path; - -use crate::fs::Capabilities; - -#[cfg(windows)] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: false, - ignore_case: true, - executable_bit: false, - symlink: false, - } - } -} - -#[cfg(target_os = "macos")] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: true, - ignore_case: true, - executable_bit: true, - symlink: true, - } - } -} - -#[cfg(all(unix, not(target_os = "macos")))] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: false, - ignore_case: false, - executable_bit: true, - symlink: true, - } - } -} - -impl Capabilities { - /// try to determine all values in this context by probing them in the given `git_dir`, which - /// should be on the file system the git repository is located on. - /// `git_dir` is a typical git repository, expected to be populated with the typical files like `config`. 
- /// - /// All errors are ignored and interpreted on top of the default for the platform the binary is compiled for. - pub fn probe(git_dir: impl AsRef) -> Self { - let root = git_dir.as_ref(); - let ctx = Capabilities::default(); - Capabilities { - symlink: Self::probe_symlink(root).unwrap_or(ctx.symlink), - ignore_case: Self::probe_ignore_case(root).unwrap_or(ctx.ignore_case), - precompose_unicode: Self::probe_precompose_unicode(root).unwrap_or(ctx.precompose_unicode), - executable_bit: Self::probe_file_mode(root).unwrap_or(ctx.executable_bit), - } - } - - #[cfg(unix)] - fn probe_file_mode(root: &Path) -> std::io::Result { - use std::os::unix::fs::{MetadataExt, OpenOptionsExt}; - - // test it exactly as we typically create executable files, not using chmod. - let test_path = root.join("_test_executable_bit"); - let res = std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .mode(0o777) - .open(&test_path) - .and_then(|f| f.metadata().map(|m| m.mode() & 0o100 == 0o100)); - std::fs::remove_file(test_path)?; - res - } - - #[cfg(not(unix))] - fn probe_file_mode(_root: &Path) -> std::io::Result { - Ok(false) - } - - fn probe_ignore_case(git_dir: &Path) -> std::io::Result { - std::fs::metadata(git_dir.join("cOnFiG")).map(|_| true).or_else(|err| { - if err.kind() == std::io::ErrorKind::NotFound { - Ok(false) - } else { - Err(err) - } - }) - } - - fn probe_precompose_unicode(root: &Path) -> std::io::Result { - let precomposed = "ä"; - let decomposed = "a\u{308}"; - - let precomposed = root.join(precomposed); - std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&precomposed)?; - let res = root.join(decomposed).symlink_metadata().map(|_| true); - std::fs::remove_file(precomposed)?; - res - } - - fn probe_symlink(root: &Path) -> std::io::Result { - let src_path = root.join("__link_src_file"); - std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&src_path)?; - let link_path = root.join("__file_link"); - if 
crate::os::create_symlink(&src_path, &link_path).is_err() { - std::fs::remove_file(&src_path)?; - return Ok(false); - } - - let res = std::fs::symlink_metadata(&link_path).map(|m| m.file_type().is_symlink()); - - let cleanup = crate::os::remove_symlink(&link_path).or_else(|_| std::fs::remove_file(&link_path)); - std::fs::remove_file(&src_path).and(cleanup)?; - - res - } -} diff --git a/gix-worktree/src/fs/mod.rs b/gix-worktree/src/fs/mod.rs index d20faacb3f1..2f243a37267 100644 --- a/gix-worktree/src/fs/mod.rs +++ b/gix-worktree/src/fs/mod.rs @@ -2,27 +2,6 @@ use std::path::PathBuf; use bstr::BString; -/// Common knowledge about the worktree that is needed across most interactions with the work tree -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] -pub struct Capabilities { - /// If true, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that - /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally - /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and - /// perform conversions accordingly. - /// If false, no conversions will be performed. - pub precompose_unicode: bool, - /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`. - /// This is also called case-folding. - pub ignore_case: bool, - /// If true, we assume the executable bit is honored as part of the files mode. If false, we assume the file system - /// ignores the executable bit, hence it will be reported as 'off' even though we just tried to set it to be on. - pub executable_bit: bool, - /// If true, the file system supports symbolic links and we should try to create them. Otherwise symbolic links will be checked - /// out as files which contain the link as text. 
- pub symlink: bool, -} - /// A stack of path components with the delegation of side-effects as the currently set path changes, component by component. #[derive(Clone)] pub struct Stack { @@ -77,5 +56,3 @@ pub(crate) type PathOidMapping = (BString, gix_hash::ObjectId); pub mod cache; /// pub mod stack; - -mod capabilities; diff --git a/gix-worktree/src/index/checkout.rs b/gix-worktree/src/index/checkout.rs index 6bc46537562..bd7bbfddf60 100644 --- a/gix-worktree/src/index/checkout.rs +++ b/gix-worktree/src/index/checkout.rs @@ -1,6 +1,7 @@ #![allow(missing_docs)] use bstr::BString; use gix_attributes::Attributes; +use gix_utils::FilesystemCapabilities; #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Collision { @@ -30,7 +31,7 @@ pub struct Outcome { #[derive(Clone)] pub struct Options { /// capabilities of the file system - pub fs: crate::fs::Capabilities, + pub fs: FilesystemCapabilities, /// If set, don't use more than this amount of threads. /// Otherwise, usually use as many threads as there are logical cores. /// A value of 0 is interpreted as no-limit diff --git a/gix-worktree/src/index/entry.rs b/gix-worktree/src/index/entry.rs index 32628c4e0e3..9541786b70d 100644 --- a/gix-worktree/src/index/entry.rs +++ b/gix-worktree/src/index/entry.rs @@ -3,9 +3,10 @@ use std::{convert::TryInto, fs::OpenOptions, io::Write, path::Path, time::Durati use bstr::BStr; use gix_hash::oid; use gix_index::Entry; +use gix_utils::FilesystemCapabilities; use io_close::Close; -use crate::{fs, index, os}; +use crate::{fs, index}; pub struct Context<'a, Find> { pub find: &'a mut Find, @@ -19,7 +20,7 @@ pub fn checkout( entry_path: &BStr, Context { find, path_cache, buf }: Context<'_, Find>, index::checkout::Options { - fs: fs::Capabilities { + fs: FilesystemCapabilities { symlink, executable_bit, .. 
@@ -85,7 +86,9 @@ where .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; if symlink { - try_write_or_unlink(dest, overwrite_existing, |p| os::create_symlink(symlink_destination, p))?; + try_write_or_unlink(dest, overwrite_existing, |p| { + gix_utils::symlink::create(symlink_destination, p) + })?; } else { let mut file = try_write_or_unlink(dest, overwrite_existing, |p| { open_options(p, destination_is_initially_empty, overwrite_existing).open(dest) @@ -115,7 +118,7 @@ fn try_write_or_unlink( if overwrite_existing { match op(path) { Ok(res) => Ok(res), - Err(err) if os::indicates_collision(&err) => { + Err(err) if gix_utils::symlink::error::indicates_collision(&err) => { try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; op(path) } @@ -130,7 +133,7 @@ fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> st if path_meta.is_dir() { std::fs::remove_dir_all(path) } else if path_meta.file_type().is_symlink() { - os::remove_symlink(path) + gix_utils::symlink::remove(path) } else { std::fs::remove_file(path) } diff --git a/gix-worktree/src/index/mod.rs b/gix-worktree/src/index/mod.rs index 684d1cae9e4..2703ebfce80 100644 --- a/gix-worktree/src/index/mod.rs +++ b/gix-worktree/src/index/mod.rs @@ -130,7 +130,6 @@ mod chunk { use crate::{ fs, index, index::{checkout, entry}, - os, }; mod reduce { @@ -285,7 +284,7 @@ mod chunk { bytes.inc_by(object_size); Ok(object_size) } - Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { + Err(index::checkout::Error::Io(err)) if gix_utils::symlink::error::indicates_collision(&err) => { // We are here because a file existed or was blocked by a directory which shouldn't be possible unless // we are on a file insensitive file system. 
files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 9a67e0289ee..43c22feea0e 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -11,5 +11,3 @@ pub mod fs; /// pub mod index; - -pub(crate) mod os; diff --git a/gix-worktree/src/os.rs b/gix-worktree/src/os.rs deleted file mode 100644 index a297e73cd87..00000000000 --- a/gix-worktree/src/os.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::{io, io::ErrorKind::AlreadyExists, path::Path}; - -#[cfg(not(windows))] -pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { - std::os::unix::fs::symlink(original, link) -} - -#[cfg(not(windows))] -pub fn remove_symlink(path: &Path) -> io::Result<()> { - std::fs::remove_file(path) -} - -// TODO: use the `symlink` crate once it can delete directory symlinks -#[cfg(windows)] -pub fn remove_symlink(path: &Path) -> io::Result<()> { - if let Ok(meta) = std::fs::metadata(path) { - if meta.is_file() { - std::fs::remove_file(path) // this removes the link itself - } else { - std::fs::remove_dir(path) // however, this sees the destination directory, which isn't the right thing actually - } - } else { - std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path)) - } -} - -#[cfg(windows)] -pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { - use std::os::windows::fs::{symlink_dir, symlink_file}; - // TODO: figure out if links to links count as files or whatever they point at - if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { - symlink_dir(original, link) - } else { - symlink_file(original, link) - } -} - -#[cfg(not(windows))] -pub fn indicates_collision(err: &std::io::Error) -> bool { - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error(), and ::FileSystemLoop respectively - err.kind() == AlreadyExists - || err.raw_os_error() == Some(21) - || err.raw_os_error() == Some(62) // no-follow on 
symlnk on mac-os - || err.raw_os_error() == Some(40) // no-follow on symlnk on ubuntu -} - -#[cfg(windows)] -pub fn indicates_collision(err: &std::io::Error) -> bool { - err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied -} diff --git a/gix-worktree/tests/worktree/fs/mod.rs b/gix-worktree/tests/worktree/fs/mod.rs index 435e245d1a7..50f09e14013 100644 --- a/gix-worktree/tests/worktree/fs/mod.rs +++ b/gix-worktree/tests/worktree/fs/mod.rs @@ -2,7 +2,7 @@ fn from_probing_cwd() { let dir = tempfile::tempdir().unwrap(); std::fs::File::create(dir.path().join("config")).unwrap(); - let ctx = gix_worktree::fs::Capabilities::probe(dir.path()); + let ctx = gix_utils::FilesystemCapabilities::probe(dir.path()); dbg!(ctx); let entries: Vec<_> = std::fs::read_dir(dir.path()) .unwrap() diff --git a/gix-worktree/tests/worktree/index/checkout.rs b/gix-worktree/tests/worktree/index/checkout.rs index 8b08a35d228..4204524ea56 100644 --- a/gix-worktree/tests/worktree/index/checkout.rs +++ b/gix-worktree/tests/worktree/index/checkout.rs @@ -10,7 +10,8 @@ use std::{ use gix_features::progress; use gix_object::bstr::ByteSlice; use gix_odb::FindExt; -use gix_worktree::{fs::Capabilities, index, index::checkout::Collision}; +use gix_utils::FilesystemCapabilities; +use gix_worktree::{index, index::checkout::Collision}; use tempfile::TempDir; use crate::fixture_path; @@ -418,8 +419,8 @@ fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&P source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() } -fn probe_gitoxide_dir() -> crate::Result { - Ok(gix_worktree::fs::Capabilities::probe( +fn probe_gitoxide_dir() -> crate::Result { + Ok(gix_utils::FilesystemCapabilities::probe( std::env::current_dir()?.join("..").join(".git"), )) } diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 3a5982151bb..bd4266243e4 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -110,6 +110,7 @@ fast-sha1 = [ "gix-features/fast-sha1" ] [dependencies] 
+gix-utils = { version = "0.1.0", path = "../gix-utils" } gix-ref = { version = "^0.27.2", path = "../gix-ref" } gix-discover = { version = "^0.16.2", path = "../gix-discover" } gix-tempfile = { version = "^5.0.0", path = "../gix-tempfile", default-features = false, features = ["signals"] } diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 2326dd01d7f..00e98ccc76c 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -175,7 +175,7 @@ impl Cache { .map(|value| Checkout::WORKERS.try_from_workers(value)), )?; Ok(gix_worktree::index::checkout::Options { - fs: gix_worktree::fs::Capabilities { + fs: gix_utils::FilesystemCapabilities { precompose_unicode: boolean(self, "core.precomposeUnicode", &Core::PRECOMPOSE_UNICODE, false)?, ignore_case: boolean(self, "core.ignoreCase", &Core::IGNORE_CASE, false)?, executable_bit: boolean(self, "core.fileMode", &Core::FILE_MODE, true)?, diff --git a/gix/src/create.rs b/gix/src/create.rs index 96d047e3bc7..362a20f964b 100644 --- a/gix/src/create.rs +++ b/gix/src/create.rs @@ -117,7 +117,7 @@ pub struct Options { pub destination_must_be_empty: bool, /// If set, use these filesystem capabilities to populate the respective gix-config fields. /// If `None`, the directory will be probed. 
- pub fs_capabilities: Option, + pub fs_capabilities: Option, } /// Create a new `.git` repository of `kind` within the possibly non-existing `directory` @@ -208,7 +208,7 @@ pub fn into( { let mut config = gix_config::File::default(); { - let caps = fs_capabilities.unwrap_or_else(|| gix_worktree::fs::Capabilities::probe(&dot_git)); + let caps = fs_capabilities.unwrap_or_else(|| gix_utils::FilesystemCapabilities::probe(&dot_git)); let mut core = config.new_section("core", None).expect("valid section name"); core.push(key("repositoryformatversion"), Some("0".into())); diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 5bf12191cfd..99aaf721849 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -92,6 +92,7 @@ pub use gix_traverse as traverse; pub use gix_url as url; #[doc(inline)] pub use gix_url::Url; +pub use gix_utils as utils; pub use hash::{oid, ObjectId}; pub mod interrupt; diff --git a/gix/src/open/repository.rs b/gix/src/open/repository.rs index 301c158b2ae..99ff56c4f69 100644 --- a/gix/src/open/repository.rs +++ b/gix/src/open/repository.rs @@ -264,8 +264,8 @@ impl ThreadSafeRepository { config, // used when spawning new repositories off this one when following worktrees linked_worktree_options: options, - index: gix_features::fs::MutableSnapshot::new().into(), - shallow_commits: gix_features::fs::MutableSnapshot::new().into(), + index: gix_utils::SharedFileSnapshotMut::new().into(), + shallow_commits: gix_utils::SharedFileSnapshotMut::new().into(), }) } } diff --git a/gix/src/shallow.rs b/gix/src/shallow.rs index 880591c49ba..abf2c132af0 100644 --- a/gix/src/shallow.rs +++ b/gix/src/shallow.rs @@ -1,8 +1,8 @@ pub(crate) type CommitsStorage = - gix_features::threading::OwnShared>>; + gix_features::threading::OwnShared>>; /// A lazily loaded and auto-updated list of commits which are at the shallow boundary (behind which there are no commits available), /// sorted to allow bisecting. 
-pub type Commits = gix_features::fs::SharedSnapshot>; +pub type Commits = gix_utils::SharedFileSnapshot>; /// #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 965690401ac..1e3aa0c6b09 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -7,9 +7,9 @@ use crate::{ Repository, }; -pub(crate) type IndexStorage = gix_features::threading::OwnShared>; +pub(crate) type IndexStorage = gix_features::threading::OwnShared>; /// A lazily loaded and auto-updated worktree index. -pub type Index = gix_features::fs::SharedSnapshot; +pub type Index = gix_utils::SharedFileSnapshot; /// A stand-in to a worktree as result of a worktree iteration. /// From fc28701b52139d9422dd73687d776cf95de98b35 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 9 Apr 2023 11:07:28 +0200 Subject: [PATCH 09/14] Add new `gix-ignore` crate with the contents moved and adapted from `gix-attributes`. --- Cargo.lock | 14 ++ Cargo.toml | 1 + README.md | 1 + crate-status.md | 12 +- gix-ignore/CHANGELOG.md | 7 + gix-ignore/Cargo.toml | 36 ++++ gix-ignore/src/lib.rs | 34 ++++ gix-ignore/src/parse.rs | 63 ++++++ gix-ignore/src/search.rs | 179 ++++++++++++++++++ .../tests/fixtures/attributes/various.txt | 11 ++ .../fixtures/generated-archives/.gitignore | 1 + gix-ignore/tests/fixtures/ignore/various.txt | 14 ++ ...ake_global_and_external_and_dir_ignores.sh | 95 ++++++++++ gix-ignore/tests/ignore.rs | 3 + gix-ignore/tests/parse/mod.rs | 114 +++++++++++ gix-ignore/tests/search/mod.rs | 130 +++++++++++++ 16 files changed, 710 insertions(+), 5 deletions(-) create mode 100644 gix-ignore/CHANGELOG.md create mode 100644 gix-ignore/Cargo.toml create mode 100644 gix-ignore/src/lib.rs create mode 100644 gix-ignore/src/parse.rs create mode 100644 gix-ignore/src/search.rs create mode 100644 gix-ignore/tests/fixtures/attributes/various.txt create mode 100644 
gix-ignore/tests/fixtures/generated-archives/.gitignore create mode 100644 gix-ignore/tests/fixtures/ignore/various.txt create mode 100644 gix-ignore/tests/fixtures/make_global_and_external_and_dir_ignores.sh create mode 100644 gix-ignore/tests/ignore.rs create mode 100644 gix-ignore/tests/parse/mod.rs create mode 100644 gix-ignore/tests/search/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 636fdbe13e1..b7e5871630a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1727,6 +1727,20 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "gix-ignore" +version = "0.1.0" +dependencies = [ + "bstr", + "document-features", + "gix-glob 0.5.5", + "gix-path 0.7.3", + "gix-testtools", + "gix-utils", + "serde", + "unicode-bom 2.0.2", +] + [[package]] name = "gix-index" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index fa9d1c79b58..7b8f0aaf07b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -173,6 +173,7 @@ members = [ "gix-tempfile", "gix-lock", "gix-attributes", + "gix-ignore", "gix-pathspec", "gix-refspec", "gix-path", diff --git a/README.md b/README.md index 9097547e92c..7dd0b3a00d4 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ is usable to some extent. 
* [gix-discover](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-discover) * [gix-path](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-path) * [gix-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-attributes) + * [gix-ignore](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-ignore) * [gix-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-pathspec) * [gix-index](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-index) * [gix-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-revision) diff --git a/crate-status.md b/crate-status.md index 6a6e2c1e8fa..bcf5a068cae 100644 --- a/crate-status.md +++ b/crate-status.md @@ -328,11 +328,13 @@ Check out the [performance discussion][gix-traverse-performance] as well. * [ ] Some examples ### gix-attributes -* [x] parse git-ignore files (aka gix-attributes without the attributes or negation) -* [x] parse gix-attributes files -* [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files. - * [ ] support for built-in `binary` macro for `-text -diff -merge` - +* [x] parse `.gitattribute` files +* [ ] an attributes stack for matching paths to their attributes, with support for built-in `binary` macro for `-text -diff -merge` + +### gix-ignore +* [x] parse `.gitignore` files +* [x] an attributes stack for checking if paths are excluded + ### gix-quote * **ansi-c** * [x] quote diff --git a/gix-ignore/CHANGELOG.md b/gix-ignore/CHANGELOG.md new file mode 100644 index 00000000000..52e80a16a21 --- /dev/null +++ b/gix-ignore/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ diff --git a/gix-ignore/Cargo.toml b/gix-ignore/Cargo.toml new file mode 100644 index 00000000000..93ef3366bbb --- /dev/null +++ b/gix-ignore/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "gix-ignore" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project dealing .gitignore files" +authors = ["Sebastian Thiel "] +edition = "2021" +include = ["src/**/*", "CHANGELOG.md"] +rust-version = "1.64" + +[lib] +doctest = false + +[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. +serde1 = ["serde", "bstr/serde", "gix-glob/serde1"] + +[dependencies] +gix-glob = { version = "^0.5.5", path = "../gix-glob" } +gix-path = { version = "^0.7.3", path = "../gix-path" } + +bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"]} +unicode-bom = "2.0.2" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} + +document-features = { version = "0.2.1", optional = true } + +[dev-dependencies] +gix-testtools = { path = "../tests/tools"} +gix-utils = { path = "../gix-utils" } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] +features = ["document-features"] diff --git a/gix-ignore/src/lib.rs b/gix-ignore/src/lib.rs new file mode 100644 index 00000000000..20ca1cc8c9c --- /dev/null +++ b/gix-ignore/src/lib.rs @@ -0,0 +1,34 @@ +//! Parse `.gitignore` files and provide utilities to match against them. +//! +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +pub use gix_glob as glob; + +/// +pub mod search; +/// A grouping of lists of patterns while possibly keeping associated to their base path in order to find matches. 
+/// +/// Pattern lists with base path are queryable relative to that base, otherwise they are relative to the repository root. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Search { + /// A list of pattern lists, each representing a patterns from a file or specified by hand, in the order they were + /// specified in. + /// + /// When matching, this order is reversed. + pub patterns: Vec>, +} + +/// +pub mod parse; + +/// Parse git ignore patterns, line by line, from `bytes`. +pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(bytes) +} diff --git a/gix-ignore/src/parse.rs b/gix-ignore/src/parse.rs new file mode 100644 index 00000000000..11ceaabaf1a --- /dev/null +++ b/gix-ignore/src/parse.rs @@ -0,0 +1,63 @@ +use bstr::ByteSlice; + +/// An iterator over line-wise ignore patterns parsed from a buffer. +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +impl<'a> Lines<'a> { + /// Create a new instance from `buf` to parse ignore patterns from. + pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (gix_glob::Pattern, usize); + + fn next(&mut self) -> Option { + for line in self.lines.by_ref() { + self.line_no += 1; + if line.first() == Some(&b'#') { + continue; + } + match gix_glob::Pattern::from_bytes(truncate_non_escaped_trailing_spaces(line)) { + None => continue, + Some(pattern) => return Some((pattern, self.line_no)), + } + } + None + } +} + +/// We always copy just because that's ultimately needed anyway, not because we always have to. 
+fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> &[u8] { + let mut last_space_pos = None; + let mut bytes = buf.iter().enumerate(); + while let Some((pos, b)) = bytes.next() { + match *b { + b' ' => { + last_space_pos.get_or_insert(pos); + continue; + } + b'\\' => { + if bytes.next().is_none() { + return buf; + } + } + _ => {} + } + last_space_pos = None; + } + + if let Some(pos) = last_space_pos { + &buf[..pos] + } else { + buf + } +} diff --git a/gix-ignore/src/search.rs b/gix-ignore/src/search.rs new file mode 100644 index 00000000000..84278062c51 --- /dev/null +++ b/gix-ignore/src/search.rs @@ -0,0 +1,179 @@ +use crate::Search; +use bstr::{BStr, ByteSlice}; +use gix_glob::search::{pattern, Pattern}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +/// Describes a matching pattern within a search for ignored paths. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Match<'a, T> { + /// The glob pattern itself, like `/target/*`. + pub pattern: &'a gix_glob::Pattern, + /// The value associated with the pattern. + pub value: &'a T, + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. + pub source: Option<&'a Path>, + /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. + pub sequence_number: usize, +} + +/// An implementation of the [`Pattern`] trait for ignore patterns. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Ignore; + +impl Pattern for Ignore { + type Value = (); + + fn bytes_to_patterns(bytes: &[u8], _source: &std::path::Path) -> Vec> { + crate::parse(bytes) + .map(|(pattern, line_number)| pattern::Mapping { + pattern, + value: (), + sequence_number: line_number, + }) + .collect() + } + + fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { + true + } +} + +/// Instantiation of a search for ignore patterns. 
+impl Search { + /// Given `git_dir`, a `.git` repository, load static ignore patterns from `info/exclude` + /// and from `excludes_file` if it is provided. + /// Note that it's not considered an error if the provided `excludes_file` does not exist. + pub fn from_git_dir( + git_dir: impl AsRef, + excludes_file: Option, + buf: &mut Vec, + ) -> std::io::Result { + let mut group = Self::default(); + + let follow_symlinks = true; + // order matters! More important ones first. + group.patterns.extend( + excludes_file + .and_then(|file| pattern::List::::from_file(file, None, follow_symlinks, buf).transpose()) + .transpose()?, + ); + group.patterns.extend(pattern::List::::from_file( + git_dir.as_ref().join("info").join("exclude"), + None, + follow_symlinks, + buf, + )?); + Ok(group) + } + + /// Parse a list of patterns, using slashes as path separators + pub fn from_overrides(patterns: impl IntoIterator>) -> Self { + Search { + patterns: vec![pattern::List { + patterns: patterns + .into_iter() + .map(Into::into) + .enumerate() + .filter_map(|(seq_id, pattern)| { + let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?; + gix_glob::parse(pattern.as_ref()).map(|p| pattern::Mapping { + pattern: p, + value: (), + sequence_number: seq_id, + }) + }) + .collect(), + source: None, + base: None, + }], + } + } +} + +/// Mutation +impl Search { + /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they + /// are relative to. This also means that `source` is contained within `root` if `root` is provided. + pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into, root: Option<&Path>) { + self.patterns + .push(pattern::List::from_bytes(bytes, source.into(), root)); + } +} + +/// Return a match if a pattern matches `relative_path`, providing a pre-computed `basename_pos` which is the +/// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory. 
+/// `case` specifies whether cases should be folded during matching or not. +pub fn pattern_matching_relative_path<'a>( + list: &'a gix_glob::search::pattern::List, + relative_path: &BStr, + basename_pos: Option, + is_dir: Option, + case: gix_glob::pattern::Case, +) -> Option> { + let (relative_path, basename_start_pos) = + list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?; + list.patterns + .iter() + .rev() + .filter(|pm| Ignore::may_use_glob_pattern(&pm.pattern)) + .find_map( + |pattern::Mapping { + pattern, + value, + sequence_number, + }| { + pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(Match { + pattern, + value, + source: list.source.as_deref(), + sequence_number: *sequence_number, + }) + }, + ) +} + +/// Like [`pattern_matching_relative_path()`], but returns an index to the pattern +/// that matched `relative_path`, instead of the match itself. +pub fn pattern_idx_matching_relative_path( + list: &gix_glob::search::pattern::List, + relative_path: &BStr, + basename_pos: Option, + is_dir: Option, + case: gix_glob::pattern::Case, +) -> Option { + let (relative_path, basename_start_pos) = + list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?; + list.patterns + .iter() + .enumerate() + .rev() + .filter(|(_, pm)| Ignore::may_use_glob_pattern(&pm.pattern)) + .find_map(|(idx, pm)| { + pm.pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(idx) + }) +} + +/// Matching of ignore patterns. +impl Search { + /// Match `relative_path` and return the first match if found. + /// `is_dir` is true if `relative_path` is a directory. + /// `case` specifies whether cases should be folded during matching or not. 
+ pub fn pattern_matching_relative_path<'a>( + &self, + relative_path: impl Into<&'a BStr>, + is_dir: Option, + case: gix_glob::pattern::Case, + ) -> Option> { + let relative_path = relative_path.into(); + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + self.patterns + .iter() + .rev() + .find_map(|pl| pattern_matching_relative_path(pl, relative_path, basename_pos, is_dir, case)) + } +} diff --git a/gix-ignore/tests/fixtures/attributes/various.txt b/gix-ignore/tests/fixtures/attributes/various.txt new file mode 100644 index 00000000000..2b8b7f565ba --- /dev/null +++ b/gix-ignore/tests/fixtures/attributes/various.txt @@ -0,0 +1,11 @@ + # no attribute for now +*.[oa] c + +# comment +"*.html" a b=c + +# other comment +\!foo.html x + +\#a/path -a +/* !b diff --git a/gix-ignore/tests/fixtures/generated-archives/.gitignore b/gix-ignore/tests/fixtures/generated-archives/.gitignore new file mode 100644 index 00000000000..1a25065876d --- /dev/null +++ b/gix-ignore/tests/fixtures/generated-archives/.gitignore @@ -0,0 +1 @@ +make_global_and_external_and_dir_ignores.tar.xz diff --git a/gix-ignore/tests/fixtures/ignore/various.txt b/gix-ignore/tests/fixtures/ignore/various.txt new file mode 100644 index 00000000000..214e46f2d54 --- /dev/null +++ b/gix-ignore/tests/fixtures/ignore/various.txt @@ -0,0 +1,14 @@ +# ignore objects and archives, anywhere in the tree. 
+*.[oa] + +# ignore generated html files, +*.html + +# except foo.html which is maintained by hand +!foo.html + +# exclude everything except directory foo/bar +/* +!/foo +/foo/* +!/foo/bar diff --git a/gix-ignore/tests/fixtures/make_global_and_external_and_dir_ignores.sh b/gix-ignore/tests/fixtures/make_global_and_external_and_dir_ignores.sh new file mode 100644 index 00000000000..f4bd0997ab4 --- /dev/null +++ b/gix-ignore/tests/fixtures/make_global_and_external_and_dir_ignores.sh @@ -0,0 +1,95 @@ +#!/bin/bash +set -eu -o pipefail + +cat <user.exclude +# a custom exclude configured per user +user-file-anywhere +/user-file-from-top + +user-dir-anywhere/ +/user-dir-from-top + +user-subdir/file +**/user-subdir-anywhere/file +a/b/* +z/x +EOF + +mkdir repo; +(cd repo + git init -q + git config core.excludesFile ../user.exclude + + cat <.git/info/exclude +# a sample .git/info/exclude +file-anywhere +/file-from-top + +dir-anywhere/ +/dir-from-top + +subdir/file +**/subdir-anywhere/file +EOF + + cat <.gitignore +# a sample .gitignore +top-level-local-file-anywhere +d/e/* +e/f +EOF + + mkdir dir-with-ignore + cat <dir-with-ignore/.gitignore +# a sample .gitignore +sub-level-local-file-anywhere +EOF + + git add .gitignore dir-with-ignore + git commit --allow-empty -m "init" + + mkdir user-dir-anywhere user-dir-from-top dir-anywhere dir-from-top + mkdir -p dir/user-dir-anywhere dir/dir-anywhere + +git check-ignore -vn --stdin 2>&1 <git-check-ignore.baseline || : +user-file-anywhere +dir/user-file-anywhere +user-file-from-top +no-match/user-file-from-top +user-dir-anywhere +user-dir-from-top +no-match/user-dir-from-top +user-subdir/file +subdir/user-subdir-anywhere/file +file-anywhere +dir/file-anywhere +file-from-top +no-match/file-from-top +dir-anywhere +dir/dir-anywhere +dir-from-top +no-match/dir-from-top +subdir/file +subdir/subdir-anywhere/file +top-level-local-file-anywhere +dir/top-level-local-file-anywhere +no-match/sub-level-local-file-anywhere 
+dir-with-ignore/sub-level-local-file-anywhere +dir-with-ignore/sub-dir/sub-level-local-file-anywhere +a/b/C +a/B/c +A/B/C +z/x +Z/x +z/X +Z/X +d/e/F +d/e/f +D/e/F +D/E/F +e/f +e/F +E/f +E/F +EOF +) diff --git a/gix-ignore/tests/ignore.rs b/gix-ignore/tests/ignore.rs new file mode 100644 index 00000000000..e01bbccf3ca --- /dev/null +++ b/gix-ignore/tests/ignore.rs @@ -0,0 +1,3 @@ +pub use gix_testtools::Result; +mod parse; +mod search; diff --git a/gix-ignore/tests/parse/mod.rs b/gix-ignore/tests/parse/mod.rs new file mode 100644 index 00000000000..8a5e278c2f0 --- /dev/null +++ b/gix-ignore/tests/parse/mod.rs @@ -0,0 +1,114 @@ +use bstr::BString; +use gix_glob::{pattern::Mode, Pattern}; +use gix_testtools::fixture_bytes; + +#[test] +fn byte_order_marks_are_no_patterns() { + assert_eq!( + flatten(gix_ignore::parse("\u{feff}hello".as_bytes()).next()), + Some(pat(r"hello", Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn line_numbers_are_counted_correctly() { + let input = fixture_bytes("ignore/various.txt"); + let actual: Vec<_> = gix_ignore::parse(&input).map(flat_map).collect(); + assert_eq!( + actual, + vec![ + pat("*.[oa]", Mode::NO_SUB_DIR, 2), + pat("*.html", Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), + pat("foo.html", Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), + pat("*", Mode::NO_SUB_DIR | Mode::ENDS_WITH | Mode::ABSOLUTE, 11), + pat("foo", Mode::NEGATIVE | Mode::NO_SUB_DIR | Mode::ABSOLUTE, 12), + pat("foo/*", Mode::ABSOLUTE, 13), + pat("foo/bar", Mode::ABSOLUTE | Mode::NEGATIVE, 14) + ] + ); +} + +#[test] +fn line_endings_can_be_windows_or_unix() { + assert_eq!( + gix_ignore::parse(b"unix\nwindows\r\nlast") + .map(flat_map) + .collect::>(), + vec![ + pat(r"unix", Mode::NO_SUB_DIR, 1), + pat(r"windows", Mode::NO_SUB_DIR, 2), + pat(r"last", Mode::NO_SUB_DIR, 3) + ] + ); +} + +#[test] +fn comments_are_ignored_as_well_as_empty_ones() { + assert!(gix_ignore::parse(b"# hello world").next().is_none()); + assert!(gix_ignore::parse(b"\n\r\n\t\t \n").next().is_none()); +} + 
+#[test] +fn backslashes_before_hashes_are_no_comments() { + assert_eq!( + flatten(gix_ignore::parse(br"\#hello").next()), + Some(pat(r"#hello", Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn trailing_spaces_can_be_escaped_to_be_literal() { + fn parse_one(input: &str) -> (BString, Mode, usize) { + let actual: Vec<_> = gix_ignore::parse(input.as_bytes()).map(flat_map).collect(); + assert_eq!(actual.len(), 1, "{input:?} should match"); + actual.into_iter().next().expect("present") + } + + assert_eq!( + parse_one(r"a \ "), + pat(r"a \ ", Mode::NO_SUB_DIR, 1), + "a single escape in front of the last desired space is enough to keep it, along with the escape" + ); + assert_eq!( + parse_one(r"a b c "), + pat("a b c", Mode::NO_SUB_DIR, 1), + "spaces in the middle are fine, trailing ones are removed" + ); + assert_eq!( + parse_one(r"a\ \ \ "), + pat(r"a\ \ \ ", Mode::NO_SUB_DIR, 1), + "one can also escape every single one, what matters is the last escaped one" + ); + assert_eq!( + parse_one(r"a \ "), + pat(r"a \ ", Mode::NO_SUB_DIR, 1), + "or just the one in the middle, losing the last actual space" + ); + assert_eq!( + parse_one(r"a \"), + pat(r"a \", Mode::NO_SUB_DIR, 1), + "escaping 'nothing' also works" + ); + assert_eq!( + parse_one(r"a \\\ "), + pat(r"a \\\ ", Mode::NO_SUB_DIR, 1), + "an escaped backslash followed by a backslash escapes whitespace" + ); + assert_eq!( + parse_one(r"a \\ "), + pat(r"a \\", Mode::NO_SUB_DIR, 1), + "strange things like these work as well, but trailers are removed if the backslash is escaped" + ); +} + +fn flatten(input: Option<(Pattern, usize)>) -> Option<(BString, gix_glob::pattern::Mode, usize)> { + input.map(flat_map) +} + +fn flat_map(input: (Pattern, usize)) -> (BString, gix_glob::pattern::Mode, usize) { + (input.0.text, input.0.mode, input.1) +} + +fn pat(pattern: &str, mode: Mode, pos: usize) -> (BString, Mode, usize) { + (pattern.into(), mode, pos) +} diff --git a/gix-ignore/tests/search/mod.rs b/gix-ignore/tests/search/mod.rs new 
file mode 100644 index 00000000000..83256dea727 --- /dev/null +++ b/gix-ignore/tests/search/mod.rs @@ -0,0 +1,130 @@ +use std::io::Read; + +use bstr::{BStr, ByteSlice}; +use gix_glob::pattern::Case; +use gix_ignore::search::Match; +use gix_utils::FilesystemCapabilities; + +struct Expectations<'a> { + lines: bstr::Lines<'a>, +} + +impl<'a> Iterator for Expectations<'a> { + type Item = (&'a BStr, Option<(&'a BStr, usize, &'a BStr)>); + + fn next(&mut self) -> Option { + let line = self.lines.next()?; + let (left, value) = line.split_at(line.find_byte(b'\t').unwrap()); + let value = value[1..].as_bstr(); + + let source_and_line = if left == b"::" { + None + } else { + let mut tokens = left.split(|b| *b == b':'); + let source = tokens.next().unwrap().as_bstr(); + let line_number: usize = tokens.next().unwrap().to_str_lossy().parse().ok().unwrap(); + let pattern = tokens.next().unwrap().as_bstr(); + Some((source, line_number, pattern)) + }; + Some((value, source_and_line)) + } +} + +#[test] +fn baseline_from_git_dir() -> crate::Result { + let case = if FilesystemCapabilities::probe("../.git").ignore_case { + Case::Fold + } else { + Case::Sensitive + }; + let dir = gix_testtools::scripted_fixture_read_only("make_global_and_external_and_dir_ignores.sh")?; + let repo_dir = dir.join("repo"); + let git_dir = repo_dir.join(".git"); + let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline"))?; + let mut buf = Vec::new(); + let mut group = gix_ignore::Search::from_git_dir(git_dir, Some(dir.join("user.exclude")), &mut buf)?; + + assert!( + !gix_glob::search::add_patterns_file(&mut group.patterns, "not-a-file", false, None, &mut buf)?, + "missing files are no problem and cause a negative response" + ); + assert!( + gix_glob::search::add_patterns_file( + &mut group.patterns, + repo_dir.join(".gitignore"), + true, + repo_dir.as_path().into(), + &mut buf + )?, + "existing files return true" + ); + + buf.clear(); + let ignore_file = 
repo_dir.join("dir-with-ignore").join(".gitignore"); + std::fs::File::open(&ignore_file)?.read_to_end(&mut buf)?; + group.add_patterns_buffer(&buf, ignore_file, repo_dir.as_path().into()); + + for (path, source_and_line) in (Expectations { + lines: baseline.lines(), + }) { + let actual = group.pattern_matching_relative_path( + path, + repo_dir + .join(path.to_str_lossy().as_ref()) + .metadata() + .ok() + .map(|m| m.is_dir()), + case, + ); + match (actual, source_and_line) { + ( + Some(Match { + sequence_number, + pattern: _, + source, + value: _, + }), + Some((expected_source, line, _expected_pattern)), + ) => { + assert_eq!(sequence_number, line, "our counting should match the one used in git"); + assert_eq!( + source.map(|p| p.canonicalize().unwrap()), + Some(repo_dir.join(expected_source.to_str_lossy().as_ref()).canonicalize()?) + ); + } + (None, None) => {} + (actual, expected) => { + panic!("{case:?}: actual {actual:?} should match {expected:?} with path '{path}'") + } + } + } + Ok(()) +} + +#[test] +fn from_overrides() { + let input = ["simple", "pattern/"]; + let group = gix_ignore::Search::from_overrides(input); + assert_eq!( + group.pattern_matching_relative_path("Simple", None, gix_glob::pattern::Case::Fold), + Some(pattern_to_match(&gix_glob::parse("simple").unwrap(), 0)) + ); + assert_eq!( + group.pattern_matching_relative_path("pattern", Some(true), gix_glob::pattern::Case::Sensitive), + Some(pattern_to_match(&gix_glob::parse("pattern/").unwrap(), 1)) + ); + assert_eq!(group.patterns.len(), 1); + assert_eq!( + gix_ignore::Search::from_overrides(input).patterns[0], + group.patterns.into_iter().next().unwrap() + ); +} + +fn pattern_to_match(pattern: &gix_glob::Pattern, sequence_number: usize) -> Match<'_, ()> { + Match { + pattern, + value: &(), + source: None, + sequence_number, + } +} From 424ad62c852112b1104fe853b71de4dfe3a9f915 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 8 Apr 2023 09:01:27 +0200 Subject: [PATCH 10/14] feat!: an API for 
matching attributes. An implementation for `Search` along with various breaking modifications of the original module and type layout. --- Cargo.lock | 15 +- gix-attributes/Cargo.toml | 9 +- gix-attributes/src/lib.rs | 83 ++-- gix-attributes/src/match_group.rs | 358 ----------------- gix-attributes/src/name.rs | 26 +- .../src/{parse/attribute.rs => parse.rs} | 15 +- gix-attributes/src/parse/ignore.rs | 36 -- gix-attributes/src/parse/mod.rs | 10 - gix-attributes/src/search/attributes.rs | 205 ++++++++++ gix-attributes/src/search/mod.rs | 141 +++++++ gix-attributes/src/search/outcome.rs | 313 +++++++++++++++ gix-attributes/src/source.rs | 28 ++ gix-attributes/src/state.rs | 84 +++- gix-attributes/tests/attributes.rs | 3 +- .../fixtures/generated-archives/.gitignore | 1 + ...global_and_external_and_dir_ignores.tar.xz | 3 - .../tests/fixtures/ignore/various.txt | 14 - .../fixtures/make_attributes_baseline.sh | 130 ++++++ ...ake_global_and_external_and_dir_ignores.sh | 77 ---- gix-attributes/tests/match_group/ignore.rs | 116 ------ gix-attributes/tests/match_group/mod.rs | 1 - gix-attributes/tests/parse/attribute.rs | 317 --------------- gix-attributes/tests/parse/ignore.rs | 65 --- gix-attributes/tests/parse/mod.rs | 372 +++++++++++++++++- gix-attributes/tests/search/mod.rs | 328 +++++++++++++++ gix-attributes/tests/state/mod.rs | 23 ++ 26 files changed, 1707 insertions(+), 1066 deletions(-) delete mode 100644 gix-attributes/src/match_group.rs rename gix-attributes/src/{parse/attribute.rs => parse.rs} (94%) delete mode 100644 gix-attributes/src/parse/ignore.rs delete mode 100644 gix-attributes/src/parse/mod.rs create mode 100644 gix-attributes/src/search/attributes.rs create mode 100644 gix-attributes/src/search/mod.rs create mode 100644 gix-attributes/src/search/outcome.rs create mode 100644 gix-attributes/src/source.rs create mode 100644 gix-attributes/tests/fixtures/generated-archives/.gitignore delete mode 100644 
gix-attributes/tests/fixtures/generated-archives/make_global_and_external_and_dir_ignores.tar.xz delete mode 100644 gix-attributes/tests/fixtures/ignore/various.txt create mode 100644 gix-attributes/tests/fixtures/make_attributes_baseline.sh delete mode 100644 gix-attributes/tests/fixtures/make_global_and_external_and_dir_ignores.sh delete mode 100644 gix-attributes/tests/match_group/ignore.rs delete mode 100644 gix-attributes/tests/match_group/mod.rs delete mode 100644 gix-attributes/tests/parse/attribute.rs delete mode 100644 gix-attributes/tests/parse/ignore.rs create mode 100644 gix-attributes/tests/search/mod.rs create mode 100644 gix-attributes/tests/state/mod.rs diff --git a/Cargo.lock b/Cargo.lock index b7e5871630a..10be4e04f4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1407,12 +1407,15 @@ version = "0.10.0" dependencies = [ "bstr", "document-features", - "gix-features 0.28.1", "gix-glob 0.5.5", "gix-path 0.7.3", "gix-quote 0.4.3", "gix-testtools", + "gix-utils", + "kstring", + "log", "serde", + "smallvec", "thiserror", "unicode-bom 2.0.2", ] @@ -2771,6 +2774,16 @@ dependencies = [ "rayon", ] +[[package]] +name = "kstring" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" +dependencies = [ + "serde", + "static_assertions", +] + [[package]] name = "kv-log-macro" version = "1.0.7" diff --git a/gix-attributes/Cargo.toml b/gix-attributes/Cargo.toml index 2789db5dc75..3116b77fc75 100644 --- a/gix-attributes/Cargo.toml +++ b/gix-attributes/Cargo.toml @@ -14,23 +14,26 @@ doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
-serde1 = ["serde", "bstr/serde", "gix-glob/serde1"] +serde1 = ["serde", "bstr/serde", "gix-glob/serde1", "kstring/serde"] [dependencies] -gix-features = { version = "^0.28.0", path = "../gix-features" } -gix-path = { version = "^0.7.2", path = "../gix-path" } +gix-path = { version = "^0.7.3", path = "../gix-path" } gix-quote = { version = "^0.4.3", path = "../gix-quote" } gix-glob = { version = "^0.5.5", path = "../gix-glob" } bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"]} +smallvec = "1.10.0" +kstring = "2.0.0" unicode-bom = "2.0.2" thiserror = "1.0.26" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} +log = "0.4.17" document-features = { version = "0.2.1", optional = true } [dev-dependencies] gix-testtools = { path = "../tests/tools"} +gix-utils = { path = "../gix-utils" } [package.metadata.docs.rs] all-features = true diff --git a/gix-attributes/src/lib.rs b/gix-attributes/src/lib.rs index 7d95c022f41..b81c3e9371d 100644 --- a/gix-attributes/src/lib.rs +++ b/gix-attributes/src/lib.rs @@ -1,4 +1,4 @@ -//! Parse `.gitattribute` and `.gitignore` files and provide utilities to match against them. +//! Parse `.gitattribute` files and provide utilities to match against them. //! //! ## Feature Flags #![cfg_attr( @@ -6,25 +6,26 @@ cfg_attr(doc, doc = ::document_features::document_features!()) )] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] -#![deny(missing_docs, rust_2018_idioms)] -#![forbid(unsafe_code)] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] -use std::path::PathBuf; - -use bstr::{BStr, BString}; pub use gix_glob as glob; +use kstring::{KString, KStringRef}; mod assignment; /// pub mod name; -mod state; +/// +pub mod state; -mod match_group; -pub use match_group::{Attributes, Ignore, Match, Pattern}; +/// +pub mod search; /// pub mod parse; -/// Parse attribute assignments line by line from `bytes`. 
+ +/// Parse attribute assignments line by line from `bytes`, and fail the operation on error. +/// +/// For leniency, ignore errors using `filter_map(Result::ok)` for example. pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { parse::Lines::new(bytes) } @@ -42,7 +43,7 @@ pub enum StateRef<'a> { /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. #[cfg_attr(feature = "serde1", serde(borrow))] - Value(&'a BStr), + Value(state::ValueRef<'a>), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, } @@ -59,7 +60,7 @@ pub enum State { Unset, /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. - Value(BString), // TODO(performance): Is there a non-utf8 compact_str/KBString crate? See https://github.com/cobalt-org/kstring/issues/37#issuecomment-1446777265 . + Value(state::Value), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, } @@ -67,11 +68,11 @@ pub enum State { /// Represents a validated attribute name #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] -pub struct Name(pub(crate) String); // TODO(performance): See if `KBString` or `compact_string` could be meaningful here. +pub struct Name(pub(crate) KString); /// Holds a validated attribute name as a reference -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] -pub struct NameRef<'a>(&'a str); +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct NameRef<'a>(KStringRef<'a>); /// Name an attribute and describe it's assigned state. 
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] @@ -84,7 +85,7 @@ pub struct Assignment { } /// Holds validated attribute data as a reference -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] pub struct AssignmentRef<'a> { /// The name of the attribute. pub name: NameRef<'a>, @@ -92,46 +93,34 @@ pub struct AssignmentRef<'a> { pub state: StateRef<'a>, } -/// A grouping of lists of patterns while possibly keeping associated to their base path. +/// A grouping of lists of patterns while possibly keeping associated to their base path in order to find matches. /// /// Pattern lists with base path are queryable relative to that base, otherwise they are relative to the repository root. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct MatchGroup { +pub struct Search { /// A list of pattern lists, each representing a patterns from a file or specified by hand, in the order they were /// specified in. /// - /// During matching, this order is reversed. - pub patterns: Vec>, + /// When matching, this order is reversed. + patterns: Vec>, } -/// A list of patterns which optionally know where they were loaded from and what their base is. +/// A list of known global sources for git attribute files in order of ascending precedence. /// -/// Knowing their base which is relative to a source directory, it will ignore all path to match against -/// that don't also start with said base. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct PatternList { - /// Patterns and their associated data in the order they were loaded in or specified, - /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). +/// This means that values from the first variant will be returned first. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +pub enum Source { + /// The attribute file that the installation itself ships with. + GitInstallation, + /// System-wide attributes file. This is typically defined as + /// `$(prefix)/etc/gitattributes` (where prefix is the git-installation directory). + System, + /// This is `/git/attributes` and is git application configuration per user. /// - /// During matching, this order is reversed. - pub patterns: Vec>, - - /// The path from which the patterns were read, or `None` if the patterns - /// don't originate in a file on disk. - pub source: Option, - - /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root. - /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root. - pub base: Option, + /// Note that there is no `~/.gitattributes` file. + Git, + /// The configuration of the repository itself, located in `$GIT_DIR/info/attributes`. + Local, } -/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub struct PatternMapping { - /// The pattern itself, like `/target/*` - pub pattern: gix_glob::Pattern, - /// The value associated with the pattern. - pub value: T, - /// Typically the line number in the file the pattern was parsed from. 
- pub sequence_number: usize, -} +mod source; diff --git a/gix-attributes/src/match_group.rs b/gix-attributes/src/match_group.rs deleted file mode 100644 index 54f72a6d659..00000000000 --- a/gix-attributes/src/match_group.rs +++ /dev/null @@ -1,358 +0,0 @@ -use std::{ - ffi::OsString, - io::Read, - path::{Path, PathBuf}, -}; - -use bstr::{BStr, BString, ByteSlice, ByteVec}; - -use crate::{Assignment, MatchGroup, PatternList, PatternMapping}; - -fn into_owned_assignments<'a>( - attrs: impl Iterator, crate::name::Error>>, -) -> Result, crate::name::Error> { - attrs.map(|res| res.map(|attr| attr.to_owned())).collect() -} - -/// A trait to convert bytes into patterns and their associated value. -/// -/// This is used for `gitattributes` which have a value, and `gitignore` which don't. -pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default { - /// The value associated with a pattern. - type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone; - - /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them. - fn bytes_to_patterns(bytes: &[u8]) -> Vec>; - - /// Returns true if the given pattern may be used for matching. - fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool; -} - -/// An implementation of the [`Pattern`] trait for ignore patterns. 
-#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct Ignore; - -impl Pattern for Ignore { - type Value = (); - - fn bytes_to_patterns(bytes: &[u8]) -> Vec> { - crate::parse::ignore(bytes) - .map(|(pattern, line_number)| PatternMapping { - pattern, - value: (), - sequence_number: line_number, - }) - .collect() - } - - fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { - true - } -} - -/// A value of an attribute pattern, which is either a macro definition or -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub enum Value { - MacroAttributes(Vec), - Assignments(Vec), -} - -/// An implementation of the [`Pattern`] trait for attributes. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct Attributes; - -fn macro_mode() -> gix_glob::pattern::Mode { - gix_glob::pattern::Mode::all() -} - -impl Pattern for Attributes { - type Value = Value; - - fn bytes_to_patterns(bytes: &[u8]) -> Vec> { - crate::parse(bytes) - .filter_map(Result::ok) - .filter_map(|(pattern_kind, assignments, line_number)| { - let (pattern, value) = match pattern_kind { - crate::parse::Kind::Macro(macro_name) => ( - gix_glob::Pattern { - text: macro_name.as_str().into(), - mode: macro_mode(), - first_wildcard_pos: None, - }, - Value::MacroAttributes(into_owned_assignments(assignments).ok()?), - ), - crate::parse::Kind::Pattern(p) => ( - (!p.is_negative()).then_some(p)?, - Value::Assignments(into_owned_assignments(assignments).ok()?), - ), - }; - PatternMapping { - pattern, - value, - sequence_number: line_number, - } - .into() - }) - .collect() - } - - fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { - pattern.mode != macro_mode() - } -} - -/// Describes a matching value within a [`MatchGroup`]. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub struct Match<'a, T> { - /// The glob pattern itself, like `/target/*`. 
- pub pattern: &'a gix_glob::Pattern, - /// The value associated with the pattern. - pub value: &'a T, - /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. - pub source: Option<&'a Path>, - /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. - pub sequence_number: usize, -} - -impl MatchGroup -where - T: Pattern, -{ - /// Match `relative_path`, a path relative to the repository containing all patterns, and return the first match if available. - // TODO: better docs - pub fn pattern_matching_relative_path<'a>( - &self, - relative_path: impl Into<&'a BStr>, - is_dir: Option, - case: gix_glob::pattern::Case, - ) -> Option> { - let relative_path = relative_path.into(); - let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); - self.patterns - .iter() - .rev() - .find_map(|pl| pl.pattern_matching_relative_path(relative_path, basename_pos, is_dir, case)) - } -} - -impl MatchGroup { - /// Given `git_dir`, a `.git` repository, load ignore patterns from `info/exclude` and from `excludes_file` if it - /// is provided. - /// Note that it's not considered an error if the provided `excludes_file` does not exist. - pub fn from_git_dir( - git_dir: impl AsRef, - excludes_file: Option, - buf: &mut Vec, - ) -> std::io::Result { - let mut group = Self::default(); - - let follow_symlinks = true; - // order matters! More important ones first. - group.patterns.extend( - excludes_file - .map(|file| PatternList::::from_file(file, None, follow_symlinks, buf)) - .transpose()? - .flatten(), - ); - group.patterns.extend(PatternList::::from_file( - git_dir.as_ref().join("info").join("exclude"), - None, - follow_symlinks, - buf, - )?); - Ok(group) - } - - /// See [PatternList::::from_overrides()] for details. 
- pub fn from_overrides(patterns: impl IntoIterator>) -> Self { - MatchGroup { - patterns: vec![PatternList::::from_overrides(patterns)], - } - } - - /// Add the given file at `source` if it exists, otherwise do nothing. If a `root` is provided, it's not considered a global file anymore. - /// Returns true if the file was added, or false if it didn't exist. - pub fn add_patterns_file( - &mut self, - source: impl Into, - follow_symlinks: bool, - root: Option<&Path>, - buf: &mut Vec, - ) -> std::io::Result { - let previous_len = self.patterns.len(); - self.patterns.extend(PatternList::::from_file( - source.into(), - root, - follow_symlinks, - buf, - )?); - Ok(self.patterns.len() != previous_len) - } - - /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they - /// are relative to. This also means that `source` is contained within `root` if `root` is provided. - pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into, root: Option<&Path>) { - self.patterns - .push(PatternList::::from_bytes(bytes, source.into(), root)); - } -} - -fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec) -> std::io::Result { - buf.clear(); - let file = if follow_symlinks { - std::fs::File::open(path) - } else { - gix_features::fs::open_options_no_follow().read(true).open(path) - }; - Ok(match file { - Ok(mut file) => { - file.read_to_end(buf)?; - true - } - Err(err) if err.kind() == std::io::ErrorKind::NotFound => false, - Err(err) => return Err(err), - }) -} - -impl PatternList -where - T: Pattern, -{ - /// `source` is the location of the `bytes` which represent a list of patterns line by line. 
- pub fn from_bytes(bytes: &[u8], source: impl Into, root: Option<&Path>) -> Self { - let source = source.into(); - let patterns = T::bytes_to_patterns(bytes); - - let base = root - .and_then(|root| source.parent().expect("file").strip_prefix(root).ok()) - .and_then(|base| { - (!base.as_os_str().is_empty()).then(|| { - let mut base: BString = - gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned(); - - base.push_byte(b'/'); - base - }) - }); - PatternList { - patterns, - source: Some(source), - base, - } - } - - /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally - /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file. - pub fn from_file( - source: impl Into, - root: Option<&Path>, - follow_symlinks: bool, - buf: &mut Vec, - ) -> std::io::Result> { - let source = source.into(); - Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root))) - } -} - -impl PatternList -where - T: Pattern, -{ - /// Return a match if a pattern matches `relative_path`, providing a pre-computed `basename_pos` which is the - /// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory. - /// `case` specifies whether cases should be folded during matching or not. 
- pub fn pattern_matching_relative_path( - &self, - relative_path: &BStr, - basename_pos: Option, - is_dir: Option, - case: gix_glob::pattern::Case, - ) -> Option> { - let (relative_path, basename_start_pos) = - self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; - self.patterns - .iter() - .rev() - .filter(|pm| T::may_use_glob_pattern(&pm.pattern)) - .find_map( - |PatternMapping { - pattern, - value, - sequence_number, - }| { - pattern - .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) - .then_some(Match { - pattern, - value, - source: self.source.as_deref(), - sequence_number: *sequence_number, - }) - }, - ) - } - - /// Like [`pattern_matching_relative_path()`][Self::pattern_matching_relative_path()], but returns an index to the pattern - /// that matched `relative_path`, instead of the match itself. - pub fn pattern_idx_matching_relative_path( - &self, - relative_path: &BStr, - basename_pos: Option, - is_dir: Option, - case: gix_glob::pattern::Case, - ) -> Option { - let (relative_path, basename_start_pos) = - self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; - self.patterns - .iter() - .enumerate() - .rev() - .filter(|(_, pm)| T::may_use_glob_pattern(&pm.pattern)) - .find_map(|(idx, pm)| { - pm.pattern - .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) - .then_some(idx) - }) - } - - fn strip_base_handle_recompute_basename_pos<'a>( - &self, - relative_path: &'a BStr, - basename_pos: Option, - ) -> Option<(&'a BStr, Option)> { - match self.base.as_deref() { - Some(base) => ( - relative_path.strip_prefix(base.as_slice())?.as_bstr(), - basename_pos.and_then(|pos| { - let pos = pos - base.len(); - (pos != 0).then_some(pos) - }), - ), - None => (relative_path, basename_pos), - } - .into() - } -} - -impl PatternList { - /// Parse a list of patterns, using slashes as path separators - pub fn from_overrides(patterns: impl IntoIterator>) -> Self { - PatternList { - 
patterns: patterns - .into_iter() - .map(Into::into) - .enumerate() - .filter_map(|(seq_id, pattern)| { - let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?; - gix_glob::parse(pattern.as_ref()).map(|p| PatternMapping { - pattern: p, - value: (), - sequence_number: seq_id, - }) - }) - .collect(), - source: None, - base: None, - } - } -} diff --git a/gix-attributes/src/name.rs b/gix-attributes/src/name.rs index 03064dbda97..40d86fd4cec 100644 --- a/gix-attributes/src/name.rs +++ b/gix-attributes/src/name.rs @@ -1,4 +1,5 @@ -use bstr::BString; +use bstr::{BStr, BString, ByteSlice}; +use kstring::KStringRef; use crate::{Name, NameRef}; @@ -10,13 +11,32 @@ impl<'a> NameRef<'a> { /// Return the inner `str`. pub fn as_str(&self) -> &str { - self.0 + self.0.as_str() } } impl AsRef for NameRef<'_> { fn as_ref(&self) -> &str { - self.0 + self.0.as_ref() + } +} + +impl<'a> TryFrom<&'a BStr> for NameRef<'a> { + type Error = Error; + + fn try_from(attr: &'a BStr) -> Result { + fn attr_valid(attr: &BStr) -> bool { + if attr.first() == Some(&b'-') { + return false; + } + + attr.bytes() + .all(|b| matches!(b, b'-' | b'.' 
| b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9')) + } + + attr_valid(attr) + .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8")))) + .ok_or_else(|| Error { attribute: attr.into() }) } } diff --git a/gix-attributes/src/parse/attribute.rs b/gix-attributes/src/parse.rs similarity index 94% rename from gix-attributes/src/parse/attribute.rs rename to gix-attributes/src/parse.rs index 9e4b4c66e28..9c19b14f12c 100644 --- a/gix-attributes/src/parse/attribute.rs +++ b/gix-attributes/src/parse.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use bstr::{BStr, ByteSlice}; +use kstring::KStringRef; use crate::{name, AssignmentRef, Name, NameRef, StateRef}; @@ -58,12 +59,7 @@ impl<'a> Iter<'a> { } else if attr.first() == Some(&b'!') { (&attr[1..], StateRef::Unspecified) } else { - ( - attr, - possibly_value - .map(|v| StateRef::Value(v.as_bstr())) - .unwrap_or(StateRef::Set), - ) + (attr, possibly_value.map(StateRef::from_bytes).unwrap_or(StateRef::Set)) }; Ok(AssignmentRef::new(check_attr(attr)?, state)) } @@ -80,7 +76,7 @@ fn check_attr(attr: &BStr) -> Result, name::Error> { } attr_valid(attr) - .then(|| NameRef(attr.to_str().expect("no illformed utf8"))) + .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8")))) .ok_or_else(|| name::Error { attribute: attr.into() }) } @@ -93,6 +89,7 @@ impl<'a> Iterator for Iter<'a> { } } +/// Instantiation impl<'a> Lines<'a> { /// Create a new instance to parse all attributes in all lines of the input `bytes`. 
pub fn new(bytes: &'a [u8]) -> Self { @@ -145,11 +142,11 @@ fn parse_line(line: &BStr, line_number: usize) -> Option, let kind_res = match line.strip_prefix(b"[attr]") { Some(macro_name) => check_attr(macro_name.into()) - .map(|name| Kind::Macro(name.to_owned())) .map_err(|err| Error::MacroName { line_number, macro_name: err.attribute, - }), + }) + .map(|name| Kind::Macro(name.to_owned())), None => { let pattern = gix_glob::Pattern::from_bytes(line.as_ref())?; if pattern.mode.contains(gix_glob::pattern::Mode::NEGATIVE) { diff --git a/gix-attributes/src/parse/ignore.rs b/gix-attributes/src/parse/ignore.rs deleted file mode 100644 index a27ee028525..00000000000 --- a/gix-attributes/src/parse/ignore.rs +++ /dev/null @@ -1,36 +0,0 @@ -use bstr::ByteSlice; - -/// An iterator over line-wise ignore patterns parsed from a buffer. -pub struct Lines<'a> { - lines: bstr::Lines<'a>, - line_no: usize, -} - -impl<'a> Lines<'a> { - /// Create a new instance from `buf` to parse ignore patterns from. - pub fn new(buf: &'a [u8]) -> Self { - let bom = unicode_bom::Bom::from(buf); - Lines { - lines: buf[bom.len()..].lines(), - line_no: 0, - } - } -} - -impl<'a> Iterator for Lines<'a> { - type Item = (gix_glob::Pattern, usize); - - fn next(&mut self) -> Option { - for line in self.lines.by_ref() { - self.line_no += 1; - if line.first() == Some(&b'#') { - continue; - } - match gix_glob::Pattern::from_bytes(line) { - None => continue, - Some(pattern) => return Some((pattern, self.line_no)), - } - } - None - } -} diff --git a/gix-attributes/src/parse/mod.rs b/gix-attributes/src/parse/mod.rs deleted file mode 100644 index 82cacc8ed02..00000000000 --- a/gix-attributes/src/parse/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -/// -pub mod ignore; - -mod attribute; -pub use attribute::{Error, Iter, Kind, Lines}; - -/// Parse git ignore patterns, line by line, from `bytes`. 
-pub fn ignore(bytes: &[u8]) -> ignore::Lines<'_> { - ignore::Lines::new(bytes) -} diff --git a/gix-attributes/src/search/attributes.rs b/gix-attributes/src/search/attributes.rs new file mode 100644 index 00000000000..dcc98dc48a2 --- /dev/null +++ b/gix-attributes/src/search/attributes.rs @@ -0,0 +1,205 @@ +use super::Attributes; +use crate::search::{Assignments, MetadataCollection, Outcome, TrackedAssignment, Value}; +use crate::Search; +use bstr::{BStr, ByteSlice}; +use gix_glob::search::{pattern, Pattern}; +use std::path::{Path, PathBuf}; + +/// Instantiation and initialization. +impl Search { + /// Create a search instance preloaded with *built-ins* as well as attribute `files` from various global locations. + /// See [`Source`][crate::Source] for a way to obtain these paths. + /// Note that parsing is lenient and errors are logged. + /// `buf` is used to read `files` from disk which will be ignored if they do not exist. + /// `collection` will be updated with information necessary to perform lookups later. + pub fn new_globals( + files: impl IntoIterator>, + buf: &mut Vec, + collection: &mut MetadataCollection, + ) -> std::io::Result { + let mut group = Self::default(); + group.add_patterns_buffer(b"[attr]binary -diff -merge -text", "[builtin]", None, collection); + + for path in files.into_iter() { + group.add_patterns_file(path, true, None, buf, collection)?; + } + Ok(group) + } +} + +/// Mutation +impl Search { + /// Add the given file at `source` to our patterns if it exists, otherwise do nothing. + /// Update `collection` with newly added attribute names. + /// If a `root` is provided, it's not considered a global file anymore. + /// Returns true if the file was added, or false if it didn't exist. 
+ pub fn add_patterns_file( + &mut self, + source: impl Into, + follow_symlinks: bool, + root: Option<&Path>, + buf: &mut Vec, + collection: &mut MetadataCollection, + ) -> std::io::Result { + let was_added = gix_glob::search::add_patterns_file(&mut self.patterns, source, follow_symlinks, root, buf)?; + if was_added { + collection.update_from_list(self.patterns.last_mut().expect("just added")); + } + Ok(was_added) + } + /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they + /// are relative to. This also means that `source` is contained within `root` if `root` is provided. + pub fn add_patterns_buffer( + &mut self, + bytes: &[u8], + source: impl Into, + root: Option<&Path>, + collection: &mut MetadataCollection, + ) { + self.patterns.push(pattern::List::from_bytes(bytes, source, root)); + collection.update_from_list(self.patterns.last_mut().expect("just added")); + } +} + +/// Access and matching +impl Search { + /// Match `relative_path`, a path relative to the repository, while respective `case`-sensitivity and write them to `out` + /// Return true if at least one pattern matched. + pub fn pattern_matching_relative_path<'a, 'b>( + &'a self, + relative_path: impl Into<&'b BStr>, + case: gix_glob::pattern::Case, + out: &mut Outcome<'a>, + ) -> bool { + let relative_path = relative_path.into(); + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + let mut has_match = false; + self.patterns.iter().rev().any(|pl| { + has_match |= pattern_matching_relative_path(pl, relative_path, basename_pos, case, out); + out.is_done() + }); + has_match + } + + /// Return the amount of pattern lists contained in this instance. 
+ pub fn num_pattern_lists(&self) -> usize { + self.patterns.len() + } +} + +impl Pattern for Attributes { + type Value = Value; + + fn bytes_to_patterns(bytes: &[u8], source: &std::path::Path) -> Vec> { + fn into_owned_assignments<'a>( + attrs: impl Iterator, crate::name::Error>>, + ) -> Option { + let res = attrs + .map(|res| { + res.map(|a| TrackedAssignment { + id: Default::default(), + inner: a.to_owned(), + }) + }) + .collect::>(); + match res { + Ok(res) => Some(res), + Err(err) => { + log::warn!("{}", err); + None + } + } + } + + crate::parse(bytes) + .filter_map(|res| match res { + Ok(pattern) => Some(pattern), + Err(err) => { + log::warn!("{}: {}", source.display(), err); + None + } + }) + .filter_map(|(pattern_kind, assignments, line_number)| { + let (pattern, value) = match pattern_kind { + crate::parse::Kind::Macro(macro_name) => ( + gix_glob::Pattern { + text: macro_name.as_str().into(), + mode: macro_mode(), + first_wildcard_pos: None, + }, + Value::MacroAssignments { + id: Default::default(), + assignments: into_owned_assignments(assignments)?, + }, + ), + crate::parse::Kind::Pattern(p) => ( + (!p.is_negative()).then_some(p)?, + Value::Assignments(into_owned_assignments(assignments)?), + ), + }; + pattern::Mapping { + pattern, + value, + sequence_number: line_number, + } + .into() + }) + .collect() + } + + fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { + pattern.mode != macro_mode() + } +} + +fn macro_mode() -> gix_glob::pattern::Mode { + gix_glob::pattern::Mode::all() +} + +/// Append all matches of patterns matching `relative_path` to `out`, +/// providing a pre-computed `basename_pos` which is the starting position of the basename of `relative_path`. +/// `case` specifies whether cases should be folded during matching or not. +/// `is_dir` is true if `relative_path` is a directory. +/// Return `true` if at least one pattern matched. 
+#[allow(unused_variables)] +fn pattern_matching_relative_path<'a>( + list: &'a gix_glob::search::pattern::List, + relative_path: &BStr, + basename_pos: Option, + case: gix_glob::pattern::Case, + out: &mut Outcome<'a>, +) -> bool { + let (relative_path, basename_start_pos) = + match list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case) { + Some(r) => r, + None => return false, + }; + let cur_len = out.remaining(); + 'outer: for pattern::Mapping { + pattern, + value, + sequence_number, + } in list + .patterns + .iter() + .rev() + .filter(|pm| Attributes::may_use_glob_pattern(&pm.pattern)) + { + let value: &Value = value; + let attrs = match value { + Value::MacroAssignments { .. } => { + unreachable!("we can't match on macros as they have no pattern") + } + Value::Assignments(attrs) => attrs, + }; + if out.has_unspecified_attributes(attrs.iter().map(|attr| attr.id)) + && pattern.matches_repo_relative_path(relative_path, basename_start_pos, None, case) + { + let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_deref(), *sequence_number); + if all_filled { + break 'outer; + } + } + } + cur_len != out.remaining() +} diff --git a/gix-attributes/src/search/mod.rs b/gix-attributes/src/search/mod.rs new file mode 100644 index 00000000000..e9c58a1d6d8 --- /dev/null +++ b/gix-attributes/src/search/mod.rs @@ -0,0 +1,141 @@ +use kstring::KString; +use smallvec::SmallVec; +use std::collections::HashMap; + +use crate::Assignment; + +mod attributes; +mod outcome; + +/// A typically sized list of attributes. +pub type Assignments = SmallVec<[TrackedAssignment; AVERAGE_NUM_ATTRS]>; + +/// A value of a [pattern mapping][gix_glob::search::pattern::Mapping], +/// which is either a macro definition or a set of attributes. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum Value { + /// A macro, whose name resolves to the contained assignments. Note that the name is the pattern of the mapping itself. 
+ MacroAssignments { + /// The id of the macro itself, which is both an attribute as well as a set of additional attributes into which the macro + /// resolves + id: AttributeId, + /// The attributes or assignments that the macro resolves to. + assignments: Assignments, + }, + /// A set of assignments which are the attributes themselves. + Assignments(Assignments), +} + +/// A way to have an assignment (`attr=value`) but also associated it with an id that allows perfect mapping +/// to tracking information. +/// Note that the order is produced after the files are parsed as global ordering is needed that goes beyond the scope of a +/// single `Search` instance. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TrackedAssignment { + /// The order of the assignment. + pub id: AttributeId, + /// The actual assignment information. + pub inner: Assignment, +} + +/// An implementation of the [`Pattern`][gix_glob::search::Pattern] trait for attributes. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Attributes; + +/// Describes a matching pattern with +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Match<'a> { + /// The glob pattern itself, like `/target/*`. + pub pattern: &'a gix_glob::Pattern, + /// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern. + pub assignment: Assignment, + /// Additional information about the kind of match. + pub kind: MatchKind, + /// Information about the location of the match. + pub location: MatchLocation<'a>, +} + +/// Describes in which what file and line the match was found. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct MatchLocation<'a> { + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. 
+ pub source: Option<&'a std::path::Path>, + /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. + pub sequence_number: usize, +} + +/// The kind of attribute within the context of a [match][Match]. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub enum MatchKind { + /// A attribute. + Attribute { + /// The location of the macro which referred to it the list with all in-order attributes and macros, or `None` if + /// this is attribute wasn't resolved. + /// + /// Use [`Outcome::match_by_id()`] to retrieve the macro. + macro_id: Option, + }, + /// The attribute is a macro, which will resolve into one or more attributes or macros. + Macro { + /// The location of the parent macro which referred to this one in the list with all in-order attributes and macros, + /// or `None` if this is macro wasn't resolved by another one. + /// + /// Use [`Outcome::match_by_id()`] to retrieve the parent. + parent_macro_id: Option, + }, +} + +/// The result of a search, containing all matching attributes. +#[derive(Default)] +pub struct Outcome<'pattern> { + /// The list of all available attributes, by ascending order. Each slots index corresponds to an attribute with that order, i.e. + /// `arr[attr.id] = `. + /// + /// This list needs to be up-to-date with the search group so all possible attribute names are known. + matches_by_id: Vec>, + /// A stack of attributes to use for processing attributes of matched patterns and for resolving their macros. + attrs_stack: SmallVec<[(AttributeId, Assignment, Option); 8]>, + /// A set of attributes we should limit ourselves to, or empty if we should fill in all attributes, made of + selected: SmallVec<[(KString, Option); AVERAGE_NUM_ATTRS]>, + /// The amount of attributes that still need to be set, or `None` if this outcome is consumed which means it + /// needs to be re-initialized. 
+ remaining: Option, +} + +#[derive(Default, Clone)] +struct Slot<'pattern> { + r#match: Option>, + /// A list of all assignments, being an empty list for non-macro attributes, or all assignments (with order) for macros. + /// It's used to resolve macros. + macro_attributes: Assignments, +} + +/// A type to denote an id of an attribute assignment for uniquely identifying each attribute or assignment. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +pub struct AttributeId(pub usize); + +impl Default for AttributeId { + fn default() -> Self { + AttributeId(usize::MAX) + } +} + +/// A utility type to collect metadata for each attribute, unified by its name. +#[derive(Clone, Debug, Default)] +pub struct MetadataCollection { + /// A mapping of an attribute or macro name to its order, that is the time when it was *first* seen. + /// + /// This is the inverse of the order attributes are searched. + name_to_meta: HashMap, +} + +/// Metadata associated with an attribute or macro name. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Metadata { + /// The id to uniquely identify an attribute in the [MetadataCollection]. + pub id: AttributeId, + /// If non-zero in length, this entry belongs to a macro which resolves to these attribute names. 
+ pub macro_attributes: Assignments, +} + +const AVERAGE_NUM_ATTRS: usize = 3; diff --git a/gix-attributes/src/search/outcome.rs b/gix-attributes/src/search/outcome.rs new file mode 100644 index 00000000000..3afc93f05d8 --- /dev/null +++ b/gix-attributes/src/search/outcome.rs @@ -0,0 +1,313 @@ +use crate::search::{ + Assignments, AttributeId, Attributes, Match, MatchKind, MatchLocation, Metadata, MetadataCollection, Outcome, + TrackedAssignment, Value, +}; +use crate::{Assignment, NameRef, State}; +use bstr::{BString, ByteSlice}; +use gix_glob::Pattern; +use kstring::{KString, KStringRef}; +use std::borrow::Cow; +use std::path::Path; + +/// Initialization +impl<'pattern> Outcome<'pattern> { + /// Initialize this instance to collect outcomes for all names in `collection`, which represents all possible attributes + /// or macros we may visit. + /// + /// This must be called after each time `collection` changes. + pub fn initialize(&mut self, collection: &MetadataCollection) { + if self.matches_by_id.len() != collection.name_to_meta.len() { + let global_num_attrs = collection.name_to_meta.len(); + + self.matches_by_id.resize(global_num_attrs, Default::default()); + + // NOTE: This works only under the assumption that macros remain defined. + for (order, macro_attributes) in collection.iter().filter_map(|(_, meta)| { + (!meta.macro_attributes.is_empty()).then_some((meta.id.0, &meta.macro_attributes)) + }) { + self.matches_by_id[order].macro_attributes = macro_attributes.clone() + } + } + self.reset(); + } + + /// Like [`initialize()`][Self::initialize()], but limits the set of attributes to look for and fill in + /// to `attribute_names`. + /// Users of this instance should prefer to limit their search as this would allow it to finish earlier. + /// + /// Note that `attribute_names` aren't validated to be valid names here, as invalid names definitely will always be unspecified. 
+ pub fn initialize_with_selection<'a>( + &mut self, + collection: &MetadataCollection, + attribute_names: impl IntoIterator>>, + ) { + self.initialize(collection); + + self.selected.clear(); + self.selected.extend(attribute_names.into_iter().map(|name| { + let name = name.into(); + ( + name.to_owned(), + collection.name_to_meta.get(name.as_str()).map(|meta| meta.id), + ) + })); + self.reset_remaining(); + } + + /// Prepare for a new search over the known set of attributes by resetting our state. + pub fn reset(&mut self) { + self.matches_by_id.iter_mut().for_each(|item| item.r#match = None); + self.attrs_stack.clear(); + self.reset_remaining(); + } + + fn reset_remaining(&mut self) { + self.remaining = Some(if self.selected.is_empty() { + self.matches_by_id.len() + } else { + self.selected.iter().filter(|(_name, id)| id.is_some()).count() + }); + } +} + +/// Access +impl<'pattern> Outcome<'pattern> { + /// Return an iterator over all filled attributes we were initialized with. + /// + /// ### Note + /// + /// If [`initialize_with_selection`][Self::initialize_with_selection()] was used, + /// use [`iter_selected()`][Self::iter_selected()] instead. + /// + /// ### Deviation + /// + /// It's possible that the order in which the attribute are returned (if not limited to a set of attributes) isn't exactly + /// the same as what `git` provides. + /// Ours is in order of declaration, whereas `git` seems to list macros first somehow. Since the values are the same, this + /// shouldn't be an issue. + pub fn iter<'a>(&'a self) -> impl Iterator> + 'a { + self.matches_by_id.iter().filter_map(|item| item.r#match.as_ref()) + } + + /// Iterate over all matches of the attribute selection in their original order. 
+ pub fn iter_selected<'a>(&'a self) -> impl Iterator>> + 'a { + static DUMMY: Pattern = Pattern { + text: BString::new(Vec::new()), + mode: gix_glob::pattern::Mode::empty(), + first_wildcard_pos: None, + }; + self.selected.iter().map(|(name, id)| { + id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref()) + .map(Cow::Borrowed) + .unwrap_or_else(|| { + Cow::Owned(Match { + pattern: &DUMMY, + assignment: Assignment { + name: NameRef::try_from(name.as_bytes().as_bstr()) + .unwrap_or_else(|_| NameRef("invalid".into())) + .to_owned(), + state: State::Unspecified, + }, + kind: MatchKind::Attribute { macro_id: None }, + location: MatchLocation { + source: None, + sequence_number: 0, + }, + }) + }) + }) + } + + /// Obtain a match by the order of its attribute, if the order exists in our initialized attribute list and there was a match. + pub fn match_by_id(&self, id: AttributeId) -> Option<&Match<'pattern>> { + self.matches_by_id.get(id.0).and_then(|m| m.r#match.as_ref()) + } +} + +/// Mutation +impl<'pattern> Outcome<'pattern> { + /// Fill all `attrs` and resolve them recursively if they are macros. Return `true` if there is no attribute left to be resolved and + /// we are totally done. + /// `pattern` is what matched a patch and is passed for contextual information, + /// providing `sequence_number` and `source` as well. + pub(crate) fn fill_attributes<'a>( + &mut self, + attrs: impl Iterator, + pattern: &'pattern gix_glob::Pattern, + source: Option<&'pattern Path>, + sequence_number: usize, + ) -> bool { + self.attrs_stack.extend(attrs.filter_map(|attr| { + self.matches_by_id[attr.id.0] + .r#match + .is_none() + .then(|| (attr.id, attr.inner.clone(), None)) + })); + while let Some((id, assignment, parent_order)) = self.attrs_stack.pop() { + let slot = &mut self.matches_by_id[id.0]; + if slot.r#match.is_some() { + continue; + } + // Let's be explicit - this is only non-empty for macros. 
+ let is_macro = !slot.macro_attributes.is_empty(); + + slot.r#match = Some(Match { + pattern, + assignment: assignment.to_owned(), + kind: if is_macro { + MatchKind::Macro { + parent_macro_id: parent_order, + } + } else { + MatchKind::Attribute { macro_id: parent_order } + }, + location: MatchLocation { + source, + sequence_number, + }, + }); + if self.reduce_and_check_if_done(id) { + return true; + } + + if is_macro { + // TODO(borrowchk): one fine day we should be able to re-borrow `slot` without having to redo the array access. + let slot = &self.matches_by_id[id.0]; + self.attrs_stack.extend(slot.macro_attributes.iter().filter_map(|attr| { + self.matches_by_id[attr.id.0] + .r#match + .is_none() + .then(|| (attr.id, attr.inner.clone(), Some(id))) + })); + } + } + false + } +} + +impl<'attr> Outcome<'attr> { + /// Given a list of `attrs` by order, return true if at least one of them is not set + pub(crate) fn has_unspecified_attributes(&self, mut attrs: impl Iterator) -> bool { + attrs.any(|order| self.matches_by_id[order.0].r#match.is_none()) + } + /// Return the amount of attributes haven't yet been found. + /// + /// If this number reaches 0, then the search can be stopped as there is nothing more to fill in. + pub(crate) fn remaining(&self) -> usize { + self.remaining + .expect("BUG: instance must be initialized for each search set") + } + + /// Return true if there is nothing more to be done as all attributes were filled. + pub(crate) fn is_done(&self) -> bool { + self.remaining() == 0 + } + + fn reduce_and_check_if_done(&mut self, attr: AttributeId) -> bool { + if self.selected.is_empty() + || self + .selected + .iter() + .any(|(_name, id)| id.map_or(false, |id| id == attr)) + { + *self.remaining.as_mut().expect("initialized") -= 1; + } + self.is_done() + } +} + +/// Mutation +impl MetadataCollection { + /// Assign order ids to each attribute either in macros (along with macros themselves) or attributes of patterns, and store + /// them in this collection. 
+ /// + /// Must be called before querying matches. + pub fn update_from_list(&mut self, list: &mut gix_glob::search::pattern::List) { + for pattern in &mut list.patterns { + match &mut pattern.value { + Value::MacroAssignments { id: order, assignments } => { + *order = self.id_for_macro( + pattern + .pattern + .text + .to_str() + .expect("valid macro names are always UTF8 and this was verified"), + assignments, + ); + } + Value::Assignments(assignments) => { + self.assign_order_to_attributes(assignments); + } + } + } + } +} + +/// Access +impl MetadataCollection { + /// Return an iterator over the contents of the map in an easy-to-consume form. + pub fn iter(&self) -> impl Iterator { + self.name_to_meta.iter().map(|(k, v)| (k.as_str(), v)) + } +} + +impl MetadataCollection { + pub(crate) fn id_for_macro(&mut self, name: &str, attrs: &mut Assignments) -> AttributeId { + let order = match self.name_to_meta.get_mut(name) { + Some(meta) => meta.id, + None => { + let order = AttributeId(self.name_to_meta.len()); + self.name_to_meta.insert( + KString::from_ref(name), + Metadata { + id: order, + macro_attributes: Default::default(), + }, + ); + order + } + }; + + self.assign_order_to_attributes(attrs); + self.name_to_meta.get_mut(name).expect("just added").macro_attributes = attrs.clone(); + + order + } + pub(crate) fn id_for_attribute(&mut self, name: &str) -> AttributeId { + match self.name_to_meta.get(name) { + Some(meta) => meta.id, + None => { + let order = AttributeId(self.name_to_meta.len()); + self.name_to_meta.insert(KString::from_ref(name), order.into()); + order + } + } + } + pub(crate) fn assign_order_to_attributes(&mut self, attributes: &mut [TrackedAssignment]) { + for TrackedAssignment { + id: order, + inner: crate::Assignment { name, .. 
}, + } in attributes + { + *order = self.id_for_attribute(&name.0); + } + } +} + +impl From for Metadata { + fn from(order: AttributeId) -> Self { + Metadata { + id: order, + macro_attributes: Default::default(), + } + } +} + +impl MatchKind { + /// return the id of the macro that resolved us, or `None` if that didn't happen. + pub fn source_id(&self) -> Option { + match self { + MatchKind::Attribute { macro_id: id } | MatchKind::Macro { parent_macro_id: id } => *id, + } + } +} diff --git a/gix-attributes/src/source.rs b/gix-attributes/src/source.rs new file mode 100644 index 00000000000..566e291b2fe --- /dev/null +++ b/gix-attributes/src/source.rs @@ -0,0 +1,28 @@ +use crate::Source; +use std::borrow::Cow; +use std::ffi::OsString; +use std::path::Path; + +impl Source { + /// Produce a storage location for the this source while potentially querying environment variables using `env_var()`, + /// or `None` if the storage location could not be obtained. + /// + /// Note that local sources are returned as relative paths to be joined with the base in a separate step. + pub fn storage_location(self, env_var: &mut dyn FnMut(&str) -> Option) -> Option> { + use Source::*; + Some(match self { + GitInstallation => gix_path::env::installation_config_prefix()? 
+ .join("gitattributes") + .into(), + System => { + if env_var("GIT_ATTR_NOSYSTEM").is_some() { + return None; + } else { + gix_path::env::system_prefix()?.join("etc/gitattributes").into() + } + } + Git => return gix_path::env::xdg_config("attributes", env_var).map(Cow::Owned), + Local => Cow::Borrowed(Path::new("info/attributes")), + }) + } +} diff --git a/gix-attributes/src/state.rs b/gix-attributes/src/state.rs index 02dc8ee0da1..cde0e6a0d14 100644 --- a/gix-attributes/src/state.rs +++ b/gix-attributes/src/state.rs @@ -1,7 +1,85 @@ -use bstr::ByteSlice; +use bstr::{BStr, ByteSlice}; +use kstring::{KString, KStringRef}; use crate::{State, StateRef}; +/// A container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Value(KString); + +/// A reference container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct ValueRef<'a>(#[cfg_attr(feature = "serde1", serde(borrow))] KStringRef<'a>); + +/// Conversions +impl<'a> ValueRef<'a> { + /// Keep `input` as our value. + pub fn from_bytes(input: &'a [u8]) -> Self { + Self(KStringRef::from_ref( + // SAFETY: our API makes accessing that value as `str` impossible, so illformed UTF8 is never exposed as such. + #[allow(unsafe_code)] + unsafe { + std::str::from_utf8_unchecked(input) + }, + )) + } + + /// Access this value as byte string. + pub fn as_bstr(&self) -> &BStr { + self.0.as_bytes().as_bstr() + } + + /// Convert this instance into its owned form. 
+ pub fn to_owned(self) -> Value { + self.into() + } +} + +impl<'a> From<&'a str> for ValueRef<'a> { + fn from(v: &'a str) -> Self { + ValueRef(v.into()) + } +} + +impl<'a> From> for Value { + fn from(v: ValueRef<'a>) -> Self { + Value(v.0.into()) + } +} + +impl From<&str> for Value { + fn from(v: &str) -> Self { + Value(KString::from_ref(v)) + } +} + +/// Access +impl Value { + /// Return ourselves as reference. + pub fn as_ref(&self) -> ValueRef<'_> { + ValueRef(self.0.as_ref()) + } +} + +/// Access +impl State { + /// Return `true` if the associated attribute was set to be unspecified using the `!attr` prefix or it wasn't mentioned. + pub fn is_unspecified(&self) -> bool { + matches!(self, State::Unspecified) + } +} + +/// Initialization +impl<'a> StateRef<'a> { + /// Keep `input` in one of our enums. + pub fn from_bytes(input: &'a [u8]) -> Self { + Self::Value(ValueRef::from_bytes(input)) + } +} + +/// Access impl<'a> StateRef<'a> { /// Turn ourselves into our owned counterpart. pub fn to_owned(self) -> State { @@ -13,7 +91,7 @@ impl<'a> State { /// Turn ourselves into our ref-type. 
pub fn as_ref(&'a self) -> StateRef<'a> { match self { - State::Value(v) => StateRef::Value(v.as_bytes().as_bstr()), + State::Value(v) => StateRef::Value(v.as_ref()), State::Set => StateRef::Set, State::Unset => StateRef::Unset, State::Unspecified => StateRef::Unspecified, @@ -24,7 +102,7 @@ impl<'a> State { impl<'a> From> for State { fn from(s: StateRef<'a>) -> Self { match s { - StateRef::Value(v) => State::Value(v.to_str().expect("no illformed unicode").into()), + StateRef::Value(v) => State::Value(v.into()), StateRef::Set => State::Set, StateRef::Unset => State::Unset, StateRef::Unspecified => State::Unspecified, diff --git a/gix-attributes/tests/attributes.rs b/gix-attributes/tests/attributes.rs index aba357837be..74b3f9bb249 100644 --- a/gix-attributes/tests/attributes.rs +++ b/gix-attributes/tests/attributes.rs @@ -1,3 +1,4 @@ pub use gix_testtools::Result; -mod match_group; mod parse; +mod search; +mod state; diff --git a/gix-attributes/tests/fixtures/generated-archives/.gitignore b/gix-attributes/tests/fixtures/generated-archives/.gitignore new file mode 100644 index 00000000000..fe98b6aba80 --- /dev/null +++ b/gix-attributes/tests/fixtures/generated-archives/.gitignore @@ -0,0 +1 @@ +make_attributes_baseline.tar.xz diff --git a/gix-attributes/tests/fixtures/generated-archives/make_global_and_external_and_dir_ignores.tar.xz b/gix-attributes/tests/fixtures/generated-archives/make_global_and_external_and_dir_ignores.tar.xz deleted file mode 100644 index ec3de4c93d3..00000000000 --- a/gix-attributes/tests/fixtures/generated-archives/make_global_and_external_and_dir_ignores.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b424f371fffe6557669ecaccd3e944e23c1efcd5764b017b5d6709058f6210b -size 10536 diff --git a/gix-attributes/tests/fixtures/ignore/various.txt b/gix-attributes/tests/fixtures/ignore/various.txt deleted file mode 100644 index 214e46f2d54..00000000000 --- a/gix-attributes/tests/fixtures/ignore/various.txt 
+++ /dev/null @@ -1,14 +0,0 @@ -# ignore objects and archives, anywhere in the tree. -*.[oa] - -# ignore generated html files, -*.html - -# except foo.html which is maintained by hand -!foo.html - -# exclude everything except directory foo/bar -/* -!/foo -/foo/* -!/foo/bar diff --git a/gix-attributes/tests/fixtures/make_attributes_baseline.sh b/gix-attributes/tests/fixtures/make_attributes_baseline.sh new file mode 100644 index 00000000000..ce48ebe1abe --- /dev/null +++ b/gix-attributes/tests/fixtures/make_attributes_baseline.sh @@ -0,0 +1,130 @@ +#!/bin/bash +set -eu -o pipefail + +mkdir basics; + +function baseline() { + { + echo "$1" + GIT_ATTR_NOSYSTEM=1 git -c core.attributesFile=$PWD/user.attributes check-attr -a "$1" + echo + } >> baseline +} + + +(cd basics + git init + + # based on https://github.com/git/git/blob/140b9478dad5d19543c1cb4fd293ccec228f1240/t/t0003-attributes.sh#L45 + mkdir -p a/b/d a/c b + ( + echo "[attr]notest !test" + echo "\" d \" test=d" + echo " e test=e" + echo " e\" test=e" + echo "f test=f" + echo "a/i test=a/i" + echo "onoff test -test" + echo "offon -test test" + echo "no notest" + echo "A/e/F test=A/e/F" + echo "\!escaped test-escaped" + echo "**/recursive test-double-star-slash" + echo "a**f test-double-star-no-slash" + echo "dir-slash/ never" + echo "dir/** always" + ) > .gitattributes + ( + echo "g test=a/g" + echo "b/g test=a/b/g" + ) > a/.gitattributes + ( + echo "h test=a/b/h" + echo "d/* test=a/b/d/*" + echo "d/yes notest" + ) > a/b/.gitattributes + ( + echo "global test=global" + echo "z/x/a global-no-wildcard-case-test" + echo "z/x/* global-wildcard-case-test" + ) > user.attributes + + git add . 
&& git commit -qm c1 + + baseline z/x/a + baseline Z/x/a + baseline z/x/A + baseline Z/X/a + baseline Z/x/a + baseline " d " + baseline e + baseline f + baseline dir-slash + baseline dir-slash/a + baseline dir + baseline dir/a + baseline recursive + baseline a/recursive + baseline a/b/recursive + baseline a/b/c/recursive + baseline "!escaped" + baseline af + baseline axf + baseline a/b/d/no + baseline a/e/f + baseline a/f + baseline a/b/d/g + baseline a/B/D/g + baseline b/g + baseline a/c/f + baseline "e\"" + baseline a/i + baseline A/b/h + baseline A/B/D/NO + baseline subdir/a/i + baseline onoff + baseline offon + baseline no + baseline A/e/F + baseline a/e/F + baseline a/e/f + baseline a/g + baseline a/b/g + baseline a/b/h + baseline a/b/d/ANY + baseline a/b/d/yes + baseline global +) + +mkdir lookup-order +(cd lookup-order + + function baseline_selected() { + local path=${1?first argument is the path to match} + shift + { + echo "$path" + git -c core.attributesFile=$PWD/user.attributes check-attr $@ -- "$path" + echo + } >> baseline.selected + } + + git init + cat < user.attributes +[attr]my-text text +[attr]my-binary binary + +* location=user +EOF + cat < .gitattributes +[attr]b-cycle a-cycle my-text +[attr]a-cycle b-cycle my-binary +[attr]recursive recursively-assigned-attr +[attr]my-binary binary macro-overridden recursive + +* other a-cycle +* -other b-cycle +EOF + baseline any + baseline_selected any my-binary recursive unspecified +) diff --git a/gix-attributes/tests/fixtures/make_global_and_external_and_dir_ignores.sh b/gix-attributes/tests/fixtures/make_global_and_external_and_dir_ignores.sh deleted file mode 100644 index 195d47f4886..00000000000 --- a/gix-attributes/tests/fixtures/make_global_and_external_and_dir_ignores.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -cat <user.exclude -# a custom exclude configured per user -user-file-anywhere -/user-file-from-top - -user-dir-anywhere/ -/user-dir-from-top - -user-subdir/file 
-**/user-subdir-anywhere/file -EOF - -mkdir repo; -(cd repo - git init -q - git config core.excludesFile ../user.exclude - - cat <.git/info/exclude -# a sample .git/info/exclude -file-anywhere -/file-from-top - -dir-anywhere/ -/dir-from-top - -subdir/file -**/subdir-anywhere/file -EOF - - cat <.gitignore -# a sample .gitignore -top-level-local-file-anywhere -EOF - - mkdir dir-with-ignore - cat <dir-with-ignore/.gitignore -# a sample .gitignore -sub-level-local-file-anywhere -EOF - - git add .gitignore dir-with-ignore - git commit --allow-empty -m "init" - - mkdir user-dir-anywhere user-dir-from-top dir-anywhere dir-from-top - mkdir -p dir/user-dir-anywhere dir/dir-anywhere - - git check-ignore -vn --stdin 2>&1 <git-check-ignore.baseline || : -user-file-anywhere -dir/user-file-anywhere -user-file-from-top -no-match/user-file-from-top -user-dir-anywhere -user-dir-from-top -no-match/user-dir-from-top -user-subdir/file -subdir/user-subdir-anywhere/file -file-anywhere -dir/file-anywhere -file-from-top -no-match/file-from-top -dir-anywhere -dir/dir-anywhere -dir-from-top -no-match/dir-from-top -subdir/file -subdir/subdir-anywhere/file -top-level-local-file-anywhere -dir/top-level-local-file-anywhere -no-match/sub-level-local-file-anywhere -dir-with-ignore/sub-level-local-file-anywhere -dir-with-ignore/sub-dir/sub-level-local-file-anywhere -EOF - -) diff --git a/gix-attributes/tests/match_group/ignore.rs b/gix-attributes/tests/match_group/ignore.rs deleted file mode 100644 index e278fdedc09..00000000000 --- a/gix-attributes/tests/match_group/ignore.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::io::Read; - -use bstr::{BStr, ByteSlice}; -use gix_attributes::{Ignore, Match, MatchGroup}; -use gix_glob::pattern::Case; - -struct Expectations<'a> { - lines: bstr::Lines<'a>, -} - -impl<'a> Iterator for Expectations<'a> { - type Item = (&'a BStr, Option<(&'a BStr, usize, &'a BStr)>); - - fn next(&mut self) -> Option { - let line = self.lines.next()?; - let (left, value) = 
line.split_at(line.find_byte(b'\t').unwrap()); - let value = value[1..].as_bstr(); - - let source_and_line = if left == b"::" { - None - } else { - let mut tokens = left.split(|b| *b == b':'); - let source = tokens.next().unwrap().as_bstr(); - let line_number: usize = tokens.next().unwrap().to_str_lossy().parse().ok().unwrap(); - let pattern = tokens.next().unwrap().as_bstr(); - Some((source, line_number, pattern)) - }; - Some((value, source_and_line)) - } -} - -#[test] -fn from_git_dir() -> crate::Result { - let dir = gix_testtools::scripted_fixture_read_only("make_global_and_external_and_dir_ignores.sh")?; - let repo_dir = dir.join("repo"); - let git_dir = repo_dir.join(".git"); - let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline"))?; - let mut buf = Vec::new(); - let mut group = MatchGroup::from_git_dir(git_dir, Some(dir.join("user.exclude")), &mut buf)?; - - assert!( - !group.add_patterns_file("not-a-file", false, None, &mut buf)?, - "missing files are no problem and cause a negative response" - ); - assert!( - group.add_patterns_file(repo_dir.join(".gitignore"), true, repo_dir.as_path().into(), &mut buf)?, - "existing files return true" - ); - - buf.clear(); - let ignore_file = repo_dir.join("dir-with-ignore").join(".gitignore"); - std::fs::File::open(&ignore_file)?.read_to_end(&mut buf)?; - group.add_patterns_buffer(&buf, ignore_file, repo_dir.as_path().into()); - - for (path, source_and_line) in (Expectations { - lines: baseline.lines(), - }) { - let actual = group.pattern_matching_relative_path( - path, - repo_dir - .join(path.to_str_lossy().as_ref()) - .metadata() - .ok() - .map(|m| m.is_dir()), - Case::Sensitive, - ); - match (actual, source_and_line) { - ( - Some(Match { - sequence_number, - pattern: _, - source, - value: _, - }), - Some((expected_source, line, _expected_pattern)), - ) => { - assert_eq!(sequence_number, line, "our counting should match the one used in git"); - assert_eq!( - source.map(|p| 
p.canonicalize().unwrap()), - Some(repo_dir.join(expected_source.to_str_lossy().as_ref()).canonicalize()?) - ); - } - (None, None) => {} - (actual, expected) => panic!("actual {actual:?} should match {expected:?} with path '{path}'"), - } - } - Ok(()) -} - -#[test] -fn from_overrides() { - let input = ["simple", "pattern/"]; - let group = gix_attributes::MatchGroup::::from_overrides(input); - assert_eq!( - group.pattern_matching_relative_path("Simple", None, gix_glob::pattern::Case::Fold), - Some(pattern_to_match(&gix_glob::parse("simple").unwrap(), 0)) - ); - assert_eq!( - group.pattern_matching_relative_path("pattern", Some(true), gix_glob::pattern::Case::Sensitive), - Some(pattern_to_match(&gix_glob::parse("pattern/").unwrap(), 1)) - ); - assert_eq!(group.patterns.len(), 1); - assert_eq!( - gix_attributes::PatternList::::from_overrides(input), - group.patterns.into_iter().next().unwrap() - ); -} - -fn pattern_to_match(pattern: &gix_glob::Pattern, sequence_number: usize) -> Match<'_, ()> { - Match { - pattern, - value: &(), - source: None, - sequence_number, - } -} diff --git a/gix-attributes/tests/match_group/mod.rs b/gix-attributes/tests/match_group/mod.rs deleted file mode 100644 index 5ae31c89f66..00000000000 --- a/gix-attributes/tests/match_group/mod.rs +++ /dev/null @@ -1 +0,0 @@ -mod ignore; diff --git a/gix-attributes/tests/parse/attribute.rs b/gix-attributes/tests/parse/attribute.rs deleted file mode 100644 index 16d2c99b0a2..00000000000 --- a/gix-attributes/tests/parse/attribute.rs +++ /dev/null @@ -1,317 +0,0 @@ -use bstr::{BString, ByteSlice}; -use gix_attributes::{parse, StateRef}; -use gix_glob::pattern::Mode; -use gix_testtools::fixture_bytes; - -#[test] -fn byte_order_marks_are_no_patterns() { - assert_eq!( - line("\u{feff}hello"), - (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) - ); - assert_eq!( - line("\u{feff}\"hello\""), - (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) - ); -} - -#[test] -fn 
line_numbers_are_counted_correctly() { - let input = fixture_bytes("attributes/various.txt"); - assert_eq!( - try_lines(&String::from_utf8(input).unwrap()).unwrap(), - vec![ - (pattern(r"*.[oa]", Mode::NO_SUB_DIR, Some(0)), vec![set("c")], 2), - ( - pattern(r"*.html", Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), - vec![set("a"), value("b", "c")], - 5 - ), - (pattern(r"!foo.html", Mode::NO_SUB_DIR, None), vec![set("x")], 8), - (pattern(r"#a/path", Mode::empty(), None), vec![unset("a")], 10), - ( - pattern(r"*", Mode::ABSOLUTE | Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), - vec![unspecified("b")], - 11 - ), - ] - ); -} - -#[test] -fn line_endings_can_be_windows_or_unix() { - assert_eq!( - try_lines("unix\nwindows\r\nlast").unwrap(), - vec![ - (pattern(r"unix", Mode::NO_SUB_DIR, None), vec![], 1), - (pattern(r"windows", Mode::NO_SUB_DIR, None), vec![], 2), - (pattern(r"last", Mode::NO_SUB_DIR, None), vec![], 3) - ] - ); -} - -#[test] -fn comment_lines_are_ignored_as_well_as_empty_ones() { - assert!(gix_attributes::parse(b"# hello world").next().is_none()); - assert!(gix_attributes::parse(b"# \"hello world\"").next().is_none()); - assert!( - gix_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), - "also behind leading whitespace" - ); - assert!(gix_attributes::parse(b"\n\r\n\t\t \n").next().is_none()); -} - -#[test] -fn leading_whitespace_is_ignored() { - assert_eq!(line(" \r\tp"), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); - assert_eq!(line(" \r\t\"p\""), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); -} - -#[test] -fn quotes_separate_attributes_even_without_whitespace() { - assert_eq!( - line(r#""path"a b"#), - (pattern(r"path", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1) - ); -} - -#[test] -fn comment_can_be_escaped_like_gitignore_or_quoted() { - assert_eq!( - line(r"\#hello"), - (pattern(r"#hello", Mode::NO_SUB_DIR, None), vec![], 1), - "undocumented, but definitely works" - ); - assert_eq!( - line("\"# hello\""), - 
(pattern(r"# hello", Mode::NO_SUB_DIR, None), vec![], 1) - ); -} - -#[test] -fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { - assert_eq!( - line(r"\!hello"), - (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1) - ); - assert!(matches!( - try_line(r"!hello"), - Err(parse::Error::PatternNegation { line_number: 1, .. }) - )); - assert!( - matches!( - try_line(r#""!hello""#), - Err(parse::Error::PatternNegation { line_number: 1, .. }), - ), - "even in quotes they trigger…" - ); - assert_eq!( - line(r#""\\!hello""#), - (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1), - "…and must be double-escaped, once to get through quote, then to get through parse ignore line" - ); -} - -#[test] -fn invalid_escapes_in_quotes_are_an_error() { - assert!(matches!(try_line(r#""\!hello""#), Err(parse::Error::Unquote(_)),),); -} - -#[test] -fn custom_macros_can_be_differentiated() { - let output = line(r#"[attr]foo bar -baz"#); - match output.0 { - parse::Kind::Pattern(_) => unreachable!(), - parse::Kind::Macro(name) => { - assert_eq!( - (name.as_str(), output.1, output.2), - (r"foo", vec![set("bar"), unset("baz")], 1) - ); - } - } - - let output = line(r#""[attr]foo" bar -baz"#); - match output.0 { - parse::Kind::Pattern(_) => unreachable!(), - parse::Kind::Macro(name) => { - assert_eq!( - (name.as_str(), output.1, output.2), - (r"foo", vec![set("bar"), unset("baz")], 1), - "it works after unquoting even, making it harder to denote a file name with [attr] prefix" - ); - } - } -} - -#[test] -fn custom_macros_must_be_valid_attribute_names() { - assert!(matches!( - try_line(r"[attr]-prefixdash"), - Err(parse::Error::MacroName { line_number: 1, .. }) - )); - assert!(matches!( - try_line(r"[attr]你好"), - Err(parse::Error::MacroName { line_number: 1, .. }) - )); -} - -#[test] -fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { - assert!(matches!( - try_line(r"p !-a"), - Err(parse::Error::AttributeName { line_number: 1, .. 
}) - )); - assert!( - matches!( - try_line(r#"p !!a"#), - Err(parse::Error::AttributeName { line_number: 1, .. }) - ), - "exclamation marks aren't allowed either" - ); - assert!( - matches!( - try_line(r#"p 你好"#), - Err(parse::Error::AttributeName { line_number: 1, .. }) - ), - "nor is utf-8 encoded characters - gitoxide could consider to relax this when established" - ); -} - -#[test] -fn attributes_are_parsed_behind_various_whitespace_characters() { - assert_eq!( - line(r#"p a b"#), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind space" - ); - assert_eq!( - line(r#""p" a b"#), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind space" - ); - assert_eq!( - line("p\ta\tb"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind tab" - ); - assert_eq!( - line("\"p\"\ta\tb"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind tab" - ); - assert_eq!( - line("p \t a \t b"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind a mix of space and tab" - ); - assert_eq!( - line("\"p\" \t a \t b"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), - "behind a mix of space and tab" - ); -} - -#[test] -fn attributes_come_in_different_flavors_due_to_prefixes() { - assert_eq!( - line(r#"p set -unset !unspecified -set"#), - ( - pattern("p", Mode::NO_SUB_DIR, None), - vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], - 1 - ), - "the parser doesn't care about double-mentions either" - ); -} - -#[test] -fn attributes_can_have_values() { - assert_eq!( - line(r#"p a=one b=2 c=你好 "#), - ( - pattern("p", Mode::NO_SUB_DIR, None), - vec![value("a", "one"), value("b", "2"), value("c", "你好")], - 1 - ), - "only non-whitespace ascii values are allowed, no escaping or anything fancy is possible there" - ); -} - -#[test] -fn attributes_see_state_adjustments_over_value_assignments() { - assert_eq!( - 
line(r#"p set -unset=a !unspecified=b"#), - ( - pattern("p", Mode::NO_SUB_DIR, None), - vec![set("set"), unset("unset"), unspecified("unspecified")], - 1 - ) - ); -} - -#[test] -fn trailing_whitespace_in_attributes_is_ignored() { - assert_eq!( - line("p a \r\t"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), - ); - assert_eq!( - line("\"p\" a \r\t"), - (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), - ); -} - -type ExpandedAttribute<'a> = (parse::Kind, Vec<(BString, gix_attributes::StateRef<'a>)>, usize); - -fn set(attr: &str) -> (BString, StateRef) { - (attr.into(), StateRef::Set) -} - -fn unset(attr: &str) -> (BString, StateRef) { - (attr.into(), StateRef::Unset) -} - -fn unspecified(attr: &str) -> (BString, StateRef) { - (attr.into(), StateRef::Unspecified) -} - -fn value<'b>(attr: &str, value: &'b str) -> (BString, StateRef<'b>) { - (attr.into(), StateRef::Value(value.as_bytes().as_bstr())) -} - -fn pattern(name: &str, flags: gix_glob::pattern::Mode, first_wildcard_pos: Option) -> parse::Kind { - parse::Kind::Pattern(gix_glob::Pattern { - text: name.into(), - mode: flags, - first_wildcard_pos, - }) -} - -fn try_line(input: &str) -> Result { - let mut lines = gix_attributes::parse(input.as_bytes()); - let res = expand(lines.next().unwrap())?; - assert!(lines.next().is_none(), "expected only one line"); - Ok(res) -} - -fn line(input: &str) -> ExpandedAttribute { - try_line(input).unwrap() -} - -fn try_lines(input: &str) -> Result, parse::Error> { - gix_attributes::parse(input.as_bytes()).map(expand).collect() -} - -fn expand( - input: Result<(parse::Kind, parse::Iter<'_>, usize), parse::Error>, -) -> Result, parse::Error> { - let (pattern, attrs, line_no) = input?; - let attrs = attrs - .map(|r| r.map(|attr| (attr.name.as_str().into(), attr.state))) - .collect::, _>>() - .map_err(|e| parse::Error::AttributeName { - attribute: e.attribute, - line_number: line_no, - })?; - Ok((pattern, attrs, line_no)) -} diff --git 
a/gix-attributes/tests/parse/ignore.rs b/gix-attributes/tests/parse/ignore.rs deleted file mode 100644 index 00336c5c689..00000000000 --- a/gix-attributes/tests/parse/ignore.rs +++ /dev/null @@ -1,65 +0,0 @@ -use bstr::BString; -use gix_glob::{pattern::Mode, Pattern}; -use gix_testtools::fixture_bytes; - -#[test] -fn byte_order_marks_are_no_patterns() { - assert_eq!( - flatten(gix_attributes::parse::ignore("\u{feff}hello".as_bytes()).next()), - Some((r"hello".into(), Mode::NO_SUB_DIR, 1)) - ); -} - -#[test] -fn line_numbers_are_counted_correctly() { - let input = fixture_bytes("ignore/various.txt"); - let actual: Vec<_> = gix_attributes::parse::ignore(&input).map(flat_map).collect(); - assert_eq!( - actual, - vec![ - ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), - ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), - ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), - ("*".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH | Mode::ABSOLUTE, 11), - ("foo".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR | Mode::ABSOLUTE, 12), - ("foo/*".into(), Mode::ABSOLUTE, 13), - ("foo/bar".into(), Mode::ABSOLUTE | Mode::NEGATIVE, 14) - ] - ); -} - -#[test] -fn line_endings_can_be_windows_or_unix() { - assert_eq!( - gix_attributes::parse::ignore(b"unix\nwindows\r\nlast") - .map(flat_map) - .collect::>(), - vec![ - (r"unix".into(), Mode::NO_SUB_DIR, 1), - (r"windows".into(), Mode::NO_SUB_DIR, 2), - (r"last".into(), Mode::NO_SUB_DIR, 3) - ] - ); -} - -#[test] -fn comments_are_ignored_as_well_as_empty_ones() { - assert!(gix_attributes::parse::ignore(b"# hello world").next().is_none()); - assert!(gix_attributes::parse::ignore(b"\n\r\n\t\t \n").next().is_none()); -} - -#[test] -fn backslashes_before_hashes_are_no_comments() { - assert_eq!( - flatten(gix_attributes::parse::ignore(br"\#hello").next()), - Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) - ); -} - -fn flatten(input: Option<(Pattern, usize)>) -> Option<(BString, gix_glob::pattern::Mode, usize)> { - input.map(flat_map) -} - -fn 
flat_map(input: (Pattern, usize)) -> (BString, gix_glob::pattern::Mode, usize) { - (input.0.text, input.0.mode, input.1) -} diff --git a/gix-attributes/tests/parse/mod.rs b/gix-attributes/tests/parse/mod.rs index 3a142578d0d..e1fcb060e06 100644 --- a/gix-attributes/tests/parse/mod.rs +++ b/gix-attributes/tests/parse/mod.rs @@ -1,2 +1,370 @@ -mod attribute; -mod ignore; +use bstr::BString; +use gix_attributes::{parse, StateRef}; +use gix_glob::pattern::Mode; +use gix_testtools::fixture_bytes; + +#[test] +fn byte_order_marks_are_no_patterns() { + assert_eq!( + line("\u{feff}hello"), + (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); + assert_eq!( + line("\u{feff}\"hello\""), + (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); +} + +#[test] +fn line_numbers_are_counted_correctly() { + let input = fixture_bytes("attributes/various.txt"); + assert_eq!( + try_lines(&String::from_utf8(input).unwrap()).unwrap(), + vec![ + (pattern(r"*.[oa]", Mode::NO_SUB_DIR, Some(0)), vec![set("c")], 2), + ( + pattern(r"*.html", Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), + vec![set("a"), value("b", "c")], + 5 + ), + (pattern(r"!foo.html", Mode::NO_SUB_DIR, None), vec![set("x")], 8), + (pattern(r"#a/path", Mode::empty(), None), vec![unset("a")], 10), + ( + pattern(r"*", Mode::ABSOLUTE | Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), + vec![unspecified("b")], + 11 + ), + ] + ); +} + +#[test] +fn line_endings_can_be_windows_or_unix() { + assert_eq!( + try_lines("unix\nwindows\r\nlast").unwrap(), + vec![ + (pattern(r"unix", Mode::NO_SUB_DIR, None), vec![], 1), + (pattern(r"windows", Mode::NO_SUB_DIR, None), vec![], 2), + (pattern(r"last", Mode::NO_SUB_DIR, None), vec![], 3) + ] + ); +} + +#[test] +fn comment_lines_are_ignored_as_well_as_empty_ones() { + assert!(gix_attributes::parse(b"# hello world").next().is_none()); + assert!(gix_attributes::parse(b"# \"hello world\"").next().is_none()); + assert!( + gix_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), 
+ "also behind leading whitespace" + ); + assert!(gix_attributes::parse(b"\n\r\n\t\t \n").next().is_none()); +} + +#[test] +fn leading_whitespace_is_ignored() { + assert_eq!(line(" \r\tp"), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); + assert_eq!(line(" \r\t\"p\""), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); +} + +#[test] +fn quotes_separate_attributes_even_without_whitespace() { + assert_eq!( + line(r#""path"a b"#), + (pattern(r"path", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1) + ); +} + +#[test] +fn comment_can_be_escaped_like_gitignore_or_quoted() { + assert_eq!( + line(r"\#hello"), + (pattern(r"#hello", Mode::NO_SUB_DIR, None), vec![], 1), + "undocumented, but definitely works" + ); + assert_eq!( + line("\"# hello\""), + (pattern(r"# hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); +} + +#[test] +fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { + assert_eq!( + line(r"\!hello"), + (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); + assert!(matches!( + try_line(r"!hello"), + Err(parse::Error::PatternNegation { line_number: 1, .. }) + )); + assert!(lenient_lines(r#"!hello"#).is_empty()); + assert!( + matches!( + try_line(r#""!hello""#), + Err(parse::Error::PatternNegation { line_number: 1, .. 
}), + ), + "even in quotes they trigger…" + ); + assert!(lenient_lines(r#""!hello""#).is_empty()); + assert_eq!( + line(r#""\\!hello""#), + (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1), + "…and must be double-escaped, once to get through quote, then to get through parse ignore line" + ); +} + +#[test] +fn invalid_escapes_in_quotes_are_an_error() { + assert!(matches!(try_line(r#""\!hello""#), Err(parse::Error::Unquote(_)),),); + assert!(lenient_lines(r#""\!hello""#).is_empty()); +} + +#[test] +fn custom_macros_can_be_differentiated() { + let output = line(r#"[attr]foo bar -baz"#); + match output.0 { + parse::Kind::Pattern(_) => unreachable!(), + parse::Kind::Macro(name) => { + assert_eq!( + (name.as_str(), output.1, output.2), + (r"foo", vec![set("bar"), unset("baz")], 1) + ); + } + } + + let output = line(r#""[attr]foo" bar -baz"#); + match output.0 { + parse::Kind::Pattern(_) => unreachable!(), + parse::Kind::Macro(name) => { + assert_eq!( + (name.as_str(), output.1, output.2), + (r"foo", vec![set("bar"), unset("baz")], 1), + "it works after unquoting even, making it harder to denote a file name with [attr] prefix" + ); + } + } +} + +#[test] +fn parsing_continues_even_in_the_face_of_invalid_lines_when_using_leniency() { + assert_eq!( + lenient_lines("[attr]-bad\np good\n[attr]-bad\np good2"), + vec![ + (pattern(r"p", Mode::NO_SUB_DIR, None), vec![set("good")], 2), + (pattern(r"p", Mode::NO_SUB_DIR, None), vec![set("good2")], 4), + ] + ); +} + +#[test] +fn macros_can_be_empty() { + let output = line(r#"[attr]macro"#); + match output.0 { + parse::Kind::Pattern(_) => unreachable!(), + parse::Kind::Macro(name) => { + assert_eq!((name.as_str(), output.1, output.2), (r"macro", vec![], 1)); + } + } +} + +#[test] +fn custom_macros_must_be_valid_attribute_names() { + assert!(matches!( + try_line(r"[attr]-prefixdash"), + Err(parse::Error::MacroName { line_number: 1, .. 
}) + )); + assert!(lenient_lines(r"[attr]-prefixdash").is_empty()); + assert!(matches!( + try_line(r"[attr]!exclamation"), + Err(parse::Error::MacroName { line_number: 1, .. }) + )); + assert!(matches!( + try_line(r"[attr]assignment=value"), + Err(parse::Error::MacroName { line_number: 1, .. }) + )); + assert!(matches!( + try_line(r"[attr]你好"), + Err(parse::Error::MacroName { line_number: 1, .. }) + )); + assert!(lenient_lines(r"[attr]你好").is_empty()); +} + +#[test] +fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { + assert!(matches!( + try_line(r"p !-a"), + Err(parse::Error::AttributeName { line_number: 1, .. }) + )); + assert!(lenient_lines(r"p !-a").is_empty()); + assert!( + matches!( + try_line(r#"p !!a"#), + Err(parse::Error::AttributeName { line_number: 1, .. }) + ), + "exclamation marks aren't allowed either" + ); + assert!(lenient_lines(r#"p !!a"#).is_empty()); + assert!( + matches!( + try_line(r#"p 你好"#), + Err(parse::Error::AttributeName { line_number: 1, .. 
}) + ), + "nor is utf-8 encoded characters - gitoxide could consider to relax this when established" + ); + assert!(lenient_lines(r#"p 你好"#).is_empty()); +} + +#[test] +fn attributes_are_parsed_behind_various_whitespace_characters() { + assert_eq!( + line(r#"p a b"#), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line(r#""p" a b"#), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line("p\ta\tb"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("\"p\"\ta\tb"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("p \t a \t b"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); + assert_eq!( + line("\"p\" \t a \t b"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); +} + +#[test] +fn attributes_come_in_different_flavors_due_to_prefixes() { + assert_eq!( + line(r#"p set -unset !unspecified -set"#), + ( + pattern("p", Mode::NO_SUB_DIR, None), + vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], + 1 + ), + "the parser doesn't care about double-mentions either" + ); +} + +#[test] +fn attributes_can_have_values() { + assert_eq!( + line(r#"p a=one b=2 c=你好 "#), + ( + pattern("p", Mode::NO_SUB_DIR, None), + vec![value("a", "one"), value("b", "2"), value("c", "你好")], + 1 + ), + "only non-whitespace ascii values are allowed, no escaping or anything fancy is possible there" + ); +} + +#[test] +fn attributes_see_state_adjustments_over_value_assignments() { + assert_eq!( + line(r#"p set -unset=a !unspecified=b"#), + ( + pattern("p", Mode::NO_SUB_DIR, None), + vec![set("set"), unset("unset"), unspecified("unspecified")], + 1 + ) + ); +} + +#[test] +fn whitespace_around_patterns_can_be_quoted() 
{ + assert_eq!( + line("\" p \" a \r\t"), + (pattern(" p ", Mode::NO_SUB_DIR, None), vec![set("a")], 1), + ); +} + +#[test] +fn trailing_whitespace_in_attributes_is_ignored() { + assert_eq!( + line("p a \r\t"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), + ); + assert_eq!( + line("\"p\" a \r\t"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), + ); +} + +type ExpandedAttribute<'a> = (parse::Kind, Vec<(BString, gix_attributes::StateRef<'a>)>, usize); + +fn set(attr: &str) -> (BString, StateRef) { + (attr.into(), StateRef::Set) +} + +fn unset(attr: &str) -> (BString, StateRef) { + (attr.into(), StateRef::Unset) +} + +fn unspecified(attr: &str) -> (BString, StateRef) { + (attr.into(), StateRef::Unspecified) +} + +fn value<'b>(attr: &str, value: &'b str) -> (BString, StateRef<'b>) { + (attr.into(), StateRef::Value(value.into())) +} + +fn pattern(name: &str, flags: gix_glob::pattern::Mode, first_wildcard_pos: Option) -> parse::Kind { + parse::Kind::Pattern(gix_glob::Pattern { + text: name.into(), + mode: flags, + first_wildcard_pos, + }) +} + +fn try_line(input: &str) -> Result { + let mut lines = gix_attributes::parse(input.as_bytes()); + let res = expand(lines.next().unwrap())?; + assert!(lines.next().is_none(), "expected only one line"); + Ok(res) +} + +fn line(input: &str) -> ExpandedAttribute { + try_line(input).unwrap() +} + +fn lenient_lines(input: &str) -> Vec { + gix_attributes::parse(input.as_bytes()) + .map(expand) + .filter_map(Result::ok) + .collect() +} + +fn try_lines(input: &str) -> Result, parse::Error> { + gix_attributes::parse(input.as_bytes()).map(expand).collect() +} + +fn expand( + input: Result<(parse::Kind, parse::Iter<'_>, usize), parse::Error>, +) -> Result, parse::Error> { + let (pattern, attrs, line_no) = input?; + let attrs = attrs + .map(|r| r.map(|attr| (attr.name.as_str().into(), attr.state))) + .collect::, _>>() + .map_err(|e| parse::Error::AttributeName { + attribute: e.attribute, + line_number: line_no, + 
})?; + Ok((pattern, attrs, line_no)) +} diff --git a/gix-attributes/tests/search/mod.rs b/gix-attributes/tests/search/mod.rs new file mode 100644 index 00000000000..7ab2b11d404 --- /dev/null +++ b/gix-attributes/tests/search/mod.rs @@ -0,0 +1,328 @@ +use bstr::{BStr, ByteSlice}; +use gix_attributes::search::{AttributeId, Outcome}; +use gix_attributes::{AssignmentRef, NameRef, StateRef}; +use gix_glob::pattern::Case; +use gix_utils::FilesystemCapabilities; +use std::collections::BTreeMap; + +mod specials { + use gix_attributes::search::{MetadataCollection, Outcome}; + use gix_attributes::Search; + use gix_glob::pattern::Case; + use std::path::Path; + + #[test] + fn dir_slash_never_matches_but_dir_slah_double_star_does() { + assert!(!searchi("dir/", "dir/a", None)); + } + + #[test] + fn dir_slash_double_star_matches_recursively() { + assert!(searchi("dir/**", "dir/a", None)); + assert!(!search("Dir/**", "dir/a", None), "case doesn't match"); + } + + #[test] + fn global_and_local_prefixes_respect_case_sensitivity() { + assert!(searchi("a/b/d/*", "a/B/D/g", None), "fold: this works globally…"); + assert!(searchi("D/*", "a/B/D/g", Some("a/b")), "fold: …and it works locally"); + assert!(!search("a/b/d/*", "a/B/D/g", None), "sensitive: won't match globally"); + assert!( + !search("D/*", "a/B/D/g", Some("a/b")), + "sensitive: …and also does not match locally!" 
+ ); + } + + fn search_case(pattern: &str, path: &str, rela_containing_dir: Option<&str>, case: Case) -> bool { + let mut search = Search::default(); + let mut collection = MetadataCollection::default(); + search.add_patterns_buffer( + format!("{pattern} test").as_bytes(), + rela_containing_dir + .map(|d| Path::new(d).join("filename")) + .unwrap_or_else(|| Path::new("").into()), + rela_containing_dir.map(|_| Path::new("")), + &mut collection, + ); + let mut out = Outcome::default(); + out.initialize(&collection); + search.pattern_matching_relative_path(path, case, &mut out) + } + + fn searchi(pattern: &str, path: &str, rela_containing_dir: Option<&str>) -> bool { + search_case(pattern, path, rela_containing_dir, Case::Fold) + } + fn search(pattern: &str, path: &str, rela_containing_dir: Option<&str>) -> bool { + search_case(pattern, path, rela_containing_dir, Case::Sensitive) + } +} + +#[test] +fn baseline() -> crate::Result { + let mut buf = Vec::new(); + // Due to the way our setup differs from gits dynamic stack (which involves trying to read files from disk + // by path) we can only test one case baseline, so we require multiple platforms (or filesystems) to run this. + let case = if FilesystemCapabilities::probe("../.git").ignore_case { + Case::Fold + } else { + Case::Sensitive + }; + let (mut group, mut collection, base, input) = baseline::user_attributes("basics")?; + + // Note that we have to hard-code these files for a lack of dynamic stack. + // This isn't a problem as non-matching prefixes will simply be ignored. 
+ for (file, use_base) in [ + (".gitattributes", false), + ("a/.gitattributes", true), + ("a/b/.gitattributes", true), + ] { + group.add_patterns_file( + base.join(file), + false, + use_base.then_some(base.as_path()), + &mut buf, + &mut collection, + )?; + } + assert_eq!( + group.num_pattern_lists(), + 1 + 4, + "should have loaded all files, and the builtins" + ); + + let mut actual = gix_attributes::search::Outcome::default(); + actual.initialize(&collection); + for (rela_path, expected) in (baseline::Expectations { lines: input.lines() }) { + actual.reset(); + let has_match = group.pattern_matching_relative_path(rela_path, case, &mut actual); + assert_references(&actual); + let actual: Vec<_> = actual + .iter() + .filter_map(|m| (!m.assignment.state.is_unspecified()).then(|| m.assignment.as_ref())) + .collect(); + assert_eq!(actual, expected, "we have the same matches: {rela_path:?}"); + assert_ne!(has_match, actual.is_empty()); + } + + Ok(()) +} + +fn assert_references(out: &Outcome) { + for m in out.iter() { + if let Some(source) = m.kind.source_id() { + let sm = out + .match_by_id(source) + .expect("sources are always available in the outcome"); + assert_ne!( + sm.assignment.name, m.assignment.name, + "it's impossible to resolve to ourselves" + ); + } + } +} + +#[test] +fn all_attributes_are_listed_in_declaration_order() -> crate::Result { + let (mut group, mut collection, base, input) = baseline::user_attributes("lookup-order")?; + + let mut buf = Vec::new(); + group.add_patterns_file(base.join(".gitattributes"), false, None, &mut buf, &mut collection)?; + + let mut out = gix_attributes::search::Outcome::default(); + out.initialize(&collection); + + let mut orders = collection + .iter() + .map(|attr| { + ( + attr.0, + attr.1.id, + attr.1 + .macro_attributes + .iter() + .map(|attr| (attr.id, attr.inner.name.as_ref())) + .collect::>(), + ) + }) + .collect::>(); + orders.sort_by_key(|t| t.1); + assert_eq!( + orders, + [ + ( + "binary", + AttributeId(0), + 
assignments([("diff", 1), ("merge", 2), ("text", 3)]).collect() + ), + ("diff", AttributeId(1), vec![]), + ("merge", AttributeId(2), vec![]), + ("text", AttributeId(3), vec![]), + ("my-text", AttributeId(4), assignments(Some(("text", 3))).collect()), + ( + "my-binary", + AttributeId(5), + assignments([("binary", 0), ("macro-overridden", 11), ("recursive", 9)]).collect() + ), + ("location", AttributeId(6), vec![]), + ( + "b-cycle", + AttributeId(7), + assignments([("a-cycle", 8), ("my-text", 4)]).collect() + ), + ( + "a-cycle", + AttributeId(8), + assignments([("b-cycle", 7), ("my-binary", 5)]).collect() + ), + ( + "recursive", + AttributeId(9), + assignments(Some(("recursively-assigned-attr", 10))).collect() + ), + ("recursively-assigned-attr", AttributeId(10), vec![]), + ("macro-overridden", AttributeId(11), vec![]), + ("other", AttributeId(12), vec![]) + ], + "binary is built-in, macros come first then their attributes (or macros)\ + , macros can be overridden, and it's exactly in declaration order" + ); + + for (rela_path, expected) in (baseline::Expectations { lines: input.lines() }) { + out.reset(); + group.pattern_matching_relative_path(rela_path, Case::Sensitive, &mut out); + assert_references(&out); + let actual: Vec<_> = out.iter().map(|m| m.assignment.as_ref()).collect(); + assert_eq!( + by_name(actual), + by_name(expected), + "{rela_path}: the order of everything matches perfectly" + ); + } + assert_eq!( + out.iter().count(), + collection.iter().count(), + "we manage to fill in all attributes in this baseline at least" + ); + Ok(()) +} + +#[test] +fn given_attributes_are_made_available_in_given_order() -> crate::Result { + let (mut group, mut collection, base, input) = + baseline::user_attributes_named_baseline("lookup-order", "baseline.selected")?; + + let mut buf = Vec::new(); + group.add_patterns_file(base.join(".gitattributes"), false, None, &mut buf, &mut collection)?; + + let mut out = gix_attributes::search::Outcome::default(); + 
out.initialize_with_selection(&collection, ["my-binary", "recursive", "unspecified"]); + + for (rela_path, expected) in (baseline::Expectations { lines: input.lines() }) { + out.reset(); + group.pattern_matching_relative_path(rela_path, Case::Sensitive, &mut out); + assert_references(&out); + let actual: Vec<_> = out.iter_selected().map(|m| m.into_owned().assignment).collect(); + assert_eq!( + actual.iter().map(|a| a.as_ref()).collect::>(), + expected, + "{rela_path}: the order of everything matches perfectly" + ); + } + assert_eq!( + out.iter().count(), + 6, + "the search stops early, leaving many attributes unspecified" + ); + Ok(()) +} + +fn by_name(assignments: Vec>) -> BTreeMap, StateRef<'_>> { + assignments.into_iter().map(|a| (a.name, a.state)).collect() +} + +fn assignments<'a>( + input: impl IntoIterator + 'a, +) -> impl Iterator)> + 'a { + input.into_iter().map(|(name, order)| { + ( + AttributeId(order), + gix_attributes::NameRef::try_from(BStr::new(name)).expect("valid name"), + ) + }) +} + +mod baseline { + use bstr::{BStr, ByteSlice}; + + /// Read user-attributes and baseline in one go. + pub fn user_attributes_named_baseline( + name: &str, + baseline: &str, + ) -> crate::Result<(gix_attributes::Search, MetadataCollection, PathBuf, Vec)> { + let dir = gix_testtools::scripted_fixture_read_only("make_attributes_baseline.sh")?; + let base = dir.join(name); + let input = std::fs::read(base.join(baseline))?; + + let mut buf = Vec::new(); + let mut collection = MetadataCollection::default(); + let group = gix_attributes::Search::new_globals([base.join("user.attributes")], &mut buf, &mut collection)?; + + Ok((group, collection, base, input)) + } + use gix_attributes::search::MetadataCollection; + use gix_attributes::{AssignmentRef, StateRef}; + use std::path::PathBuf; + + /// Read user-attributes and baseline in one go. 
+ pub fn user_attributes( + name: &str, + ) -> crate::Result<(gix_attributes::Search, MetadataCollection, PathBuf, Vec)> { + user_attributes_named_baseline(name, "baseline") + } + + pub struct Expectations<'a> { + pub lines: bstr::Lines<'a>, + } + + impl<'a> Iterator for Expectations<'a> { + type Item = ( + &'a BStr, + // Names might refer to attributes or macros + Vec>, + ); + + fn next(&mut self) -> Option { + let path = self.lines.next()?; + let mut assignments = Vec::new(); + loop { + let line = self.lines.next()?; + if line.is_empty() { + return Some((path.as_bstr(), assignments)); + } + + let mut prev = None; + let mut tokens = line.splitn(3, |b| { + let is_match = *b == b' ' && prev.take() == Some(b':'); + prev = Some(*b); + is_match + }); + + if let Some(((_path, attr), info)) = tokens.next().zip(tokens.next()).zip(tokens.next()) { + let state = match info { + b"set" => StateRef::Set, + b"unset" => StateRef::Unset, + b"unspecified" => StateRef::Unspecified, + _ => StateRef::from_bytes(info), + }; + let attr = attr.trim_end_with(|b| b == ':'); + assignments.push(AssignmentRef { + name: gix_attributes::NameRef::try_from(attr.as_bstr()).expect("valid attributes"), + state, + }); + } else { + unreachable!("invalid line format: {line:?}", line = line.as_bstr()) + } + } + } + } +} diff --git a/gix-attributes/tests/state/mod.rs b/gix-attributes/tests/state/mod.rs new file mode 100644 index 00000000000..8c8e9c130ba --- /dev/null +++ b/gix-attributes/tests/state/mod.rs @@ -0,0 +1,23 @@ +use gix_attributes::StateRef; + +const ILLFORMED_UTF8: &[u8] = b"\xC3\x28\x41"; + +mod value { + use crate::state::ILLFORMED_UTF8; + use gix_attributes::state::ValueRef; + + #[test] + fn from_bytes() { + assert_eq!(ValueRef::from_bytes(ILLFORMED_UTF8).as_bstr(), ILLFORMED_UTF8); + assert_eq!(ValueRef::from_bytes("utf8".as_bytes()).as_bstr(), "utf8"); + } +} + +#[test] +fn from_value() { + assert!(std::str::from_utf8(ILLFORMED_UTF8).is_err()); + assert!( + 
matches!(StateRef::from_bytes(ILLFORMED_UTF8), StateRef::Value(v) if v.as_bstr() == ILLFORMED_UTF8), + "this can round-trip with care" + ); +} From 1755c81f64ce8a68807c2026eeae13dc46021db1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 3 Apr 2023 16:23:31 +0200 Subject: [PATCH 11/14] adjust to changes in `gix-attributes` --- Cargo.lock | 438 ++++++++++-------- gitoxide-core/src/repository/exclude.rs | 5 +- gix-worktree/Cargo.toml | 3 +- gix-worktree/src/fs/cache/platform.rs | 2 +- gix-worktree/src/fs/cache/state.rs | 39 +- gix-worktree/src/index/checkout.rs | 3 +- .../{ignore_and_attributes.rs => ignore.rs} | 6 +- gix-worktree/tests/worktree/fs/cache/mod.rs | 2 +- gix/Cargo.toml | 2 + gix/src/config/cache/access.rs | 8 +- gix/src/config/cache/init.rs | 2 +- gix/src/lib.rs | 1 + gix/src/open/repository.rs | 2 +- gix/src/worktree/mod.rs | 15 +- 14 files changed, 298 insertions(+), 230 deletions(-) rename gix-worktree/tests/worktree/fs/cache/{ignore_and_attributes.rs => ignore.rs} (95%) diff --git a/Cargo.lock b/Cargo.lock index 10be4e04f4d..803c0d585c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,46 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-wincon", + "concolor-override", + "concolor-query", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" + +[[package]] +name = "anstyle-parse" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-wincon" +version = 
"0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" +dependencies = [ + "anstyle", + "windows-sys 0.45.0", +] + [[package]] name = "anyhow" version = "1.0.70" @@ -96,9 +136,9 @@ dependencies = [ [[package]] name = "async-executor" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17adb73da160dfb475c183343c8cccd80721ea5a605d3eb57125f0a7b7a92d0b" +checksum = "6fa3dc5f2a8564f07759c008b9109dc0d39de92a88d5588b8a5036d286383afb" dependencies = [ "async-lock", "async-task", @@ -137,7 +177,7 @@ dependencies = [ "log", "parking", "polling", - "rustix 0.37.3", + "rustix", "slab", "socket2", "waker-fn", @@ -205,7 +245,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.14", ] [[package]] @@ -219,9 +259,9 @@ dependencies = [ [[package]] name = "atomic-waker" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "debc29dde2e69f9e47506b525f639ed42300fc014a3e007832592448fa8e4599" +checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" [[package]] name = "atty" @@ -254,9 +294,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" +checksum = "c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3" dependencies = [ "serde", ] @@ -272,9 +312,9 @@ dependencies = [ [[package]] name = "blocking" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c67b173a56acffd6d2326fb7ab938ba0b00a71480e14902b2591c87bc5741e8" +checksum = 
"77231a1c8f801696fc0123ec6150ce92cffb8e164a02afb9c8ddee0e9b65ad65" dependencies = [ "async-channel", "async-lock", @@ -282,6 +322,7 @@ dependencies = [ "atomic-waker", "fastrand", "futures-lite", + "log", ] [[package]] @@ -355,10 +396,10 @@ name = "cargo-smart-release" version = "0.18.0" dependencies = [ "anyhow", - "bitflags 2.0.2", + "bitflags 2.1.0", "cargo_metadata", "cargo_toml", - "clap 4.1.13", + "clap 4.2.1", "crates-index", "env_logger", "git-conventional", @@ -376,9 +417,9 @@ dependencies = [ [[package]] name = "cargo_metadata" -version = "0.15.3" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a1ec454bc3eead8719cb56e15dbbfecdbc14e4b3a3ae4936cc6e31f5fc0d07" +checksum = "eee4243f1f26fc7a42710e7439c149e2b10b05472f88090acce52632f231a73a" dependencies = [ "camino", "cargo-platform", @@ -475,29 +516,39 @@ dependencies = [ [[package]] name = "clap" -version = "4.1.13" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c911b090850d79fc64fe9ea01e28e465f65e821e08813ced95bced72f7a8a9b" +checksum = "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" dependencies = [ - "bitflags 1.3.2", + "clap_builder", "clap_derive", - "clap_lex 0.3.3", - "is-terminal", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" +dependencies = [ + "anstream", + "anstyle", + "bitflags 1.3.2", + "clap_lex 0.4.1", "once_cell", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.1.12" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a932373bab67b984c790ddf2c9ca295d8e3af3b7ef92de5a5bacdccdee4b09b" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.10", + "syn 
2.0.14", ] [[package]] @@ -511,12 +562,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.3" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "033f6b7a4acb1f358c742aaca805c939ee73b4c6209ae4318ec7aca81c42e646" -dependencies = [ - "os_str_bytes", -] +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "clru" @@ -526,9 +574,9 @@ checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" [[package]] name = "cmake" -version = "0.1.49" +version = "0.1.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" dependencies = [ "cc", ] @@ -544,11 +592,26 @@ dependencies = [ "ryu", ] +[[package]] +name = "concolor-override" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" + +[[package]] +name = "concolor-query" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" +dependencies = [ + "windows-sys 0.45.0", +] + [[package]] name = "concurrent-queue" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" +checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" dependencies = [ "crossbeam-utils", ] @@ -586,9 +649,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = 
"e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" @@ -696,9 +759,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -988,24 +1051,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] - -[[package]] -name = "errno" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -1059,14 +1111,14 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", - "redox_syscall", - "windows-sys 0.45.0", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", ] [[package]] @@ -1125,9 +1177,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "futures-channel" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" +checksum = 
"955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1135,21 +1187,21 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-io" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-lite" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" dependencies = [ "fastrand", "futures-core", @@ -1162,21 +1214,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-util" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" +checksum = 
"26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-core", "futures-io", @@ -1189,9 +1241,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1208,9 +1260,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "js-sys", @@ -1250,7 +1302,7 @@ name = "gitoxide" version = "0.24.0" dependencies = [ "anyhow", - "clap 4.1.13", + "clap 4.2.1", "crosstermion", "document-features", "env_logger", @@ -1313,6 +1365,7 @@ dependencies = [ "gix-glob 0.5.5", "gix-hash 0.10.4", "gix-hashtable 0.1.3", + "gix-ignore", "gix-index 0.15.1", "gix-lock 5.0.0", "gix-mailmap", @@ -1395,7 +1448,7 @@ dependencies = [ "compact_str", "gix-features 0.26.5", "gix-glob 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", - "gix-path 0.7.2", + "gix-path 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "gix-quote 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "thiserror", "unicode-bom 1.1.4", @@ -1511,7 +1564,7 @@ dependencies = [ name = "gix-config-value" version = "0.10.2" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "bstr", "document-features", "gix-path 0.7.3", @@ -1596,8 +1649,8 @@ checksum = "38029783886cb46fbe63e61b02a70404aa04cfeacfb53ed336832c20fcb1e281" dependencies = [ "bstr", "dunce", - "gix-hash 0.10.3", - "gix-path 0.7.2", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", + 
"gix-path 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "gix-ref 0.24.1", "gix-sec 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "thiserror", @@ -1627,7 +1680,7 @@ version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3402b831ea4bb3af36369d61dbf250eb0e1a8577d3cb77b9719c11a82485bfe9" dependencies = [ - "gix-hash 0.10.3", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", "libc", "prodash", "sha1_smol", @@ -1669,7 +1722,7 @@ version = "0.0.0" name = "gix-glob" version = "0.5.5" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "bstr", "document-features", "gix-features 0.28.1", @@ -1690,33 +1743,31 @@ dependencies = [ [[package]] name = "gix-hash" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0c5a9f4d621d4f4ea046bb331df5c746ca735b8cae5b234cc2be70ee4dbef0" +version = "0.10.4" dependencies = [ + "document-features", + "gix-features 0.28.1", + "gix-testtools", "hex", + "serde", "thiserror", ] [[package]] name = "gix-hash" version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a258595457bc192d1f1c59d0d168a1e34e2be9b97a614e14995416185de41a7" dependencies = [ - "document-features", - "gix-features 0.28.1", - "gix-testtools", "hex", - "serde", "thiserror", ] [[package]] name = "gix-hashtable" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9609c1b8f36f12968e6a6098f7cdb52004f7d42d570f47a2d6d7c16612f19acb" +version = "0.1.3" dependencies = [ - "gix-hash 0.10.3", + "gix-hash 0.10.4", "hashbrown 0.13.2", "parking_lot", ] @@ -1724,8 +1775,10 @@ dependencies = [ [[package]] name = "gix-hashtable" version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e55e40dfd694884f0eb78796c5bddcf2f8b295dace47039099dd7e76534973" dependencies = [ - "gix-hash 0.10.4", + "gix-hash 0.10.4 
(registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.13.2", "parking_lot", ] @@ -1756,7 +1809,7 @@ dependencies = [ "filetime", "gix-bitmap 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "gix-features 0.26.5", - "gix-hash 0.10.3", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", "gix-lock 3.0.2", "gix-object 0.26.4", "gix-traverse 0.22.2", @@ -1770,7 +1823,7 @@ dependencies = [ name = "gix-index" version = "0.15.1" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "bstr", "btoi", "document-features", @@ -1852,8 +1905,8 @@ dependencies = [ "btoi", "gix-actor 0.17.2", "gix-features 0.26.5", - "gix-hash 0.10.3", - "gix-validate 0.7.3", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", + "gix-validate 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", "hex", "itoa", "nom", @@ -1967,21 +2020,21 @@ dependencies = [ [[package]] name = "gix-path" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6c104a66dec149cb8f7aaafc6ab797654cf82d67f050fd0cb7e7294e328354b" +version = "0.7.3" dependencies = [ "bstr", + "once_cell", + "tempfile", "thiserror", ] [[package]] name = "gix-path" version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32370dce200bb951df013e03dff35b4233fc7a89458642b047629b91734a7e19" dependencies = [ "bstr", - "once_cell", - "tempfile", "thiserror", ] @@ -1989,7 +2042,7 @@ dependencies = [ name = "gix-pathspec" version = "0.1.0" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "bstr", "gix-attributes 0.10.0", "gix-glob 0.5.5", @@ -2067,12 +2120,12 @@ checksum = "93e85abee11aa093f24da7336bf0a8ad598f15da396b28cf1270ab1091137d35" dependencies = [ "gix-actor 0.17.2", "gix-features 0.26.5", - "gix-hash 0.10.3", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", "gix-lock 3.0.2", "gix-object 0.26.4", - "gix-path 0.7.2", + "gix-path 0.7.3 
(registry+https://github.com/rust-lang/crates.io-index)", "gix-tempfile 3.0.2", - "gix-validate 0.7.3", + "gix-validate 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", "memmap2", "nom", "thiserror", @@ -2162,7 +2215,7 @@ dependencies = [ name = "gix-sec" version = "0.6.2" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "dirs 5.0.0", "document-features", "gix-path 0.7.3", @@ -2180,7 +2233,7 @@ checksum = "e8ffa5bf0772f9b01de501c035b6b084cf9b8bb07dec41e3afc6a17336a65f47" dependencies = [ "bitflags 1.3.2", "dirs 4.0.0", - "gix-path 0.7.2", + "gix-path 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "libc", "windows 0.43.0", ] @@ -2282,8 +2335,8 @@ version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b86456d713143fac5aea6787eb3ec6efc0f6dd90ed625fb3f0badf7936ef084" dependencies = [ - "gix-hash 0.10.3", - "gix-hashtable 0.1.2", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", + "gix-hashtable 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "gix-object 0.26.4", "thiserror", ] @@ -2336,20 +2389,20 @@ dependencies = [ [[package]] name = "gix-validate" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69ddb780ea1465255e66818d75b7098371c58dbc9560da4488a44b9f5c7e443" +version = "0.7.4" dependencies = [ "bstr", + "gix-testtools", "thiserror", ] [[package]] name = "gix-validate" version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd629d3680773e1785e585d76fd4295b740b559cad9141517300d99a0c8c049" dependencies = [ "bstr", - "gix-testtools", "thiserror", ] @@ -2363,10 +2416,10 @@ dependencies = [ "gix-attributes 0.8.3", "gix-features 0.26.5", "gix-glob 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", - "gix-hash 0.10.3", + "gix-hash 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)", "gix-index 0.12.4", "gix-object 0.26.4", - "gix-path 
0.7.2", + "gix-path 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "io-close", "thiserror", ] @@ -2381,6 +2434,7 @@ dependencies = [ "gix-features 0.28.1", "gix-glob 0.5.5", "gix-hash 0.10.4", + "gix-ignore", "gix-index 0.15.1", "gix-object 0.28.0", "gix-odb", @@ -2686,13 +2740,13 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -2709,20 +2763,20 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "is-terminal" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8687c819457e979cc940d09cb16e42a1bf70aa6b60a549de6d3a62a0ee90c69e" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix 0.36.11", - "windows-sys 0.45.0", + "rustix", + "windows-sys 0.48.0", ] [[package]] @@ -2801,9 +2855,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.140" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libgit2-sys" @@ -2860,15 +2914,9 @@ checksum = 
"0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" - -[[package]] -name = "linux-raw-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "litrs" @@ -3105,9 +3153,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.48" +version = "0.10.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "518915b97df115dd36109bfa429a48b8f737bd05508cf9588977b599648926d2" +checksum = "7e30d8bc91859781f0a943411186324d580f2bbeb71b452fe91ae344806af3f1" dependencies = [ "bitflags 1.3.2", "cfg-if", @@ -3120,13 +3168,13 @@ dependencies = [ [[package]] name = "openssl-macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.14", ] [[package]] @@ -3137,11 +3185,10 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.83" +version = "0.9.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "666416d899cf077260dac8698d60a60b435a46d57e82acb1be3d0dad87284e5b" +checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" dependencies = [ - "autocfg", "cc", "libc", "pkg-config", @@ -3182,9 +3229,9 @@ dependencies = [ [[package]] name = "parking" -version = 
"2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" +checksum = "14f2252c834a40ed9bb5422029649578e63aa341ac401f74e719dd1afda8394e" [[package]] name = "parking_lot" @@ -3204,7 +3251,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", "windows-sys 0.45.0", ] @@ -3263,9 +3310,9 @@ dependencies = [ [[package]] name = "polling" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e1f879b2998099c2d69ab9605d145d5b661195627eccc680002c4918a7fb6fa" +checksum = "4be1c66a6add46bff50935c313dae30a5030cf8385c5206e8a95e9e9def974aa" dependencies = [ "autocfg", "bitflags 1.3.2", @@ -3274,7 +3321,7 @@ dependencies = [ "libc", "log", "pin-project-lite", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -3297,9 +3344,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.53" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -3431,6 +3478,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -3438,7 +3494,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] @@ 
-3467,9 +3523,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "reqwest" -version = "0.11.15" +version = "0.11.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba30cc2c0cd02af1222ed216ba659cdb2f879dfe3181852fe7c50b1d0005949" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64", "bytes", @@ -3539,7 +3595,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -3555,30 +3611,16 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.36.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" -dependencies = [ - "bitflags 1.3.2", - "errno 0.2.8", - "io-lifetimes", - "libc", - "linux-raw-sys 0.1.4", - "windows-sys 0.45.0", -] - -[[package]] -name = "rustix" -version = "0.37.3" +version = "0.37.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b24138615de35e32031d041a09032ef3487a616d901ca4db224e7d557efae2" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" dependencies = [ "bitflags 1.3.2", - "errno 0.3.0", + "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.3.0", - "windows-sys 0.45.0", + "linux-raw-sys", + "windows-sys 0.48.0", ] [[package]] @@ -3682,29 +3724,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.158" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = 
[ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.158" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.14", ] [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "itoa", "ryu", @@ -3752,7 +3794,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.14", ] [[package]] @@ -3892,9 +3934,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.10" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aad1363ed6d37b84299588d62d3a7d95b5a5c2d9aad5c85609fda12afaa1f40" +checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" dependencies = [ "proc-macro2", "quote", @@ -3923,15 +3965,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", - "rustix 0.36.11", - "windows-sys 0.42.0", + "redox_syscall 0.3.5", + "rustix", + "windows-sys 0.45.0", ] [[package]] @@ -3966,7 +4008,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.14", ] [[package]] @@ -4025,14 +4067,13 @@ 
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.26.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", "num_cpus", "pin-project-lite", @@ -4309,6 +4350,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "value-bag" version = "1.0.0-alpha.9" @@ -4552,6 +4599,15 @@ dependencies = [ "windows-targets 0.42.2", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-targets" version = "0.42.2" diff --git a/gitoxide-core/src/repository/exclude.rs b/gitoxide-core/src/repository/exclude.rs index 86054443bb2..4239d17339f 100644 --- a/gitoxide-core/src/repository/exclude.rs +++ b/gitoxide-core/src/repository/exclude.rs @@ -35,10 +35,7 @@ pub fn query( .worktree() .with_context(|| "Cannot check excludes without a current worktree")?; let index = worktree.index()?; - let mut cache = worktree.excludes( - &index, - Some(gix::attrs::MatchGroup::::from_overrides(overrides)), - )?; + let mut cache = worktree.excludes(&index, Some(gix::ignore::Search::from_overrides(overrides)))?; let prefix = repo.prefix().expect("worktree - we have an index by now")?; diff --git a/gix-worktree/Cargo.toml b/gix-worktree/Cargo.toml index c5f0446dc81..428905ad4fa 100644 --- a/gix-worktree/Cargo.toml +++ b/gix-worktree/Cargo.toml @@ -24,7 +24,7 @@ 
required-features = ["internal-testing-to-avoid-being-run-by-cargo-test-all"] [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde1 = [ "serde", "bstr/serde", "gix-index/serde1", "gix-hash/serde1", "gix-object/serde1" ] +serde1 = [ "serde", "bstr/serde", "gix-index/serde1", "gix-hash/serde1", "gix-object/serde1", "gix-attributes/serde1", "gix-ignore/serde1" ] internal-testing-gix-features-parallel = ["gix-features/parallel"] internal-testing-to-avoid-being-run-by-cargo-test-all = [] @@ -37,6 +37,7 @@ gix-object = { version = "^0.28.0", path = "../gix-object" } gix-glob = { version = "^0.5.5", path = "../gix-glob" } gix-path = { version = "^0.7.3", path = "../gix-path" } gix-attributes = { version = "^0.10.0", path = "../gix-attributes" } +gix-ignore = { version = "^0.1.0", path = "../gix-ignore" } gix-features = { version = "^0.28.0", path = "../gix-features" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/gix-worktree/src/fs/cache/platform.rs b/gix-worktree/src/fs/cache/platform.rs index 0b377acec6a..33819584226 100644 --- a/gix-worktree/src/fs/cache/platform.rs +++ b/gix-worktree/src/fs/cache/platform.rs @@ -34,7 +34,7 @@ impl<'a> Platform<'a> { /// # Panics /// /// If the cache was configured without exclude patterns. 
- pub fn matching_exclude_pattern(&self) -> Option> { + pub fn matching_exclude_pattern(&self) -> Option> { let ignore = self.parent.state.ignore_or_panic(); let relative_path = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(self.parent.stack.current_relative.as_path())); diff --git a/gix-worktree/src/fs/cache/state.rs b/gix-worktree/src/fs/cache/state.rs index 1692bfa5e34..bfe27ef74ef 100644 --- a/gix-worktree/src/fs/cache/state.rs +++ b/gix-worktree/src/fs/cache/state.rs @@ -6,8 +6,8 @@ use gix_hash::oid; use crate::fs::{cache::State, PathOidMapping}; -type AttributeMatchGroup = gix_attributes::MatchGroup; -type IgnoreMatchGroup = gix_attributes::MatchGroup; +type AttributeMatchGroup = gix_attributes::Search; +type IgnoreMatchGroup = gix_ignore::Search; /// State related to attributes associated with files in the repository. #[derive(Default, Clone)] @@ -44,6 +44,8 @@ pub struct Ignore { impl Ignore { /// The `exclude_file_name_for_directories` is an optional override for the filename to use when checking per-directory /// ignore files within the repository, defaults to`.gitignore`. 
+ /// + // This is what it should be able to represent: https://github.com/git/git/blob/140b9478dad5d19543c1cb4fd293ccec228f1240/dir.c#L3354 // TODO: more docs pub fn new( overrides: IgnoreMatchGroup, @@ -79,7 +81,7 @@ impl Ignore { relative_path: &BStr, is_dir: Option, case: Case, - ) -> Option> { + ) -> Option> { let groups = self.match_groups(); let mut dir_match = None; if let Some((source, mapping)) = self @@ -93,7 +95,7 @@ impl Ignore { }) .next() { - let match_ = gix_attributes::Match { + let match_ = gix_ignore::search::Match { pattern: &mapping.pattern, value: &mapping.value, sequence_number: mapping.sequence_number, @@ -135,8 +137,14 @@ impl Ignore { .enumerate() .rev() .find_map(|(plidx, pl)| { - pl.pattern_idx_matching_relative_path(relative_path, basename_pos, is_dir, case) - .map(|idx| (plidx, idx)) + gix_ignore::search::pattern_idx_matching_relative_path( + pl, + relative_path, + basename_pos, + is_dir, + case, + ) + .map(|idx| (plidx, idx)) }) .map(|(plidx, pidx)| (gidx, plidx, pidx)) }) @@ -163,17 +171,24 @@ impl Ignore { let ignore_file_in_index = attribute_files_in_index.binary_search_by(|t| t.0.as_bstr().cmp(ignore_path_relative.as_ref())); let follow_symlinks = ignore_file_in_index.is_err(); - if !self - .stack - .add_patterns_file(dir.join(".gitignore"), follow_symlinks, Some(root), buf)? - { + if !gix_glob::search::add_patterns_file( + &mut self.stack.patterns, + dir.join(".gitignore"), + follow_symlinks, + Some(root), + buf, + )?
{ match ignore_file_in_index { Ok(idx) => { let ignore_blob = find(&attribute_files_in_index[idx].1, buf) .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; let ignore_path = gix_path::from_bstring(ignore_path_relative.into_owned()); - self.stack - .add_patterns_buffer(ignore_blob.data, ignore_path, Some(root)); + gix_glob::search::add_patterns_buffer( + &mut self.stack.patterns, + ignore_blob.data, + ignore_path, + Some(root), + ); } Err(_) => { // Need one stack level per component so push and pop matches. diff --git a/gix-worktree/src/index/checkout.rs b/gix-worktree/src/index/checkout.rs index bd7bbfddf60..dcfea34afbc 100644 --- a/gix-worktree/src/index/checkout.rs +++ b/gix-worktree/src/index/checkout.rs @@ -1,6 +1,5 @@ #![allow(missing_docs)] use bstr::BString; -use gix_attributes::Attributes; use gix_utils::FilesystemCapabilities; #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] @@ -61,7 +60,7 @@ pub struct Options { /// Default true. pub check_stat: bool, /// A group of attribute patterns that are applied globally, i.e. aren't rooted within the repository itself. 
- pub attribute_globals: gix_attributes::MatchGroup, + pub attribute_globals: gix_attributes::Search, } impl Default for Options { diff --git a/gix-worktree/tests/worktree/fs/cache/ignore_and_attributes.rs b/gix-worktree/tests/worktree/fs/cache/ignore.rs similarity index 95% rename from gix-worktree/tests/worktree/fs/cache/ignore_and_attributes.rs rename to gix-worktree/tests/worktree/fs/cache/ignore.rs index bc5d6e1ffe8..e9619fa0093 100644 --- a/gix-worktree/tests/worktree/fs/cache/ignore_and_attributes.rs +++ b/gix-worktree/tests/worktree/fs/cache/ignore.rs @@ -45,7 +45,7 @@ fn special_exclude_cases_we_handle_differently() { Default::default(), gix_worktree::fs::cache::state::Ignore::new( Default::default(), - gix_attributes::MatchGroup::from_git_dir(&git_dir, None, &mut buf).unwrap(), + gix_ignore::Search::from_git_dir(&git_dir, None, &mut buf).unwrap(), None, case, ), @@ -99,8 +99,8 @@ fn check_against_baseline() -> crate::Result { let state = gix_worktree::fs::cache::State::for_add( Default::default(), // TODO: attribute tests gix_worktree::fs::cache::state::Ignore::new( - gix_attributes::MatchGroup::from_overrides(vec!["!force-include"]), - gix_attributes::MatchGroup::from_git_dir(&git_dir, Some(user_exclude_path), &mut buf)?, + gix_ignore::Search::from_overrides(vec!["!force-include"]), + gix_ignore::Search::from_git_dir(&git_dir, Some(user_exclude_path), &mut buf)?, None, case, ), diff --git a/gix-worktree/tests/worktree/fs/cache/mod.rs b/gix-worktree/tests/worktree/fs/cache/mod.rs index ed8b4a22265..2c045f96f47 100644 --- a/gix-worktree/tests/worktree/fs/cache/mod.rs +++ b/gix-worktree/tests/worktree/fs/cache/mod.rs @@ -1,4 +1,4 @@ mod create_directory; #[allow(unused)] -mod ignore_and_attributes; +mod ignore; diff --git a/gix/Cargo.toml b/gix/Cargo.toml index bd4266243e4..5e90a3c627e 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -65,6 +65,7 @@ serde1 = [ "serde", "gix-mailmap/serde1", "gix-url/serde1", "gix-attributes/serde1", + "gix-ignore/serde1", 
"gix-revision/serde1", "gix-credentials/serde1" ] @@ -138,6 +139,7 @@ gix-mailmap = { version = "^0.11.0", path = "../gix-mailmap" } gix-features = { version = "^0.28.1", path = "../gix-features", features = ["progress", "once_cell"] } gix-attributes = { version = "^0.10.0", path = "../gix-attributes" } +gix-ignore = { version = "^0.1.0", path = "../gix-ignore" } gix-glob = { version = "^0.5.5", path = "../gix-glob" } gix-credentials = { version = "^0.12.0", path = "../gix-credentials" } gix-prompt = { version = "^0.3.3", path = "../gix-prompt" } diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 00e98ccc76c..10d3e1c06e1 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -157,7 +157,7 @@ impl Cache { fn assemble_attribute_globals( me: &Cache, _git_dir: &std::path::Path, - ) -> Result { + ) -> Result { let _attributes_file = match me .trusted_file_path("core", None, Core::ATTRIBUTES_FILE.name) .transpose()? @@ -165,7 +165,7 @@ impl Cache { Some(attributes) => Some(attributes.into_owned()), None => me.xdg_config_path("attributes").ok().flatten(), }; - // TODO: implement gix_attributes::MatchGroup::::from_git_dir(), similar to what's done for `Ignore`. + // TODO: implement gix_attributes::Search::from_git_dir(), similar to what's done for `Ignore`. Ok(Default::default()) } @@ -203,7 +203,7 @@ impl Cache { std::env::var_os("XDG_CONFIG_HOME") .map(|path| (PathBuf::from(path), &self.xdg_config_home_env)) .or_else(|| { - gix_path::home_dir().map(|mut p| { + gix_path::env::home_dir().map(|mut p| { ( { p.push(".config"); @@ -225,6 +225,6 @@ impl Cache { /// We never fail for here even if the permission is set to deny as we `gix-config` will fail later /// if it actually wants to use the home directory - we don't want to fail prematurely. 
pub(crate) fn home_dir(&self) -> Option { - gix_path::home_dir().and_then(|path| self.home_env.check_opt(path)) + gix_path::env::home_dir().and_then(|path| self.home_env.check_opt(path)) } } diff --git a/gix/src/config/cache/init.rs b/gix/src/config/cache/init.rs index a6f6ba2a53d..5f455e9a81c 100644 --- a/gix/src/config/cache/init.rs +++ b/gix/src/config/cache/init.rs @@ -95,7 +95,7 @@ impl Cache { "HOME" => Some(home_env), _ => None, } - .and_then(|perm| perm.check_opt(name).and_then(gix_path::env_var)) + .and_then(|perm| perm.check_opt(name).and_then(gix_path::env::var)) }) .map(|p| (source, p.into_owned())) }) diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 99aaf721849..27562abcf7e 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -75,6 +75,7 @@ use gix_features::threading::OwnShared; pub use gix_features::{parallel, progress::Progress, threading}; pub use gix_glob as glob; pub use gix_hash as hash; +pub use gix_ignore as ignore; #[doc(inline)] pub use gix_index as index; pub use gix_lock as lock; diff --git a/gix/src/open/repository.rs b/gix/src/open/repository.rs index 99ff56c4f69..2164a143950 100644 --- a/gix/src/open/repository.rs +++ b/gix/src/open/repository.rs @@ -180,7 +180,7 @@ impl ThreadSafeRepository { }; let head = refs.find("HEAD").ok(); let git_install_dir = crate::path::install_dir().ok(); - let home = gix_path::home_dir().and_then(|home| env.home.check_opt(home)); + let home = gix_path::env::home_dir().and_then(|home| env.home.check_opt(home)); let mut filter_config_section = filter_config_section.unwrap_or(config::section::is_trusted); let config = config::Cache::from_stage_one( diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 1e3aa0c6b09..3051e69f0db 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -119,19 +119,20 @@ pub mod excludes { impl<'repo> crate::Worktree<'repo> { /// Configure a file-system cache checking if files below the repository are excluded. 
/// - /// This takes into consideration all the usual repository configuration. + /// This takes into consideration all the usual repository configuration, namely: + /// + /// * `$XDG_CONFIG_HOME/…/ignore` if `core.excludesFile` is *not* set, otherwise use the configured file. + /// * `$GIT_DIR/info/exclude` if present. /// /// `index` may be used to obtain `.gitignore` files directly from the index under certain conditions. // TODO: test, provide higher-level interface that is much easier to use and doesn't panic when accessing entries // by non-relative path. // TODO: `index` might be so special (given the conditions we are talking about) that it's better obtained internally // so the caller won't have to care. - // TODO: global files like `~/.gitignore` seem to be missing here, but we need a way to control if these should be loaded. - // probably that needs another permission in the repo options or a custom config variable. The latter is easiest to manage. pub fn excludes( &self, index: &gix_index::State, - overrides: Option>, + overrides: Option, ) -> Result { let repo = self.parent; let case = if repo.config.ignore_case { @@ -146,11 +147,7 @@ pub mod excludes { }; let state = gix_worktree::fs::cache::State::IgnoreStack(gix_worktree::fs::cache::state::Ignore::new( overrides.unwrap_or_default(), - gix_attributes::MatchGroup::::from_git_dir( - repo.git_dir(), - excludes_file, - &mut buf, - )?, + gix_ignore::Search::from_git_dir(repo.git_dir(), excludes_file, &mut buf)?, None, case, )); From e24b6773b0626acdd156b2ff205532df530cbcd3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 3 Apr 2023 18:58:52 +0200 Subject: [PATCH 12/14] First steps of attribute cache initialization. This is done to fully understand how attribute initialization works with the current architecture, while taking care of its particular difference compared to the handling of exclude patterns. 
--- gix-worktree/src/fs/cache/state/attributes.rs | 50 ++++++ .../fs/cache/{state.rs => state/ignore.rs} | 145 +----------------- gix-worktree/src/fs/cache/state/mod.rs | 135 ++++++++++++++++ gix-worktree/src/index/checkout.rs | 7 +- gix-worktree/src/index/mod.rs | 4 +- .../tests/worktree/fs/cache/ignore.rs | 2 +- 6 files changed, 198 insertions(+), 145 deletions(-) create mode 100644 gix-worktree/src/fs/cache/state/attributes.rs rename gix-worktree/src/fs/cache/{state.rs => state/ignore.rs} (59%) create mode 100644 gix-worktree/src/fs/cache/state/mod.rs diff --git a/gix-worktree/src/fs/cache/state/attributes.rs b/gix-worktree/src/fs/cache/state/attributes.rs new file mode 100644 index 00000000000..6321dc05e58 --- /dev/null +++ b/gix-worktree/src/fs/cache/state/attributes.rs @@ -0,0 +1,50 @@ +use crate::fs::cache::state::{AttributeMatchGroup, Attributes}; +use gix_glob::pattern::Case; +use std::path::PathBuf; + +/// Decide where to read `.gitattributes` files from. +#[derive(Default, Debug, Clone, Copy)] +pub enum Source { + /// Retrieve attribute files from an attribute list, see + /// [State::attribute_list_from_index()][crate::fs::cache::State::attribute_list_from_index()]. + /// + /// The attribute list is typically produced from an index. If a tree should be the source, build an attribute list + /// from a tree instead. + #[default] + AttributeList, + /// Read from an attribute list and if not present, read from the worktree. + AttributeListThenWorktree, + /// Read from the worktree and if not present, read from the attribute list. + WorktreeThenAttributeList, +} + +/// Initialization +impl Attributes { + /// Create a new instance from an attribute match group that represents `globals`. + /// `globals` contribute first and consist of all globally available, static files. 
+ pub fn new( + globals: AttributeMatchGroup, + info_attributes: Option, + case: Case, + source: Source, + collection: gix_attributes::search::MetadataCollection, + ) -> Self { + Attributes { + globals, + stack: Default::default(), + info_attributes, + case, + source, + collection, + } + } +} + +/// Builder +impl Attributes { + /// Set the case to use when matching attributes to paths. + pub fn with_case(mut self, case: gix_glob::pattern::Case) -> Self { + self.case = case; + self + } +} diff --git a/gix-worktree/src/fs/cache/state.rs b/gix-worktree/src/fs/cache/state/ignore.rs similarity index 59% rename from gix-worktree/src/fs/cache/state.rs rename to gix-worktree/src/fs/cache/state/ignore.rs index bfe27ef74ef..d569a0508cf 100644 --- a/gix-worktree/src/fs/cache/state.rs +++ b/gix-worktree/src/fs/cache/state/ignore.rs @@ -1,23 +1,9 @@ -use std::path::Path; - +use crate::fs::cache::state::IgnoreMatchGroup; +use crate::fs::PathOidMapping; use bstr::{BStr, BString, ByteSlice}; use gix_glob::pattern::Case; use gix_hash::oid; - -use crate::fs::{cache::State, PathOidMapping}; - -type AttributeMatchGroup = gix_attributes::Search; -type IgnoreMatchGroup = gix_ignore::Search; - -/// State related to attributes associated with files in the repository. -#[derive(Default, Clone)] -#[allow(unused)] -pub struct Attributes { - /// Attribute patterns that match the currently set directory (in the stack). - pub stack: AttributeMatchGroup, - /// Attribute patterns which aren't tied to the repository root, hence are global. They are consulted last. - pub globals: AttributeMatchGroup, -} +use std::path::Path; /// State related to the exclusion of files. #[derive(Default, Clone)] @@ -35,7 +21,7 @@ pub struct Ignore { /// (index into match groups, index into list of pattern lists, index into pattern list) matched_directory_patterns_stack: Vec>, /// The name of the file to look for in directories. 
- exclude_file_name_for_directories: BString, + pub(crate) exclude_file_name_for_directories: BString, /// The case to use when matching directories as they are pushed onto the stack. We run them against the exclude engine /// to know if an entire path can be ignored as a parent directory is ignored. case: Case, @@ -183,12 +169,8 @@ impl Ignore { let ignore_blob = find(&attribute_files_in_index[idx].1, buf) .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; let ignore_path = gix_path::from_bstring(ignore_path_relative.into_owned()); - gix_glob::search::add_patterns_buffer( - &mut self.stack.patterns, - ignore_blob.data, - ignore_path, - Some(root), - ); + self.stack + .add_patterns_buffer(ignore_blob.data, ignore_path, Some(root)); } Err(_) => { // Need one stack level per component so push and pop matches. @@ -199,118 +181,3 @@ impl Ignore { Ok(()) } } - -impl Attributes { - /// Create a new instance from an attribute match group that represents `globals`. - /// - /// A stack of attributes will be applied on top of it later. - pub fn new(globals: AttributeMatchGroup) -> Self { - Attributes { - globals, - stack: Default::default(), - } - } -} - -impl From for Attributes { - fn from(group: AttributeMatchGroup) -> Self { - Attributes::new(group) - } -} - -impl State { - /// Configure a state to be suitable for checking out files. - pub fn for_checkout(unlink_on_collision: bool, attributes: Attributes) -> Self { - State::CreateDirectoryAndAttributesStack { - unlink_on_collision, - #[cfg(debug_assertions)] - test_mkdir_calls: 0, - attributes, - } - } - - /// Configure a state for adding files. - pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self { - State::AttributesAndIgnoreStack { attributes, ignore } - } - - /// Configure a state for status retrieval. 
- pub fn for_status(ignore: Ignore) -> Self { - State::IgnoreStack(ignore) - } -} - -impl State { - /// Returns a vec of tuples of relative index paths along with the best usable OID for either ignore, attribute files or both. - /// - /// - ignores entries which aren't blobs - /// - ignores ignore entries which are not skip-worktree - /// - within merges, picks 'our' stage both for ignore and attribute files. - pub fn build_attribute_list( - &self, - index: &gix_index::State, - paths: &gix_index::PathStorageRef, - case: Case, - ) -> Vec { - let a1_backing; - let a2_backing; - let names = match self { - State::IgnoreStack(v) => { - a1_backing = [(v.exclude_file_name_for_directories.as_bytes().as_bstr(), true)]; - a1_backing.as_ref() - } - State::AttributesAndIgnoreStack { ignore, .. } => { - a2_backing = [ - (ignore.exclude_file_name_for_directories.as_bytes().as_bstr(), true), - (".gitattributes".into(), false), - ]; - a2_backing.as_ref() - } - State::CreateDirectoryAndAttributesStack { .. } => { - a1_backing = [(".gitattributes".into(), true)]; - a1_backing.as_ref() - } - }; - - index - .entries() - .iter() - .filter_map(move |entry| { - let path = entry.path_in(paths); - - // Stage 0 means there is no merge going on, stage 2 means it's 'our' side of the merge, but then - // there won't be a stage 0. 
- if entry.mode == gix_index::entry::Mode::FILE && (entry.stage() == 0 || entry.stage() == 2) { - let basename = path - .rfind_byte(b'/') - .map(|pos| path[pos + 1..].as_bstr()) - .unwrap_or(path); - let is_ignore = names.iter().find_map(|t| { - match case { - Case::Sensitive => basename == t.0, - Case::Fold => basename.eq_ignore_ascii_case(t.0), - } - .then_some(t.1) - })?; - // See https://github.com/git/git/blob/master/dir.c#L912:L912 - if is_ignore && !entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { - return None; - } - Some((path.to_owned(), entry.id)) - } else { - None - } - }) - .collect() - } - - pub(crate) fn ignore_or_panic(&self) -> &Ignore { - match self { - State::IgnoreStack(v) => v, - State::AttributesAndIgnoreStack { ignore, .. } => ignore, - State::CreateDirectoryAndAttributesStack { .. } => { - unreachable!("BUG: must not try to check excludes without it being setup") - } - } - } -} diff --git a/gix-worktree/src/fs/cache/state/mod.rs b/gix-worktree/src/fs/cache/state/mod.rs new file mode 100644 index 00000000000..91d6ce032f1 --- /dev/null +++ b/gix-worktree/src/fs/cache/state/mod.rs @@ -0,0 +1,135 @@ +use bstr::ByteSlice; +use gix_glob::pattern::Case; +use std::path::PathBuf; + +use crate::fs::{cache::State, PathOidMapping}; + +type AttributeMatchGroup = gix_attributes::Search; +type IgnoreMatchGroup = gix_ignore::Search; + +/// State related to attributes associated with files in the repository. +#[derive(Default, Clone)] +#[allow(unused)] +pub struct Attributes { + /// Attribute patterns which aren't tied to the repository root, hence are global, they contribute first. + globals: AttributeMatchGroup, + /// Attribute patterns that match the currently set directory (in the stack). + /// + /// Note that the root-level file is always loaded, if present, followed by, the `$GIT_DIR/info/attributes`, if present, based + /// on the location of the `info_attributes` file. 
+ stack: AttributeMatchGroup, + /// The first time we push the root, we have to load additional information from this file if it exists along with the root attributes + /// file if possible, and keep them there throughout. + info_attributes: Option, + /// A lookup table to accelerate searches. + collection: gix_attributes::search::MetadataCollection, + /// The case to use when matching attribute patterns against paths. It is passed to `new()` and can be + /// changed afterwards using `with_case()`. + case: Case, + /// Where to read `.gitattributes` data from. + source: attributes::Source, +} + +/// +pub mod attributes; +mod ignore; +pub use ignore::Ignore; + +/// Initialization +impl State { + /// Configure a state to be suitable for checking out files. + pub fn for_checkout(unlink_on_collision: bool, attributes: Attributes) -> Self { + State::CreateDirectoryAndAttributesStack { + unlink_on_collision, + #[cfg(debug_assertions)] + test_mkdir_calls: 0, + attributes, + } + } + + /// Configure a state for adding files. + pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self { + State::AttributesAndIgnoreStack { attributes, ignore } + } + + /// Configure a state for status retrieval. + pub fn for_status(ignore: Ignore) -> Self { + State::IgnoreStack(ignore) + } +} + +/// Utilities +impl State { + /// Returns a vec of tuples of relative index paths along with the best usable OID for either ignore, attribute files or both. + /// + /// - ignores entries which aren't blobs + /// - ignores ignore entries which are not skip-worktree + /// - within merges, picks 'our' stage both for ignore and attribute files. 
+ pub fn attribute_list_from_index( + &self, + index: &gix_index::State, + paths: &gix_index::PathStorageRef, + case: Case, + ) -> Vec { + let a1_backing; + let a2_backing; + let names = match self { + State::IgnoreStack(v) => { + a1_backing = [(v.exclude_file_name_for_directories.as_bytes().as_bstr(), true)]; + a1_backing.as_ref() + } + State::AttributesAndIgnoreStack { ignore, .. } => { + a2_backing = [ + (ignore.exclude_file_name_for_directories.as_bytes().as_bstr(), true), + (".gitattributes".into(), false), + ]; + a2_backing.as_ref() + } + State::CreateDirectoryAndAttributesStack { .. } => { + a1_backing = [(".gitattributes".into(), true)]; + a1_backing.as_ref() + } + }; + + index + .entries() + .iter() + .filter_map(move |entry| { + let path = entry.path_in(paths); + + // Stage 0 means there is no merge going on, stage 2 means it's 'our' side of the merge, but then + // there won't be a stage 0. + if entry.mode == gix_index::entry::Mode::FILE && (entry.stage() == 0 || entry.stage() == 2) { + let basename = path + .rfind_byte(b'/') + .map(|pos| path[pos + 1..].as_bstr()) + .unwrap_or(path); + let is_ignore = names.iter().find_map(|t| { + match case { + Case::Sensitive => basename == t.0, + Case::Fold => basename.eq_ignore_ascii_case(t.0), + } + .then_some(t.1) + })?; + // See https://github.com/git/git/blob/master/dir.c#L912:L912 + if is_ignore && !entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { + return None; + } + Some((path.to_owned(), entry.id)) + } else { + None + } + }) + .collect() + } + + pub(crate) fn ignore_or_panic(&self) -> &Ignore { + match self { + State::IgnoreStack(v) => v, + State::AttributesAndIgnoreStack { ignore, .. } => ignore, + State::CreateDirectoryAndAttributesStack { .. 
} => { + unreachable!("BUG: must not try to check excludes without it being setup") + } + } + } +} diff --git a/gix-worktree/src/index/checkout.rs b/gix-worktree/src/index/checkout.rs index dcfea34afbc..de7e3a88b2c 100644 --- a/gix-worktree/src/index/checkout.rs +++ b/gix-worktree/src/index/checkout.rs @@ -1,4 +1,5 @@ #![allow(missing_docs)] + use bstr::BString; use gix_utils::FilesystemCapabilities; @@ -59,8 +60,8 @@ pub struct Options { /// /// Default true. pub check_stat: bool, - /// A group of attribute patterns that are applied globally, i.e. aren't rooted within the repository itself. - pub attribute_globals: gix_attributes::Search, + /// A stack of attributes to use with the filesystem cache to use as driver for filters. + pub attributes: crate::fs::cache::state::Attributes, } impl Default for Options { @@ -73,7 +74,7 @@ impl Default for Options { trust_ctime: true, check_stat: true, overwrite_existing: false, - attribute_globals: Default::default(), + attributes: Default::default(), } } } diff --git a/gix-worktree/src/index/mod.rs b/gix-worktree/src/index/mod.rs index 2703ebfce80..7cfa1c98d27 100644 --- a/gix-worktree/src/index/mod.rs +++ b/gix-worktree/src/index/mod.rs @@ -59,8 +59,8 @@ where None, ); - let state = fs::cache::State::for_checkout(options.overwrite_existing, options.attribute_globals.clone().into()); - let attribute_files = state.build_attribute_list(index, paths, case); + let state = fs::cache::State::for_checkout(options.overwrite_existing, options.attributes.clone().with_case(case)); + let attribute_files = state.attribute_list_from_index(index, paths, case); let mut ctx = chunk::Context { buf: Vec::new(), path_cache: fs::Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), diff --git a/gix-worktree/tests/worktree/fs/cache/ignore.rs b/gix-worktree/tests/worktree/fs/cache/ignore.rs index e9619fa0093..8736616ac03 100644 --- a/gix-worktree/tests/worktree/fs/cache/ignore.rs +++ 
b/gix-worktree/tests/worktree/fs/cache/ignore.rs @@ -106,7 +106,7 @@ fn check_against_baseline() -> crate::Result { ), ); let paths_storage = index.take_path_backing(); - let attribute_files_in_index = state.build_attribute_list(&index, &paths_storage, case); + let attribute_files_in_index = state.attribute_list_from_index(&index, &paths_storage, case); assert_eq!( attribute_files_in_index, vec![( From 358500f0efaec7c67b307a6a1aa27ecad7502eb7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 3 Apr 2023 21:14:18 +0200 Subject: [PATCH 13/14] feat: `open::Options` now allow controlling where gitattributes files are loaded from. That way it's possible to, for example, isolate all operations that rely on the `gitattribute` system, like checkouts or additions to the index. --- gix/src/config/cache/access.rs | 91 +++++++++++++++++------- gix/src/config/cache/init.rs | 49 ++++++++----- gix/src/config/mod.rs | 19 +++-- gix/src/open/repository.rs | 10 ++- gix/src/permissions.rs | 2 +- gix/src/repository/permissions.rs | 114 ++++++++++++++++++++++-------- gix/src/worktree/mod.rs | 2 +- gix/tests/object/tree/diff.rs | 1 + gix/tests/repository/object.rs | 17 ++++- 9 files changed, 225 insertions(+), 80 deletions(-) diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 10d3e1c06e1..be06f718ac8 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -1,4 +1,6 @@ #![allow(clippy::result_large_err)] +use gix_attributes::Source; +use gix_glob::pattern::Case; use std::{borrow::Cow, path::PathBuf, time::Duration}; use gix_lock::acquire::Fail; @@ -154,33 +156,30 @@ impl Cache { .unwrap_or(default)) } - fn assemble_attribute_globals( - me: &Cache, - _git_dir: &std::path::Path, - ) -> Result { - let _attributes_file = match me - .trusted_file_path("core", None, Core::ATTRIBUTES_FILE.name) - .transpose()? 
- { - Some(attributes) => Some(attributes.into_owned()), - None => me.xdg_config_path("attributes").ok().flatten(), - }; - // TODO: implement gix_attributes::Search::from_git_dir(), similar to what's done for `Ignore`. - Ok(Default::default()) - } - let thread_limit = self.apply_leniency( self.resolved .integer_filter_by_key("checkout.workers", &mut self.filter_config_section.clone()) .map(|value| Checkout::WORKERS.try_from_workers(value)), )?; + let capabilities = gix_utils::FilesystemCapabilities { + precompose_unicode: boolean(self, "core.precomposeUnicode", &Core::PRECOMPOSE_UNICODE, false)?, + ignore_case: boolean(self, "core.ignoreCase", &Core::IGNORE_CASE, false)?, + executable_bit: boolean(self, "core.fileMode", &Core::FILE_MODE, true)?, + symlink: boolean(self, "core.symlinks", &Core::SYMLINKS, true)?, + }; + let case = if capabilities.ignore_case { + Case::Fold + } else { + Case::Sensitive + }; Ok(gix_worktree::index::checkout::Options { - fs: gix_utils::FilesystemCapabilities { - precompose_unicode: boolean(self, "core.precomposeUnicode", &Core::PRECOMPOSE_UNICODE, false)?, - ignore_case: boolean(self, "core.ignoreCase", &Core::IGNORE_CASE, false)?, - executable_bit: boolean(self, "core.fileMode", &Core::FILE_MODE, true)?, - symlink: boolean(self, "core.symlinks", &Core::SYMLINKS, true)?, - }, + attributes: self.assemble_attribute_globals( + git_dir, + case, + gix_worktree::fs::cache::state::attributes::Source::AttributeListThenWorktree, + self.attributes, + )?, + fs: capabilities, thread_limit, destination_is_initially_empty: false, overwrite_existing: false, @@ -193,15 +192,57 @@ impl Cache { .map(|v| Core::CHECK_STAT.try_into_checkstat(v)), )? .unwrap_or(true), - attribute_globals: assemble_attribute_globals(self, git_dir)?, }) } + + // TODO: at least one test, maybe related to core.attributesFile configuration. 
+ fn assemble_attribute_globals( + &self, + git_dir: &std::path::Path, + case: gix_glob::pattern::Case, + source: gix_worktree::fs::cache::state::attributes::Source, + attributes: crate::permissions::Attributes, + ) -> Result { + let configured_or_user_attributes = match self + .trusted_file_path("core", None, Core::ATTRIBUTES_FILE.name) + .transpose()? + { + Some(attributes) => Some(attributes), + None => { + if attributes.git { + self.xdg_config_path("attributes").ok().flatten().map(Cow::Owned) + } else { + None + } + } + }; + let attribute_files = [gix_attributes::Source::GitInstallation, gix_attributes::Source::System] + .into_iter() + .filter(|source| match source { + Source::GitInstallation => attributes.git_binary, + Source::System => attributes.system, + Source::Git | Source::Local => unreachable!("we don't offer turning this off right now"), + }) + .filter_map(|source| source.storage_location(&mut Self::make_source_env(self.environment))) + .chain(configured_or_user_attributes); + let info_attributes_path = git_dir.join("info").join("attributes"); + let mut buf = Vec::new(); + let mut collection = gix_attributes::search::MetadataCollection::default(); + Ok(gix_worktree::fs::cache::state::Attributes::new( + gix_attributes::Search::new_globals(attribute_files, &mut buf, &mut collection)?, + Some(info_attributes_path), + case, + source, + collection, + )) + } + pub(crate) fn xdg_config_path( &self, resource_file_name: &str, ) -> Result, gix_sec::permission::Error> { std::env::var_os("XDG_CONFIG_HOME") - .map(|path| (PathBuf::from(path), &self.xdg_config_home_env)) + .map(|path| (PathBuf::from(path), &self.environment.xdg_config_home)) .or_else(|| { gix_path::env::home_dir().map(|mut p| { ( @@ -209,7 +250,7 @@ impl Cache { p.push(".config"); p }, - &self.home_env, + &self.environment.home, ) }) }) @@ -225,6 +266,6 @@ impl Cache { /// We never fail for here even if the permission is set to deny as we `gix-config` will fail later /// if it actually wants to use 
the home directory - we don't want to fail prematurely. pub(crate) fn home_dir(&self) -> Option { - gix_path::env::home_dir().and_then(|path| self.home_env.check_opt(path)) + gix_path::env::home_dir().and_then(|path| self.environment.home.check_opt(path)) } } diff --git a/gix/src/config/cache/init.rs b/gix/src/config/cache/init.rs index 5f455e9a81c..61806b80d40 100644 --- a/gix/src/config/cache/init.rs +++ b/gix/src/config/cache/init.rs @@ -1,5 +1,6 @@ #![allow(clippy::result_large_err)] use std::borrow::Cow; +use std::ffi::OsString; use gix_sec::Permission; @@ -32,15 +33,16 @@ impl Cache { filter_config_section: fn(&gix_config::file::Metadata) -> bool, git_install_dir: Option<&std::path::Path>, home: Option<&std::path::Path>, - repository::permissions::Environment { + environment @ repository::permissions::Environment { git_prefix, - home: home_env, - xdg_config_home: xdg_config_home_env, ssh_prefix: _, + xdg_config_home: _, + home: _, http_transport, identity, objects, }: repository::permissions::Environment, + attributes: repository::permissions::Attributes, repository::permissions::Config { git_binary: use_installation, system: use_system, @@ -69,8 +71,6 @@ impl Cache { }; let config = { - let home_env = &home_env; - let xdg_config_home_env = &xdg_config_home_env; let git_prefix = &git_prefix; let metas = [ gix_config::source::Kind::GitInstallation, @@ -88,15 +88,7 @@ impl Cache { _ => {} } source - .storage_location(&mut |name| { - match name { - git_ if git_.starts_with("GIT_") => Some(git_prefix), - "XDG_CONFIG_HOME" => Some(xdg_config_home_env), - "HOME" => Some(home_env), - _ => None, - } - .and_then(|perm| perm.check_opt(name).and_then(gix_path::env::var)) - }) + .storage_location(&mut Self::make_source_env(environment)) .map(|p| (source, p.into_owned())) }) .map(|(source, path)| gix_config::file::Metadata { @@ -175,9 +167,9 @@ impl Cache { ignore_case, hex_len, filter_config_section, - xdg_config_home_env, - home_env, + environment, lenient_config, + 
attributes, user_agent: Default::default(), personas: Default::default(), url_rewrite: Default::default(), @@ -240,6 +232,31 @@ impl Cache { Ok(()) } + + pub(crate) fn make_source_env( + crate::permissions::Environment { + xdg_config_home, + git_prefix, + home, + .. + }: crate::permissions::Environment, + ) -> impl FnMut(&str) -> Option { + move |name| { + match name { + git_ if git_.starts_with("GIT_") => Some(git_prefix), + "XDG_CONFIG_HOME" => Some(xdg_config_home), + "HOME" => { + return if home.is_allowed() { + gix_path::env::home_dir().map(Into::into) + } else { + None + } + } + _ => None, + } + .and_then(|perm| perm.check_opt(name).and_then(gix_path::env::var)) + } + } } impl crate::Repository { diff --git a/gix/src/config/mod.rs b/gix/src/config/mod.rs index d9e53e37481..8fe8ce53f35 100644 --- a/gix/src/config/mod.rs +++ b/gix/src/config/mod.rs @@ -113,6 +113,19 @@ pub mod checkout_options { ConfigBoolean(#[from] super::boolean::Error), #[error(transparent)] CheckoutWorkers(#[from] super::checkout::workers::Error), + #[error(transparent)] + Attributes(#[from] super::attribute_stack::Error), + } +} + +/// +pub mod attribute_stack { + /// The error produced when setting up the attribute stack to query `gitattributes`. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("An attribute file could not be read")] + Io(#[from] std::io::Error), #[error("Failed to interpolate the attribute file configured at `core.attributesFile`")] AttributesFileInterpolation(#[from] gix_config::path::interpolate::Error), } @@ -449,9 +462,7 @@ pub(crate) struct Cache { /// If true, we should default what's possible if something is misconfigured, on case by case basis, to be more resilient. /// Also available in options! Keep in sync! pub lenient_config: bool, - /// Define how we can use values obtained with `xdg_config(…)` and its `XDG_CONFIG_HOME` variable. 
- xdg_config_home_env: gix_sec::Permission, - /// Define how we can use values obtained with `xdg_config(…)`. and its `HOME` variable. - home_env: gix_sec::Permission, + attributes: crate::permissions::Attributes, + environment: crate::permissions::Environment, // TODO: make core.precomposeUnicode available as well. } diff --git a/gix/src/open/repository.rs b/gix/src/open/repository.rs index 2164a143950..4163ce2618f 100644 --- a/gix/src/open/repository.rs +++ b/gix/src/open/repository.rs @@ -146,7 +146,12 @@ impl ThreadSafeRepository { lenient_config, bail_if_untrusted, open_path_as_is: _, - permissions: Permissions { ref env, config }, + permissions: + Permissions { + ref env, + config, + attributes, + }, ref api_config_overrides, ref cli_config_overrides, ref current_dir, @@ -190,7 +195,8 @@ impl ThreadSafeRepository { filter_config_section, git_install_dir.as_deref(), home.as_deref(), - env.clone(), + *env, + attributes, config, lenient_config, api_config_overrides, diff --git a/gix/src/permissions.rs b/gix/src/permissions.rs index f64bb3bc2c6..c1838bf27df 100644 --- a/gix/src/permissions.rs +++ b/gix/src/permissions.rs @@ -1 +1 @@ -pub use crate::repository::permissions::{Config, Environment}; +pub use crate::repository::permissions::{Attributes, Config, Environment}; diff --git a/gix/src/repository/permissions.rs b/gix/src/repository/permissions.rs index 88b61b73903..d6d0f6ee72c 100644 --- a/gix/src/repository/permissions.rs +++ b/gix/src/repository/permissions.rs @@ -3,10 +3,12 @@ use gix_sec::Trust; /// Permissions associated with various resources of a git repository #[derive(Debug, Clone)] pub struct Permissions { - /// Permissions related to the environment + /// Control which environment variables may be accessed. pub env: Environment, - /// Permissions related to the handling of git configuration. + /// Permissions related to where git configuration should be loaded from. 
pub config: Config, + /// Permissions related to where `gitattributes` should be loaded from. + pub attributes: Attributes, } /// Configure from which sources git configuration may be loaded. @@ -17,7 +19,7 @@ pub struct Config { /// The git binary may come with configuration as part of its configuration, and if this is true (default false) /// we will load the configuration of the git binary, if present and not a duplicate of the ones below. /// - /// It's disable by default as it involves executing the git binary once per execution of the application. + /// It's disabled by default as it may involve executing the git binary once per execution of the application. pub git_binary: bool, /// Whether to use the system configuration. /// This is defined as `$(prefix)/etc/gitconfig` on unix. @@ -50,6 +52,18 @@ impl Config { includes: true, } } + + /// Load only configuration local to the git repository. + pub fn isolated() -> Self { + Config { + git_binary: false, + system: false, + git: false, + user: false, + env: false, + includes: false, + } + } } impl Default for Config { @@ -58,8 +72,55 @@ impl Default for Config { } } +/// Configure from which `gitattributes` files may be loaded. +/// +/// Note that `.gitattributes` files from within the repository are always loaded. +#[derive(Copy, Clone, Ord, PartialOrd, PartialEq, Eq, Debug, Hash)] +pub struct Attributes { + /// The git binary may come with attribute configuration in its installation directory, and if this is true (default false) + /// we will load the configuration of the git binary. + /// + /// It's disabled by default as it involves executing the git binary once per execution of the application. + pub git_binary: bool, + /// Whether to use the system-wide attributes file. + /// This is typically defined as `$(prefix)/etc/gitattributes`. + pub system: bool, + /// Whether to use the git application configuration. + /// + /// A platform defined location for where a user's git application configuration should be located. 
+ /// If `$XDG_CONFIG_HOME` is not set or empty, `$HOME/.config/git/attributes` will be used + /// on unix. + pub git: bool, +} + +impl Attributes { + /// Allow everything which usually relates to a fully trusted environment + pub fn all() -> Self { + Attributes { + git_binary: false, + system: true, + git: true, + } + } + + /// Allow loading attributes that are local to the git repository. + pub fn isolated() -> Self { + Attributes { + git_binary: false, + system: false, + git: false, + } + } +} + +impl Default for Attributes { + fn default() -> Self { + Self::all() + } +} + /// Permissions related to the usage of environment variables -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub struct Environment { /// Control whether resources pointed to by `XDG_CONFIG_HOME` can be used when looking up common configuration values. /// @@ -101,18 +162,29 @@ impl Environment { objects: allow, } } + + /// Don't allow loading any environment variables. + pub fn isolated() -> Self { + let deny = gix_sec::Permission::Deny; + Environment { + xdg_config_home: deny, + home: deny, + ssh_prefix: deny, + git_prefix: deny, + http_transport: deny, + identity: deny, + objects: deny, + } + } } impl Permissions { - /// Return permissions that will not include configuration files not owned by the current user, - /// but trust system and global configuration files along with those which are owned by the current user. - /// - /// This allows to read and write repositories even if they aren't owned by the current user, but avoid using - /// anything else that could cause us to write into unknown locations or use programs beyond our `PATH`. 
+ /// Secure permissions are similar to `all()` pub fn secure() -> Self { Permissions { env: Environment::all(), config: Config::all(), + attributes: Attributes::all(), } } @@ -122,32 +194,16 @@ impl Permissions { Permissions { env: Environment::all(), config: Config::all(), + attributes: Attributes::all(), } } /// Don't read any but the local git configuration and deny reading any environment variables. pub fn isolated() -> Self { Permissions { - config: Config { - git_binary: false, - system: false, - git: false, - user: false, - env: false, - includes: false, - }, - env: { - let deny = gix_sec::Permission::Deny; - Environment { - xdg_config_home: deny, - home: deny, - ssh_prefix: deny, - git_prefix: deny, - http_transport: deny, - identity: deny, - objects: deny, - } - }, + config: Config::isolated(), + attributes: Attributes::isolated(), + env: Environment::isolated(), } } } diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 3051e69f0db..005393087ab 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -151,7 +151,7 @@ pub mod excludes { None, case, )); - let attribute_list = state.build_attribute_list(index, index.path_backing(), case); + let attribute_list = state.attribute_list_from_index(index, index.path_backing(), case); Ok(gix_worktree::fs::Cache::new( self.path, state, diff --git a/gix/tests/object/tree/diff.rs b/gix/tests/object/tree/diff.rs index 6efa203f9b8..8a4e6836ab9 100644 --- a/gix/tests/object/tree/diff.rs +++ b/gix/tests/object/tree/diff.rs @@ -153,6 +153,7 @@ mod track_rewrites { for percentage in [None, Some(0.5)] { let mut actual = Vec::new(); + #[cfg_attr(windows, allow(unused_variables))] let out = from .changes()? 
.track_path() diff --git a/gix/tests/repository/object.rs b/gix/tests/repository/object.rs index 6419db26ee8..ab9deafe72d 100644 --- a/gix/tests/repository/object.rs +++ b/gix/tests/repository/object.rs @@ -144,12 +144,19 @@ mod tag { } mod commit_as { + use crate::util::restricted_and_git; use gix_testtools::tempfile; #[test] fn specify_committer_and_author() -> crate::Result { let tmp = tempfile::tempdir()?; - let repo = gix::open_opts(gix::init(&tmp)?.path(), crate::restricted())?; + let repo = gix::ThreadSafeRepository::init_opts( + &tmp, + gix::create::Kind::WithWorktree, + Default::default(), + restricted_and_git(), + )? + .to_thread_local(); let empty_tree = repo.empty_tree(); let committer = gix::actor::Signature { name: "c".into(), @@ -210,7 +217,13 @@ mod commit { fn single_line_initial_commit_empty_tree_ref_nonexisting() -> crate::Result { let _env = freeze_time(); let tmp = tempfile::tempdir()?; - let repo = gix::open_opts(gix::init(&tmp)?.path(), restricted_and_git())?; + let repo = gix::ThreadSafeRepository::init_opts( + &tmp, + gix::create::Kind::WithWorktree, + Default::default(), + restricted_and_git(), + )? + .to_thread_local(); let empty_tree_id = repo.write_object(&gix::objs::Tree::empty())?; let commit_id = repo.commit("HEAD", "initial", empty_tree_id, gix::commit::NO_PARENT_IDS)?; assert_eq!( From 923712175fffaa6d0b71e109f243bdd6bea3ff36 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 5 Apr 2023 19:15:35 +0200 Subject: [PATCH 14/14] Assure we load all gitattributes when needed. This is an on-demand operation anyway, but now we turn on the loading of git binary-specific gitattributes which should help to emulate gits behaviour perfectly. 
--- gitoxide-core/src/index/checkout.rs | 2 +- src/plumbing/main.rs | 1 + .../refs/remote ref-list-no-networking-in-small-failure | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index 7b3ed28ea04..30268adfa79 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -56,7 +56,7 @@ pub fn checkout_exclusive( } let opts = gix::worktree::index::checkout::Options { - fs: gix::worktree::fs::Capabilities::probe(dest_directory), + fs: gix::utils::FilesystemCapabilities::probe(dest_directory), destination_is_initially_empty: true, overwrite_existing: false, diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 3c0af9d0549..3f35d5b2a0d 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -84,6 +84,7 @@ pub fn main() -> Result<()> { ); let to_match_settings = |mut opts: gix::open::Options| { opts.permissions.config.git_binary = git_installation; + opts.permissions.attributes.git_binary = git_installation; if config.is_empty() { opts } else { diff --git a/tests/snapshots/plumbing/repository/remote/refs/remote ref-list-no-networking-in-small-failure b/tests/snapshots/plumbing/repository/remote/refs/remote ref-list-no-networking-in-small-failure index b63991b2187..b6c897ecd38 100644 --- a/tests/snapshots/plumbing/repository/remote/refs/remote ref-list-no-networking-in-small-failure +++ b/tests/snapshots/plumbing/repository/remote/refs/remote ref-list-no-networking-in-small-failure @@ -1,7 +1,7 @@ error: unrecognized subcommand 'remote' - note: subcommands 'r', 'tree', 'free' exist - note: to pass 'remote' as a value, use 'gix -- remote' + tip: some similar subcommands exist: 'r', 'tree', 'free' + tip: to pass 'remote' as a value, use 'gix -- remote' Usage: gix [OPTIONS]