Skip to content

Commit e7b2ac1

Browse files
committed
feat: add current_dir(precompose_unicode).
1 parent 69c8232 commit e7b2ac1

File tree

6 files changed

+65
-7
lines changed

6 files changed

+65
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-fs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ serde = ["dep:serde"]
1818

1919
[dependencies]
2020
gix-features = { version = "^0.37.1", path = "../gix-features" }
21+
gix-utils = { version = "^0.1.8", path = "../gix-utils" }
2122
serde = { version = "1.0.114", optional = true, default-features = false, features = ["std", "derive"] }
2223

2324
[dev-dependencies]

gix-fs/src/lib.rs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,17 @@ use std::path::PathBuf;
88
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
99
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
1010
pub struct Capabilities {
11-
/// If true, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that
12-
/// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
13-
/// using it. This also applies to input received from the command-line, so callers may have to be aware of this and
14-
/// perform conversions accordingly.
15-
/// If false, no conversions will be performed.
11+
/// If `true`, the filesystem will consider the precomposed umlaut `ä` similar to its decomposed form `"a\u{308}"` and treat them as the same.
12+
/// If `false`, the filesystem will only see bytes which means that the above example could live side-by-side.
13+
///
14+
/// Even though a filesystem that treats both forms the same will still reproduce the exact same byte sequence during traversal for instance,
15+
/// this might also mean that we see paths in their decomposed form (this happens when creating directory `ä` in MacOS Finder for example).
16+
///
17+
/// If Git stored such decomposed paths in the repository, which only sees bytes, then on Linux this might mean the path will look strange
18+
/// at best, which is why it prefers to store precomposed unicode on systems where it matters, like MacOS and Windows.
19+
///
20+
/// For best compatibility, and with this value being `true`, we will turn decomposed paths and input like command-line arguments into their
21+
/// precomposed forms, so no decomposed byte sequences should end up in storage.
1622
pub precompose_unicode: bool,
1723
/// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`.
1824
/// This is also called case-folding.
@@ -35,6 +41,22 @@ pub mod symlink;
3541
///
3642
pub mod dir;
3743

44+
/// Like [`std::env::current_dir()`], but it will `precompose_unicode` if that value is true, if the current directory
45+
/// is valid unicode and if there are decomposed unicode codepoints.
46+
///
47+
/// Thus, it will turn `"a\u{308}"` into `ä` if `true`.
48+
/// Keeping it `false` will not alter the output.
49+
///
50+
/// Note that `precompose_unicode` must be set using the `core.precomposeUnicode` git configuration.
51+
pub fn current_dir(precompose_unicode: bool) -> std::io::Result<PathBuf> {
52+
let cwd = std::env::current_dir()?;
53+
Ok(if precompose_unicode {
54+
gix_utils::str::precompose_path(cwd.into()).into_owned()
55+
} else {
56+
cwd
57+
})
58+
}
59+
3860
/// A stack of path components with the delegation of side-effects as the currently set path changes, component by component.
3961
#[derive(Clone)]
4062
pub struct Stack {

gix-fs/tests/capabilities/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
fn probe() {
33
let dir = tempfile::tempdir().unwrap();
44
std::fs::File::create(dir.path().join("config")).unwrap();
5-
let ctx = gix_fs::Capabilities::probe(dir.path());
6-
dbg!(ctx);
5+
gix_fs::Capabilities::probe(dir.path());
6+
77
let entries: Vec<_> = std::fs::read_dir(dir.path())
88
.unwrap()
99
.filter_map(Result::ok)

gix-fs/tests/current_dir.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#[test]
2+
fn precompose_unicode() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
3+
let tmp = tempfile::TempDir::new()?;
4+
5+
let decomposed = "a\u{308}";
6+
// Note that even on MacOS decomposition doesn't happen reliably, so we ensure it's decomposed
7+
// which then should work everywhere.
8+
let cwd = tmp.path().join(decomposed);
9+
10+
std::fs::create_dir(&cwd)?;
11+
std::env::set_current_dir(&cwd)?;
12+
13+
let keep_as_is = false;
14+
let dirname = gix_fs::current_dir(keep_as_is)?
15+
.file_name()
16+
.expect("present")
17+
.to_str()
18+
.expect("no illformed unicode")
19+
.to_owned();
20+
21+
assert_eq!(dirname.chars().count(), decomposed.chars().count());
22+
23+
let precomposed = "ä";
24+
let precompose_unicode = true;
25+
let dirname = gix_fs::current_dir(precompose_unicode)?
26+
.file_name()
27+
.expect("present")
28+
.to_str()
29+
.expect("no illformed unicode")
30+
.to_owned();
31+
assert_eq!(dirname.chars().count(), precomposed.chars().count());
32+
Ok(())
33+
}

gix-fs/tests/fs.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync + 'static>>;
2+
23
mod capabilities;
34
mod dir;
45
mod stack;

0 commit comments

Comments
 (0)