Skip to content

Commit a15c379

Browse files
committed
Handle win32 separator & prefixes for cygwin paths
1 parent 99426c5 commit a15c379

File tree

6 files changed

+627
-173
lines changed

6 files changed

+627
-173
lines changed

library/std/src/path.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,8 +1315,17 @@ impl PathBuf {
13151315
need_sep = false
13161316
}
13171317

1318+
let need_clear = if cfg!(target_os = "cygwin") {
1319+
// If path is absolute and its prefix is none, it is like `/foo`,
1320+
// and will be handled below.
1321+
path.prefix().is_some()
1322+
} else {
1323+
// On Unix: prefix is always None.
1324+
path.is_absolute() || path.prefix().is_some()
1325+
};
1326+
13181327
// absolute `path` replaces `self`
1319-
if path.is_absolute() || path.prefix().is_some() {
1328+
if need_clear {
13201329
self.inner.truncate(0);
13211330

13221331
// verbatim paths need . and .. removed
@@ -3615,6 +3624,9 @@ impl Error for NormalizeError {}
36153624
/// paths, this is currently equivalent to calling
36163625
/// [`GetFullPathNameW`][windows-path].
36173626
///
3627+
/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path]
3628+
/// with mode `CCP_WIN_A_TO_POSIX`.
3629+
///
36183630
/// Note that these [may change in the future][changes].
36193631
///
36203632
/// # Errors
@@ -3667,11 +3679,36 @@ impl Error for NormalizeError {}
36673679
/// # fn main() {}
36683680
/// ```
36693681
///
3682+
/// ## Cygwin paths
3683+
///
3684+
/// ```
3685+
/// # #[cfg(target_os = "cygwin")]
3686+
/// fn main() -> std::io::Result<()> {
3687+
/// use std::path::{self, Path};
3688+
///
3689+
/// // Relative to absolute
3690+
/// let absolute = path::absolute("foo/./bar")?;
3691+
/// assert!(absolute.ends_with(r"foo/bar"));
3692+
///
3693+
/// // Windows absolute to absolute
3694+
/// let absolute = path::absolute(r"C:\foo//test\..\./bar.rs")?;
3695+
/// assert!(absolute.ends_with("/c/foo/bar.rs"));
3696+
///
3697+
/// // POSIX absolute to absolute
3698+
/// let absolute = path::absolute("/foo//test/.././bar.rs")?;
3699+
/// assert_eq!(absolute, Path::new("/foo//test/.././bar.rs"));
3700+
/// Ok(())
3701+
/// }
3702+
/// # #[cfg(not(target_os = "cygwin"))]
3703+
/// # fn main() {}
3704+
/// ```
3705+
///
36703706
/// Note that this [may change in the future][changes].
36713707
///
36723708
/// [changes]: io#platform-specific-behavior
36733709
/// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
36743710
/// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
3711+
/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
36753712
#[stable(feature = "absolute_path", since = "1.79.0")]
36763713
pub fn absolute<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
36773714
let path = path.as_ref();

library/std/src/sys/path/cygwin.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use crate::ffi::OsString;
2+
use crate::os::unix::ffi::OsStringExt;
3+
use crate::path::{Path, PathBuf};
4+
use crate::sys::common::small_c_string::run_path_with_cstr;
5+
use crate::sys::cvt;
6+
use crate::{io, ptr};
7+
8+
#[inline]
9+
pub fn is_sep_byte(b: u8) -> bool {
10+
b == b'/' || b == b'\\'
11+
}
12+
13+
/// Cygwin always prefers `/` over `\`, and it always converts all `/` to `\`
14+
/// internally when calling Win32 APIs. Therefore, the server component of path
15+
/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost`
16+
/// on Cygwin.
17+
#[inline]
18+
pub fn is_verbatim_sep(b: u8) -> bool {
19+
b == b'/' || b == b'\\'
20+
}
21+
22+
pub use super::windows_prefix::parse_prefix;
23+
24+
pub const MAIN_SEP_STR: &str = "/";
25+
pub const MAIN_SEP: char = '/';
26+
27+
unsafe extern "C" {
28+
// Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
29+
// Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902
30+
// Safety:
31+
// * `what` should be `CCP_WIN_A_TO_POSIX` here
32+
// * `from` is a null-terminated UTF-8 path
33+
// * `to` is a buffer whose size is `size`.
34+
//
35+
// Converts a path to an absolute POSIX path, regardless of whether the input is a Win32 path or a POSIX path.
36+
fn cygwin_conv_path(
37+
what: libc::c_uint,
38+
from: *const libc::c_char,
39+
to: *mut u8,
40+
size: libc::size_t,
41+
) -> libc::ssize_t;
42+
}
43+
44+
const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;
45+
46+
/// Make a POSIX path absolute.
47+
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
48+
run_path_with_cstr(path, &|path| {
49+
let conv = CCP_WIN_A_TO_POSIX;
50+
let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?;
51+
// On success, size should not be 0.
52+
debug_assert!(size >= 1);
53+
let size = size as usize;
54+
let mut buffer = Vec::with_capacity(size);
55+
cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?;
56+
unsafe {
57+
buffer.set_len(size - 1);
58+
}
59+
Ok(PathBuf::from(OsString::from_vec(buffer)))
60+
})
61+
}
62+
63+
pub(crate) fn is_absolute(path: &Path) -> bool {
64+
path.has_root()
65+
}

library/std/src/sys/path/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
cfg_if::cfg_if! {
22
if #[cfg(target_os = "windows")] {
33
mod windows;
4+
mod windows_prefix;
45
pub use windows::*;
56
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
67
mod sgx;
@@ -11,6 +12,10 @@ cfg_if::cfg_if! {
1112
} else if #[cfg(target_os = "uefi")] {
1213
mod uefi;
1314
pub use uefi::*;
15+
} else if #[cfg(target_os = "cygwin")] {
16+
mod cygwin;
17+
mod windows_prefix;
18+
pub use cygwin::*;
1419
} else {
1520
mod unix;
1621
pub use unix::*;

library/std/src/sys/path/windows.rs

Lines changed: 3 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
use crate::ffi::{OsStr, OsString};
2-
use crate::path::{Path, PathBuf, Prefix};
2+
use crate::path::{Path, PathBuf};
33
use crate::sys::api::utf16;
44
use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
55
use crate::{io, ptr};
66

77
#[cfg(test)]
88
mod tests;
99

10+
pub use super::windows_prefix::parse_prefix;
11+
1012
pub const MAIN_SEP_STR: &str = "\\";
1113
pub const MAIN_SEP: char = '\\';
1214

@@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
7779
path.into()
7880
}
7981

80-
struct PrefixParser<'a, const LEN: usize> {
81-
path: &'a OsStr,
82-
prefix: [u8; LEN],
83-
}
84-
85-
impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
86-
#[inline]
87-
fn get_prefix(path: &OsStr) -> [u8; LEN] {
88-
let mut prefix = [0; LEN];
89-
// SAFETY: Only ASCII characters are modified.
90-
for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
91-
prefix[i] = if ch == b'/' { b'\\' } else { ch };
92-
}
93-
prefix
94-
}
95-
96-
fn new(path: &'a OsStr) -> Self {
97-
Self { path, prefix: Self::get_prefix(path) }
98-
}
99-
100-
fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
101-
PrefixParserSlice {
102-
path: self.path,
103-
prefix: &self.prefix[..LEN.min(self.path.len())],
104-
index: 0,
105-
}
106-
}
107-
}
108-
109-
struct PrefixParserSlice<'a, 'b> {
110-
path: &'a OsStr,
111-
prefix: &'b [u8],
112-
index: usize,
113-
}
114-
115-
impl<'a> PrefixParserSlice<'a, '_> {
116-
fn strip_prefix(&self, prefix: &str) -> Option<Self> {
117-
self.prefix[self.index..]
118-
.starts_with(prefix.as_bytes())
119-
.then_some(Self { index: self.index + prefix.len(), ..*self })
120-
}
121-
122-
fn prefix_bytes(&self) -> &'a [u8] {
123-
&self.path.as_encoded_bytes()[..self.index]
124-
}
125-
126-
fn finish(self) -> &'a OsStr {
127-
// SAFETY: The unsafety here stems from converting between &OsStr and
128-
// &[u8] and back. This is safe to do because (1) we only look at ASCII
129-
// contents of the encoding and (2) new &OsStr values are produced only
130-
// from ASCII-bounded slices of existing &OsStr values.
131-
unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) }
132-
}
133-
}
134-
135-
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
136-
use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
137-
138-
let parser = PrefixParser::<8>::new(path);
139-
let parser = parser.as_slice();
140-
if let Some(parser) = parser.strip_prefix(r"\\") {
141-
// \\
142-
143-
// The meaning of verbatim paths can change when they use a different
144-
// separator.
145-
if let Some(parser) = parser.strip_prefix(r"?\")
146-
&& !parser.prefix_bytes().iter().any(|&x| x == b'/')
147-
{
148-
// \\?\
149-
if let Some(parser) = parser.strip_prefix(r"UNC\") {
150-
// \\?\UNC\server\share
151-
152-
let path = parser.finish();
153-
let (server, path) = parse_next_component(path, true);
154-
let (share, _) = parse_next_component(path, true);
155-
156-
Some(VerbatimUNC(server, share))
157-
} else {
158-
let path = parser.finish();
159-
160-
// in verbatim paths only recognize an exact drive prefix
161-
if let Some(drive) = parse_drive_exact(path) {
162-
// \\?\C:
163-
Some(VerbatimDisk(drive))
164-
} else {
165-
// \\?\prefix
166-
let (prefix, _) = parse_next_component(path, true);
167-
Some(Verbatim(prefix))
168-
}
169-
}
170-
} else if let Some(parser) = parser.strip_prefix(r".\") {
171-
// \\.\COM42
172-
let path = parser.finish();
173-
let (prefix, _) = parse_next_component(path, false);
174-
Some(DeviceNS(prefix))
175-
} else {
176-
let path = parser.finish();
177-
let (server, path) = parse_next_component(path, false);
178-
let (share, _) = parse_next_component(path, false);
179-
180-
if !server.is_empty() && !share.is_empty() {
181-
// \\server\share
182-
Some(UNC(server, share))
183-
} else {
184-
// no valid prefix beginning with "\\" recognized
185-
None
186-
}
187-
}
188-
} else {
189-
// If it has a drive like `C:` then it's a disk.
190-
// Otherwise there is no prefix.
191-
parse_drive(path).map(Disk)
192-
}
193-
}
194-
195-
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
196-
fn parse_drive(path: &OsStr) -> Option<u8> {
197-
// In most DOS systems, it is not possible to have more than 26 drive letters.
198-
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
199-
fn is_valid_drive_letter(drive: &u8) -> bool {
200-
drive.is_ascii_alphabetic()
201-
}
202-
203-
match path.as_encoded_bytes() {
204-
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
205-
_ => None,
206-
}
207-
}
208-
209-
// Parses a drive prefix exactly, e.g. "C:"
210-
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
211-
// only parse two bytes: the drive letter and the drive separator
212-
if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
213-
parse_drive(path)
214-
} else {
215-
None
216-
}
217-
}
218-
219-
// Parse the next path component.
220-
//
221-
// Returns the next component and the rest of the path excluding the component and separator.
222-
// Does not recognize `/` as a separator character if `verbatim` is true.
223-
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
224-
let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
225-
226-
match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
227-
Some(separator_start) => {
228-
let separator_end = separator_start + 1;
229-
230-
let component = &path.as_encoded_bytes()[..separator_start];
231-
232-
// Panic safe
233-
// The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
234-
let path = &path.as_encoded_bytes()[separator_end..];
235-
236-
// SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
237-
// is encoded in a single byte, therefore `bytes[separator_start]` and
238-
// `bytes[separator_end]` must be code point boundaries and thus
239-
// `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
240-
unsafe {
241-
(
242-
OsStr::from_encoded_bytes_unchecked(component),
243-
OsStr::from_encoded_bytes_unchecked(path),
244-
)
245-
}
246-
}
247-
None => (path, OsStr::new("")),
248-
}
249-
}
250-
25182
/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
25283
///
25384
/// This path may or may not have a verbatim prefix.

0 commit comments

Comments
 (0)