Skip to content

Handle win32 separator for cygwin paths #141864

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1315,8 +1315,17 @@ impl PathBuf {
need_sep = false
}

let need_clear = if cfg!(target_os = "cygwin") {
// If path is absolute and its prefix is none, it is like `/foo`,
// and will be handled below.
path.prefix().is_some()
} else {
// On Unix: prefix is always None.
path.is_absolute() || path.prefix().is_some()
};

// absolute `path` replaces `self`
if path.is_absolute() || path.prefix().is_some() {
if need_clear {
self.inner.truncate(0);

// verbatim paths need . and .. removed
Expand Down Expand Up @@ -3615,6 +3624,9 @@ impl Error for NormalizeError {}
/// paths, this is currently equivalent to calling
/// [`GetFullPathNameW`][windows-path].
///
/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path]
/// with mode `CCP_WIN_A_TO_POSIX`.
///
/// Note that these [may change in the future][changes].
///
/// # Errors
Expand Down Expand Up @@ -3667,11 +3679,36 @@ impl Error for NormalizeError {}
/// # fn main() {}
/// ```
///
/// ## Cygwin paths
///
/// ```
/// # #[cfg(target_os = "cygwin")]
/// fn main() -> std::io::Result<()> {
/// use std::path::{self, Path};
///
/// // Relative to absolute
/// let absolute = path::absolute("foo/./bar")?;
/// assert!(absolute.ends_with(r"foo/bar"));
///
/// // Windows absolute to absolute
/// let absolute = path::absolute(r"C:\foo//test\..\./bar.rs")?;
/// assert!(absolute.ends_with("/c/foo/bar.rs"));
///
/// // POSIX absolute to absolute
/// let absolute = path::absolute("/foo//test/.././bar.rs")?;
/// assert_eq!(absolute, Path::new("/foo//test/.././bar.rs"));
/// Ok(())
/// }
/// # #[cfg(not(target_os = "cygwin"))]
/// # fn main() {}
/// ```
///
/// Note that this [may change in the future][changes].
///
/// [changes]: io#platform-specific-behavior
/// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
/// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
#[stable(feature = "absolute_path", since = "1.79.0")]
pub fn absolute<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
let path = path.as_ref();
Expand Down
65 changes: 65 additions & 0 deletions library/std/src/sys/path/cygwin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use crate::ffi::OsString;
use crate::os::unix::ffi::OsStringExt;
use crate::path::{Path, PathBuf};
use crate::sys::common::small_c_string::run_path_with_cstr;
use crate::sys::cvt;
use crate::{io, ptr};

/// Returns `true` when `b` is one of the two path separator bytes accepted
/// here: `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}

/// Returns `true` when `b` separates components inside a verbatim prefix.
///
/// Cygwin always prefers `/` over `\`, and it always converts every `/` to
/// `\` internally before calling Win32 APIs. As a consequence, the server
/// component of the path `\\?\UNC\localhost/share` is `localhost/share` on
/// Win32, but `localhost` on Cygwin. Both bytes are therefore accepted here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}

pub use super::windows_prefix::parse_prefix;

pub const MAIN_SEP_STR: &str = "/";
pub const MAIN_SEP: char = '/';

unsafe extern "C" {
// Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
// Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902
//
// Converts the path `from` according to the conversion mode `what` and
// stores the result in `to`. With `CCP_WIN_A_TO_POSIX` the result is an
// absolute POSIX path, no matter whether the input is a Win32 path or a
// POSIX path.
//
// Safety:
// * `what` should be `CCP_WIN_A_TO_POSIX` here
// * `from` is a NUL-terminated UTF-8 path
// * `to` is a buffer whose size in bytes is `size`
//
// NOTE(review): `absolute` in this file relies on a call with a null `to`
// and `size == 0` returning the required buffer size (including the
// trailing NUL), and on failures being reported in a form `cvt` detects —
// confirm against the linked documentation.
fn cygwin_conv_path(
what: libc::c_uint,
from: *const libc::c_char,
to: *mut u8,
size: libc::size_t,
) -> libc::ssize_t;
}

// Conversion mode passed as `what` to `cygwin_conv_path` by `absolute`.
// NOTE(review): the numeric value must match the definition in Cygwin's
// headers — confirm when updating.
const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;

/// Makes `path` absolute by converting it with `cygwin_conv_path` in
/// `CCP_WIN_A_TO_POSIX` mode.
///
/// The input may be either a Win32-style or a POSIX-style path; the result is
/// whatever `cygwin_conv_path` produces for it, with the trailing NUL removed.
///
/// # Errors
///
/// Returns an error when `run_path_with_cstr` cannot hand `path` to the
/// closure, or when either `cygwin_conv_path` call reports a failure that
/// `cvt` turns into an `io::Error`.
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
    run_path_with_cstr(path, &|path| {
        let conv = CCP_WIN_A_TO_POSIX;

        // First call: ask for the required buffer size (which includes the
        // trailing NUL) without writing anything.
        // SAFETY: `path` is a valid NUL-terminated C string, and a null
        // destination with size 0 only queries the size.
        let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?;
        // If success, size should not be 0.
        debug_assert!(size >= 1);
        let size = size as usize;

        let mut buffer = Vec::with_capacity(size);
        // Second call: perform the conversion into the freshly allocated buffer.
        // SAFETY: `buffer` owns at least `size` bytes of writable capacity and
        // `path` is still a valid NUL-terminated C string.
        cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?;

        // Drop the trailing NUL. `saturating_sub` keeps `set_len` sound even if
        // the reported size were 0 in a build where the `debug_assert!` above is
        // compiled out; a plain `size - 1` would wrap to `usize::MAX` there.
        let len = size.saturating_sub(1);
        // SAFETY: `len <= size <= buffer.capacity()`, and the successful call
        // above is relied upon to have written the `size` bytes it reported,
        // so the first `len` bytes are initialized.
        unsafe {
            buffer.set_len(len);
        }
        Ok(PathBuf::from(OsString::from_vec(buffer)))
    })
}

/// Reports whether `path` counts as absolute here, which is decided solely by
/// whether it has a root component.
pub(crate) fn is_absolute(path: &Path) -> bool {
    Path::has_root(path)
}
5 changes: 5 additions & 0 deletions library/std/src/sys/path/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
cfg_if::cfg_if! {
if #[cfg(target_os = "windows")] {
mod windows;
mod windows_prefix;
pub use windows::*;
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
mod sgx;
Expand All @@ -11,6 +12,10 @@ cfg_if::cfg_if! {
} else if #[cfg(target_os = "uefi")] {
mod uefi;
pub use uefi::*;
} else if #[cfg(target_os = "cygwin")] {
mod cygwin;
mod windows_prefix;
pub use cygwin::*;
} else {
mod unix;
pub use unix::*;
Expand Down
175 changes: 3 additions & 172 deletions library/std/src/sys/path/windows.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use crate::ffi::{OsStr, OsString};
use crate::path::{Path, PathBuf, Prefix};
use crate::path::{Path, PathBuf};
use crate::sys::api::utf16;
use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
use crate::{io, ptr};

#[cfg(test)]
mod tests;

pub use super::windows_prefix::parse_prefix;

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';

Expand Down Expand Up @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
path.into()
}

/// A path paired with a copy of its first `LEN` bytes in which every `/` has
/// been replaced by `\`, so prefixes can be matched against a single
/// separator spelling.
struct PrefixParser<'a, const LEN: usize> {
    path: &'a OsStr,
    prefix: [u8; LEN],
}

impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
    /// Copies up to `LEN` leading bytes of `path`, rewriting `/` to `\`.
    /// Unused trailing slots stay zero.
    #[inline]
    fn get_prefix(path: &OsStr) -> [u8; LEN] {
        let mut normalized = [0; LEN];
        // Only the ASCII byte `/` is rewritten; every other byte is copied as-is.
        for (slot, &byte) in normalized.iter_mut().zip(path.as_encoded_bytes()) {
            *slot = if byte == b'/' { b'\\' } else { byte };
        }
        normalized
    }

    /// Builds a parser for `path`.
    fn new(path: &'a OsStr) -> Self {
        let prefix = Self::get_prefix(path);
        Self { path, prefix }
    }

    /// Returns a cursor positioned at the start of the normalized prefix,
    /// limited to the bytes that actually came from the path.
    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
        let filled = self.path.len().min(LEN);
        PrefixParserSlice { path: self.path, prefix: &self.prefix[..filled], index: 0 }
    }
}

/// A cursor over a normalized prefix: `index` counts how many leading bytes
/// have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// Advances past `prefix` if the unconsumed normalized bytes start with it;
    /// otherwise returns `None`.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(prefix.as_bytes()) {
            Some(Self { path: self.path, prefix: self.prefix, index: self.index + prefix.len() })
        } else {
            None
        }
    }

    /// Returns the consumed bytes as they appear in the original path, i.e.
    /// with the original separators.
    fn prefix_bytes(&self) -> &'a [u8] {
        let bytes = self.path.as_encoded_bytes();
        &bytes[..self.index]
    }

    /// Returns the part of the original path that has not been consumed.
    fn finish(self) -> &'a OsStr {
        let unconsumed = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(unconsumed) }
    }
}

/// Parses the prefix at the start of `path`, if there is one.
///
/// The leading bytes are matched with `/` treated like `\`. The forms
/// recognized are `\\?\UNC\server\share`, `\\?\C:`, `\\?\prefix`,
/// `\\.\device`, `\\server\share` and a plain drive such as `C:`.
/// Returns `None` when none of them applies.
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};

    let normalized = PrefixParser::<8>::new(path);
    let cursor = normalized.as_slice();

    // Without a leading `\\` the only possible prefix is a drive like `C:`;
    // otherwise there is no prefix.
    let Some(cursor) = cursor.strip_prefix(r"\\") else {
        return parse_drive(path).map(Disk);
    };

    // \\?\
    // The meaning of verbatim paths can change when they use a different
    // separator, so the bytes consumed so far must not contain `/`.
    if let Some(cursor) = cursor.strip_prefix(r"?\")
        && !cursor.prefix_bytes().contains(&b'/')
    {
        if let Some(cursor) = cursor.strip_prefix(r"UNC\") {
            // \\?\UNC\server\share
            let rest = cursor.finish();
            let (server, rest) = parse_next_component(rest, true);
            let (share, _) = parse_next_component(rest, true);
            return Some(VerbatimUNC(server, share));
        }

        let rest = cursor.finish();
        // In verbatim paths only an exact drive prefix is recognized.
        return Some(match parse_drive_exact(rest) {
            // \\?\C:
            Some(drive) => VerbatimDisk(drive),
            // \\?\prefix
            None => Verbatim(parse_next_component(rest, true).0),
        });
    }

    if let Some(cursor) = cursor.strip_prefix(r".\") {
        // \\.\COM42
        let (device, _) = parse_next_component(cursor.finish(), false);
        return Some(DeviceNS(device));
    }

    // \\server\share
    let rest = cursor.finish();
    let (server, rest) = parse_next_component(rest, false);
    let (share, _) = parse_next_component(rest, false);

    // Both parts must be present; otherwise no valid prefix beginning with
    // `\\` was recognized.
    if server.is_empty() || share.is_empty() { None } else { Some(UNC(server, share)) }
}

// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter in upper case when the path starts with an ASCII
// letter followed by `:`, and `None` otherwise.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let letter = *bytes.first()?;
    let colon = *bytes.get(1)?;

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    if colon == b':' && letter.is_ascii_alphabetic() {
        Some(letter.to_ascii_uppercase())
    } else {
        None
    }
}

// Parses a drive prefix exactly, e.g. "C:".
//
// Only the drive letter and the drive separator are parsed: whatever follows
// them must be either nothing or a separator byte, otherwise the result is
// `None`.
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
    match path.as_encoded_bytes().get(2) {
        // A third byte that is not a separator means this is not an exact drive.
        Some(&third) if !is_sep_byte(third) => None,
        _ => parse_drive(path),
    }
}

// Parse the next path component.
//
// Splits `path` at its first separator and returns the component before it
// together with the rest of the path, excluding the separator itself.
// Separators are detected with `is_verbatim_sep` when `verbatim` is true and
// with `is_sep_byte` otherwise. Without any separator the whole path is the
// component and the rest is empty.
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
    let is_separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
    let bytes = path.as_encoded_bytes();

    let Some(split_at) = bytes.iter().position(|&byte| is_separator(byte)) else {
        return (path, OsStr::new(""));
    };

    // Panic safe: `split_at < bytes.len()`, so `split_at + 1` is at most
    // `bytes.len()` and `bytes[bytes.len()..]` is a valid (empty) range.
    let head = &bytes[..split_at];
    let tail = &bytes[split_at + 1..];

    // SAFETY: `path` is a validly encoded slice and each of the separators
    // ('/', '\') is encoded in a single byte, therefore the positions just
    // before and just after the separator are code point boundaries, and
    // both `head` and `tail` are validly encoded slices.
    unsafe {
        (OsStr::from_encoded_bytes_unchecked(head), OsStr::from_encoded_bytes_unchecked(tail))
    }
}

/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
///
/// This path may or may not have a verbatim prefix.
Expand Down
Loading
Loading