-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Windows: set main thread name without re-encoding #123534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,102 @@ use core::ptr::addr_of; | |
|
||
use super::c; | ||
|
||
/// Creates a null-terminated UTF-16 string from a str. | ||
pub macro wide_str($str:literal) {{ | ||
const _: () = { | ||
if core::slice::memchr::memchr(0, $str.as_bytes()).is_some() { | ||
panic!("null terminated strings cannot contain interior nulls"); | ||
} | ||
}; | ||
crate::sys::pal::windows::api::utf16!(concat!($str, '\0')) | ||
}} | ||
|
||
/// Creates a UTF-16 string from a str without null termination. | ||
pub macro utf16($str:expr) {{ | ||
const UTF8: &str = $str; | ||
const UTF16_LEN: usize = crate::sys::pal::windows::api::utf16_len(UTF8); | ||
const UTF16: [u16; UTF16_LEN] = crate::sys::pal::windows::api::to_utf16(UTF8); | ||
&UTF16 | ||
}} | ||
|
||
#[cfg(test)] | ||
mod tests; | ||
|
||
/// Gets the UTF-16 length of a UTF-8 string, for use in the `utf16` macro.
///
/// Returns the number of `u16` code units needed to encode `s` as UTF-16.
/// The string is walked one code point at a time by inspecting only the lead
/// byte of each UTF-8 sequence.
pub const fn utf16_len(s: &str) -> usize {
    let s = s.as_bytes();
    let mut i = 0;
    let mut len = 0;
    while i < s.len() {
        // the length of a UTF-8 encoded code-point is given by the number of
        // leading ones, except in the case of ASCII.
        let utf8_len = match s[i].leading_ones() {
            0 => 1,
            n => n as usize,
        };
        i += utf8_len;
        // Note that UTF-16 surrogates (U+D800 to U+DFFF) are not encodable as UTF-8,
        // so (unlike with WTF-8) we don't have to worry about how they'll get re-encoded.
        // Sequences of up to three bytes fit in one UTF-16 unit; four-byte
        // sequences need a surrogate pair.
        len += if utf8_len < 4 { 1 } else { 2 };
    }
    len
}
|
||
/// Const convert UTF-8 to UTF-16, for use in the `utf16` macro.
///
/// `UTF16_LEN` is the length of the returned array and is expected to be the
/// number of UTF-16 code units in `s` (as computed by `utf16_len`). The output
/// array starts zero-filled, so if `UTF16_LEN` is larger than required the
/// remaining elements stay `0`; if it is smaller, indexing the output panics
/// (a compile error when evaluated in a const context).
///
/// Note that this is designed for use in const contexts so is not optimized.
pub const fn to_utf16<const UTF16_LEN: usize>(s: &str) -> [u16; UTF16_LEN] {
    let mut output = [0_u16; UTF16_LEN];
    // `pos` indexes the UTF-16 output, `i` indexes the UTF-8 input bytes.
    let mut pos = 0;
    let s = s.as_bytes();
    let mut i = 0;
    while i < s.len() {
        match s[i].leading_ones() {
            // Decode UTF-8 based on its length.
            // See https://en.wikipedia.org/wiki/UTF-8
            0 => {
                // ASCII is the same in both encodings
                output[pos] = s[i] as u16;
                i += 1;
                pos += 1;
            }
            2 => {
                // Bits: 110xxxxx 10xxxxxx
                output[pos] = ((s[i] as u16 & 0b11111) << 6) | (s[i + 1] as u16 & 0b111111);
                i += 2;
                pos += 1;
            }
            3 => {
                // Bits: 1110xxxx 10xxxxxx 10xxxxxx
                output[pos] = ((s[i] as u16 & 0b1111) << 12)
                    | ((s[i + 1] as u16 & 0b111111) << 6)
                    | (s[i + 2] as u16 & 0b111111);
                i += 3;
                pos += 1;
            }
            4 => {
                // Bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                let mut c = ((s[i] as u32 & 0b111) << 18)
                    | ((s[i + 1] as u32 & 0b111111) << 12)
                    | ((s[i + 2] as u32 & 0b111111) << 6)
                    | (s[i + 3] as u32 & 0b111111);
                // re-encode as UTF-16 (see https://en.wikipedia.org/wiki/UTF-16)
                // - Subtract 0x10000 from the code point
                // - For the high surrogate, shift right by 10 then add 0xD800
                // - For the low surrogate, take the low 10 bits then add 0xDC00
                c -= 0x10000;
                output[pos] = ((c >> 10) + 0xD800) as u16;
                output[pos + 1] = ((c & 0b1111111111) + 0xDC00) as u16;
                i += 4;
                pos += 2;
            }
            // valid UTF-8 cannot have any other values
            _ => unreachable!(),
        }
    }
    output
}
Comment on lines
+78
to
+131
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice work. I feel like at least some of this should be using more public std API instead of a bunch of sorcerous isopsephia, but I looked for equivalents and couldn't find any in the stdlib, so this will do for now.
||
|
||
/// Helper method for getting the size of `T` as a u32. | ||
/// Errors at compile time if the size would overflow. | ||
/// | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
use crate::sys::pal::windows::api::{utf16, wide_str}; | ||
|
||
/// Asserts that `wide_str!` and `utf16!` agree with `str::encode_utf16` for
/// the given literal: `wide_str!` must yield the encoding followed by a single
/// `0` terminator, and `utf16!` must yield the encoding alone.
macro_rules! check_utf16 {
    ($str:literal) => {{
        assert!(wide_str!($str).iter().copied().eq($str.encode_utf16().chain([0])));
        assert!(utf16!($str).iter().copied().eq($str.encode_utf16()));
    }};
}
|
||
/// Checks the UTF-16 macros against ASCII, a three-byte UTF-8 character,
/// a string that includes a four-byte UTF-8 character, and two-byte
/// (Latin-extended) text.
#[test]
fn test_utf16_macros() {
    check_utf16!("hello world");
    check_utf16!("€4.50");
    check_utf16!("𨉟呐㗂越");
    check_utf16!("Pchnąć w tę łódź jeża lub ośm skrzyń fig");
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.