diff --git a/Cargo.lock b/Cargo.lock index 177ff6594e244..7b117130683ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2170,6 +2170,7 @@ dependencies = [ name = "lint-docs" version = "0.1.0" dependencies = [ + "rustc-literal-escaper", "serde_json", "tempfile", "walkdir", diff --git a/compiler/rustc_lint/src/early/diagnostics.rs b/compiler/rustc_lint/src/early/diagnostics.rs index 40ca9e05d95d6..3acbe3ea61b84 100644 --- a/compiler/rustc_lint/src/early/diagnostics.rs +++ b/compiler/rustc_lint/src/early/diagnostics.rs @@ -187,6 +187,27 @@ pub(super) fn decorate_lint( lints::ReservedMultihash { suggestion }.decorate_lint(diag); } } + BuiltinLintDiag::HiddenUnicodeCodepoints { + label, + count, + span_label, + labels, + escape, + spans, + } => { + lints::HiddenUnicodeCodepointsDiag { + label: &label, + count, + span_label, + labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }), + sub: if escape { + lints::HiddenUnicodeCodepointsDiagSub::Escape { spans } + } else { + lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans } + }, + } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs deleted file mode 100644 index 491c2826baaa1..0000000000000 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ /dev/null @@ -1,136 +0,0 @@ -use ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; -use rustc_ast as ast; -use rustc_session::{declare_lint, declare_lint_pass}; -use rustc_span::{BytePos, Span, Symbol}; - -use crate::lints::{ - HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, HiddenUnicodeCodepointsDiagSub, -}; -use crate::{EarlyContext, EarlyLintPass, LintContext}; - -declare_lint! 
{ - #[allow(text_direction_codepoint_in_literal)] - /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the - /// visual representation of text on screen in a way that does not correspond to their on - /// memory representation. - /// - /// ### Explanation - /// - /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, - /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change - /// its direction on software that supports these codepoints. This makes the text "abc" display - /// as "cba" on screen. By leveraging software that supports these, people can write specially - /// crafted literals that make the surrounding code seem like it's performing one action, when - /// in reality it is performing another. Because of this, we proactively lint against their - /// presence to avoid surprises. - /// - /// ### Example - /// - /// ```rust,compile_fail - /// #![deny(text_direction_codepoint_in_literal)] - /// fn main() { - /// println!("{:?}", '‮'); - /// } - /// ``` - /// - /// {{produces}} - /// - pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - Deny, - "detect special Unicode codepoints that affect the visual representation of text on screen, \ - changing the direction in which text flows", -} - -declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); - -impl HiddenUnicodeCodepoints { - fn lint_text_direction_codepoint( - &self, - cx: &EarlyContext<'_>, - text: Symbol, - span: Span, - padding: u32, - point_at_inner_spans: bool, - label: &str, - ) { - // Obtain the `Span`s for each of the forbidden chars. 
- let spans: Vec<_> = text - .as_str() - .char_indices() - .filter_map(|(i, c)| { - TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { - let lo = span.lo() + BytePos(i as u32 + padding); - (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) - }) - }) - .collect(); - - let count = spans.len(); - let labels = point_at_inner_spans - .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() }); - let sub = if point_at_inner_spans && !spans.is_empty() { - HiddenUnicodeCodepointsDiagSub::Escape { spans } - } else { - HiddenUnicodeCodepointsDiagSub::NoEscape { spans } - }; - - cx.emit_span_lint( - TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - span, - HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub }, - ); - } - - fn check_literal( - &mut self, - cx: &EarlyContext<'_>, - text: Symbol, - lit_kind: ast::token::LitKind, - span: Span, - label: &'static str, - ) { - if !contains_text_flow_control_chars(text.as_str()) { - return; - } - let (padding, point_at_inner_spans) = match lit_kind { - // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), - // account for `c"` - ast::token::LitKind::CStr => (2, true), - // account for `r###"` - ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), - // account for `cr###"` - ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), - // suppress bad literals. - ast::token::LitKind::Err(_) => return, - // Be conservative just in case new literals do support these. 
- _ => (0, false), - }; - self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label); - } -} - -impl EarlyLintPass for HiddenUnicodeCodepoints { - fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { - if let ast::AttrKind::DocComment(_, comment) = attr.kind { - if contains_text_flow_control_chars(comment.as_str()) { - self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); - } - } - } - - #[inline] - fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { - // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` - match &expr.kind { - ast::ExprKind::Lit(token_lit) => { - self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal"); - } - ast::ExprKind::FormatArgs(args) => { - let (lit_kind, text) = args.uncooked_fmt_str; - self.check_literal(cx, text, lit_kind, args.span, "format string"); - } - _ => {} - }; - } -} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index 4ff586a79a6ec..97095aa24f4fc 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -48,7 +48,6 @@ mod errors; mod expect; mod for_loops_over_fallibles; mod foreign_modules; -pub mod hidden_unicode_codepoints; mod if_let_rescope; mod impl_trait_overcaptures; mod internal; @@ -91,7 +90,6 @@ use deref_into_dyn_supertrait::*; use drop_forget_useless::*; use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums; use for_loops_over_fallibles::*; -use hidden_unicode_codepoints::*; use if_let_rescope::IfLetRescope; use impl_trait_overcaptures::ImplTraitOvercaptures; use internal::*; @@ -174,7 +172,6 @@ early_lint_methods!( DeprecatedAttr: DeprecatedAttr::default(), WhileTrue: WhileTrue, NonAsciiIdents: NonAsciiIdents, - HiddenUnicodeCodepoints: HiddenUnicodeCodepoints, IncompleteInternalFeatures: IncompleteInternalFeatures, RedundantSemicolons: RedundantSemicolons, UnusedDocComment: UnusedDocComment, diff 
--git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index b8d242bad86ac..56b9891ae0d1f 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -104,6 +104,7 @@ declare_lint_pass! { TAIL_EXPR_DROP_ORDER, TEST_UNSTABLE_LINT, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + TEXT_DIRECTION_CODEPOINT_IN_LITERAL, TRIVIAL_CASTS, TRIVIAL_NUMERIC_CASTS, TYVAR_BEHIND_RAW_POINTER, @@ -3784,7 +3785,6 @@ declare_lint! { } declare_lint! { - #[allow(text_direction_codepoint_in_literal)] /// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that /// change the visual representation of text on screen in a way that does not correspond to /// their on memory representation. @@ -3794,7 +3794,7 @@ declare_lint! { /// ```rust,compile_fail /// #![deny(text_direction_codepoint_in_comment)] /// fn main() { - /// println!("{:?}"); // '‮'); + #[doc = " println!(\"{:?}\"); // '\u{202E}');"] /// } /// ``` /// @@ -3809,7 +3809,43 @@ declare_lint! { /// their use. pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT, Deny, - "invisible directionality-changing codepoints in comment" + "invisible directionality-changing codepoints in comment", + crate_level_only +} + +declare_lint! { + /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the + /// visual representation of text on screen in a way that does not correspond to their on + /// memory representation. + /// + /// ### Explanation + /// + /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, + /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change + /// its direction on software that supports these codepoints. This makes the text "abc" display + /// as "cba" on screen. 
By leveraging software that supports these, people can write specially + /// crafted literals that make the surrounding code seem like it's performing one action, when + /// in reality it is performing another. Because of this, we proactively lint against their + /// presence to avoid surprises. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// #![deny(text_direction_codepoint_in_literal)] + /// fn main() { + // ` - convince tidy that backticks match + #[doc = " println!(\"{:?}\", '\u{202E}');"] + // ` + /// } + /// ``` + /// + /// {{produces}} + /// + pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + Deny, + "detect special Unicode codepoints that affect the visual representation of text on screen, \ + changing the direction in which text flows", + crate_level_only } declare_lint! { diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index b4069b317bfa1..34297dcb2f521 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -698,6 +698,14 @@ pub enum BuiltinLintDiag { is_string: bool, suggestion: Span, }, + HiddenUnicodeCodepoints { + label: String, + count: usize, + span_label: Span, + labels: Option<Vec<(char, Span)>>, + escape: bool, + spans: Vec<(char, Span)>, + }, TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 78c5742414b81..2845bbed1c0ee 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; -use rustc_ast::util::unicode::contains_text_flow_control_chars; +use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; use rustc_errors::codes::*; use
rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ @@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode} use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, - TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL, }; use rustc_session::parse::ParseSess; use rustc_span::{BytePos, Pos, Span, Symbol, sym}; @@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); let content = self.str_from(content_start); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) } rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { @@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let content_start = start + BytePos(3); let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); let content = self.str_from_to(content_start, content_end); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => { @@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } else { None }; + self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal"); token::Literal(token::Lit { kind, symbol, suffix }) } rustc_lexer::TokenKind::Lifetime { starts_with_number } => { @@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } } + fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) { + if contains_text_flow_control_chars(content) { + self.report_text_direction_codepoint( + content, + self.mk_sp(start, 
self.pos), + 0, + false, + "doc comment", + ); + } + } + + fn lint_literal_unicode_text_flow( + &mut self, + text: Symbol, + lit_kind: token::LitKind, + span: Span, + label: &'static str, + ) { + if !contains_text_flow_control_chars(text.as_str()) { + return; + } + let (padding, point_at_inner_spans) = match lit_kind { + // account for `"` or `'` + token::LitKind::Str | token::LitKind::Char => (1, true), + // account for `c"` + token::LitKind::CStr => (2, true), + // account for `r###"` + token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), + }; + self.report_text_direction_codepoint( + text.as_str(), + span, + padding, + point_at_inner_spans, + label, + ); + } + + fn report_text_direction_codepoint( + &self, + text: &str, + span: Span, + padding: u32, + point_at_inner_spans: bool, + label: &str, + ) { + // Obtain the `Span`s for each of the forbidden chars. 
+ let spans: Vec<_> = text + .char_indices() + .filter_map(|(i, c)| { + TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(i as u32 + padding); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + + let count = spans.len(); + let labels = point_at_inner_spans.then_some(spans.clone()); + + self.psess.buffer_lint( + TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::HiddenUnicodeCodepoints { + label: label.to_string(), + count, + span_label: span, + labels, + escape: point_at_inner_spans && !spans.is_empty(), + spans, + }, + ); + } + fn validate_frontmatter( &self, start: BytePos, diff --git a/src/tools/lint-docs/Cargo.toml b/src/tools/lint-docs/Cargo.toml index 3578bda8276e7..f1ffda75ac0f7 100644 --- a/src/tools/lint-docs/Cargo.toml +++ b/src/tools/lint-docs/Cargo.toml @@ -7,6 +7,7 @@ description = "A script to extract the lint documentation for the rustc book." # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +rustc-literal-escaper = "0.0.2" serde_json = "1.0.57" tempfile = "3.1.0" walkdir = "2.3.1" diff --git a/src/tools/lint-docs/src/lib.rs b/src/tools/lint-docs/src/lib.rs index cacce01675fe2..6bb18c2bced70 100644 --- a/src/tools/lint-docs/src/lib.rs +++ b/src/tools/lint-docs/src/lib.rs @@ -4,6 +4,7 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; +use rustc_literal_escaper::{Mode, unescape_unicode}; use walkdir::WalkDir; mod groups; @@ -214,6 +215,16 @@ impl<'a> LintExtractor<'a> { let line = line.trim(); if let Some(text) = line.strip_prefix("/// ") { doc_lines.push(text.to_string()); + } else if let Some(text) = line.strip_prefix("#[doc = \"") { + let escaped = text.strip_suffix("\"]").unwrap(); + let mut buf = String::new(); + unescape_unicode(escaped, Mode::Str, &mut |_, c| match c { + Ok(c) => buf.push(c), + Err(err) => { + assert!(!err.is_fatal(), "failed to unescape 
string literal") + } + }); + doc_lines.push(buf); } else if line == "///" { doc_lines.push("".to_string()); } else if line.starts_with("// ") { diff --git a/tests/crashes/140281.rs b/tests/crashes/140281.rs deleted file mode 100644 index 76858cfc74a55..0000000000000 --- a/tests/crashes/140281.rs +++ /dev/null @@ -1,18 +0,0 @@ -//@ known-bug: #140281 - -macro_rules! foo { - ($x:expr) => { $x } -} - -fn main() { - let t = vec![ - /// ‮test⁦ RTL in doc in vec! - // ICE (Sadly) - 1 - ]; - - foo!( - /// ‮test⁦ RTL in doc in macro - 1 - ); -} diff --git a/tests/ui/parser/macro/auxiliary/unicode-control.rs b/tests/ui/parser/macro/auxiliary/unicode-control.rs new file mode 100644 index 0000000000000..8e73e3985ce9a --- /dev/null +++ b/tests/ui/parser/macro/auxiliary/unicode-control.rs @@ -0,0 +1,19 @@ +#![allow(text_direction_codepoint_in_literal)] + +extern crate proc_macro; +use proc_macro::*; + +#[proc_macro] +pub fn create_rtl_in_string(_: TokenStream) -> TokenStream { + r#""‮test⁦ RTL in string literal""#.parse().unwrap() +} + +#[proc_macro] +pub fn forward_stream(s: TokenStream) -> TokenStream { + s +} + +#[proc_macro] +pub fn recollect_stream(s: TokenStream) -> TokenStream { + s.into_iter().collect() +} diff --git a/tests/ui/parser/macro/unicode-control-codepoints-macros.rs b/tests/ui/parser/macro/unicode-control-codepoints-macros.rs new file mode 100644 index 0000000000000..775c50779760a --- /dev/null +++ b/tests/ui/parser/macro/unicode-control-codepoints-macros.rs @@ -0,0 +1,49 @@ +// Regression test for #140281 +//@ edition: 2021 +//@ proc-macro: unicode-control.rs + +extern crate unicode_control; +use unicode_control::*; + +macro_rules! foo { + ($x:expr) => { + $x + }; +} + +macro_rules! 
empty { + ($x:expr) => {}; +} + +fn main() { + let t = vec![ + /// ‮test⁦ RTL in doc in vec + //~^ ERROR unicode codepoint changing visible direction of text present in doc comment + 1 + ]; + foo!( + /** + * ‮test⁦ RTL in doc in macro + */ + //~^^^ ERROR unicode codepoint changing visible direction of text present in doc comment + 1 + ); + empty!( + /** + * ‮test⁦ RTL in doc in macro + */ + //~^^^ ERROR unicode codepoint changing visible direction of text present in doc comment + 1 + ); + let x = create_rtl_in_string!(); // OK + forward_stream!( + /// ‮test⁦ RTL in doc in proc macro + //~^ ERROR unicode codepoint changing visible direction of text present in doc comment + mod a {} + ); + recollect_stream!( + /// ‮test⁦ RTL in doc in proc macro + //~^ ERROR unicode codepoint changing visible direction of text present in doc comment + mod b {} + ); +} diff --git a/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr b/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr new file mode 100644 index 0000000000000..ca813399eac27 --- /dev/null +++ b/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr @@ -0,0 +1,57 @@ +error: unicode codepoint changing visible direction of text present in doc comment + --> $DIR/unicode-control-codepoints-macros.rs:20:9 + | +LL | /// �test� RTL in doc in vec + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this doc comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}' + = note: `#[deny(text_direction_codepoint_in_literal)]` on by default + +error: unicode codepoint changing visible direction of text present in doc comment + --> 
$DIR/unicode-control-codepoints-macros.rs:25:9 + | +LL | / /** +LL | | * �test� RTL in doc in macro +LL | | */ + | |___________^ this doc comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}' + +error: unicode codepoint changing visible direction of text present in doc comment + --> $DIR/unicode-control-codepoints-macros.rs:32:9 + | +LL | / /** +LL | | * �test� RTL in doc in macro +LL | | */ + | |___________^ this doc comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}' + +error: unicode codepoint changing visible direction of text present in doc comment + --> $DIR/unicode-control-codepoints-macros.rs:40:9 + | +LL | /// �test� RTL in doc in proc macro + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this doc comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}' + +error: unicode codepoint changing visible direction of text 
present in doc comment + --> $DIR/unicode-control-codepoints-macros.rs:45:9 + | +LL | /// �test� RTL in doc in proc macro + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this doc comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}' + +error: aborting due to 5 previous errors + diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index 14e1cfe59d39a..e3c906063c473 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -34,7 +34,7 @@ fn main() { //~^ ERROR unicode codepoint changing visible direction of text present in literal println!("{{‮}}"); - //~^ ERROR unicode codepoint changing visible direction of text present in format string + //~^ ERROR unicode codepoint changing visible direction of text present in literal } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index 27b95f9ac6115..7978c1435f609 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -100,21 +100,6 @@ LL | // if access_level != "us�e�r" { // Check if admin = note: `#[deny(text_direction_codepoint_in_comment)]` on by default = help: if their presence wasn't intentional, you can remove them -error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:40:1 - | -LL | //"/*� } �if isAdmin� � begin admins only */" - | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ - | | | | | | - 
| | | | | '\u{2066}' - | | | | '\u{2069}' - | | | '\u{2066}' - | | '\u{202e}' - | this comment contains invisible unicode text flow control codepoints - | - = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen - = help: if their presence wasn't intentional, you can remove them - error: unicode codepoint changing visible direction of text present in literal --> $DIR/unicode-control-codepoints.rs:13:22 | @@ -207,14 +192,14 @@ LL - let _ = cr#"�"#; LL + let _ = cr#"\u{202e}"#; | -error: unicode codepoint changing visible direction of text present in format string +error: unicode codepoint changing visible direction of text present in literal --> $DIR/unicode-control-codepoints.rs:36:14 | LL | println!("{{�}}"); | ^^^-^^^ | | | | | '\u{202e}' - | this format string contains an invisible unicode text flow control codepoint + | this literal contains an invisible unicode text flow control codepoint | = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen = help: if their presence wasn't intentional, you can remove them @@ -224,6 +209,21 @@ LL - println!("{{�}}"); LL + println!("{{\u{202e}}}"); | +error: unicode codepoint changing visible direction of text present in comment + --> $DIR/unicode-control-codepoints.rs:40:1 + | +LL | //"/*� } �if isAdmin� � begin admins only */" + | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ + | | | | | | + | | | | | '\u{2066}' + | | | | '\u{2069}' + | | | '\u{2066}' + | | '\u{202e}' + | this comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't 
intentional, you can remove them + error: unicode codepoint changing visible direction of text present in doc comment --> $DIR/unicode-control-codepoints.rs:43:1 |