From 16b9fddf001a7f024f300e2bffa43203ff271318 Mon Sep 17 00:00:00 2001
From: yukang <moorekang@gmail.com>
Date: Thu, 26 Mar 2026 11:42:38 +0800
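Subject: [PATCH] Improve doc comment unicode guidance

When the text direction codepoint lint fires on a doc comment, the final
note used to tell users to escape the codepoints. Escape sequences are not
interpreted inside doc comments, so that advice could not be followed as
written. For doc comments, suggest rewriting the comment as a
`#[doc = "..."]` attribute, where Unicode escapes work. The existing
wording is kept for the non-doc-comment case.

Add a UI regression test (unicode-control-doc-comment-issue-153096) and
update the affected expected outputs.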

---
 compiler/rustc_parse/src/errors.rs                  | 10 +++++++---
 compiler/rustc_parse/src/lexer/mod.rs               |  5 ++++-
 .../macro/unicode-control-codepoints-macros.stderr  | 10 +++++-----
 tests/ui/parser/unicode-control-codepoints.stderr   |  4 ++--
 .../unicode-control-doc-comment-issue-153096.rs     |  8 ++++++++
 .../unicode-control-doc-comment-issue-153096.stderr | 13 +++++++++++++
 6 files changed, 39 insertions(+), 11 deletions(-)
 create mode 100644 tests/ui/parser/unicode-control-doc-comment-issue-153096.rs
 create mode 100644 tests/ui/parser/unicode-control-doc-comment-issue-153096.stderr

diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs
index 0e852d2abda04..6faaafcc01005 100644
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@@ -4344,7 +4344,7 @@ impl Subdiagnostic for HiddenUnicodeCodepointsDiagLabels {
 
 pub(crate) enum HiddenUnicodeCodepointsDiagSub {
     Escape { spans: Vec<(char, Span)> },
-    NoEscape { spans: Vec<(char, Span)> },
+    NoEscape { spans: Vec<(char, Span)>, is_doc_comment: bool },
 }
 
 // Used because of multiple multipart_suggestion and note
@@ -4370,7 +4370,7 @@ impl Subdiagnostic for HiddenUnicodeCodepointsDiagSub {
                     Applicability::MachineApplicable,
                 );
             }
-            HiddenUnicodeCodepointsDiagSub::NoEscape { spans } => {
+            HiddenUnicodeCodepointsDiagSub::NoEscape { spans, is_doc_comment } => {
                 // FIXME: in other suggestions we've reversed the inner spans of doc comments. We
                 // should do the same here to provide the same good suggestions as we do for
                 // literals above.
@@ -4383,7 +4383,11 @@ impl Subdiagnostic for HiddenUnicodeCodepointsDiagSub {
                         .join(", "),
                 );
                 diag.note(msg!("if their presence wasn't intentional, you can remove them"));
-                diag.note(msg!("if you want to keep them but make them visible in your source code, you can escape them: {$escaped}"));
+                if is_doc_comment {
+                    diag.note(msg!(r#"if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as {$escaped}"#));
+                } else {
+                    diag.note(msg!("if you want to keep them but make them visible in your source code, you can escape them: {$escaped}"));
+                }
             }
         }
     }
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 2223e6a265759..5766d25bc86ce 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -546,6 +546,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 self.mk_sp(start, self.pos),
                 0,
                 false,
+                true,
                 "doc comment",
             );
         }
@@ -580,6 +581,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             span,
             padding,
             point_at_inner_spans,
+            false,
             label,
         );
     }
@@ -590,6 +592,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         span: Span,
         padding: u32,
         point_at_inner_spans: bool,
+        is_doc_comment: bool,
         label: &str,
     ) {
         // Obtain the `Span`s for each of the forbidden chars.
@@ -610,7 +613,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         let sub = if point_at_inner_spans && !spans.is_empty() {
             errors::HiddenUnicodeCodepointsDiagSub::Escape { spans }
         } else {
-            errors::HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
+            errors::HiddenUnicodeCodepointsDiagSub::NoEscape { spans, is_doc_comment }
         };
 
         self.psess.buffer_lint(
diff --git a/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr b/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr
index 22fb1b945c654..b1d9c9d018744 100644
--- a/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr
+++ b/tests/ui/parser/macro/unicode-control-codepoints-macros.stderr
@@ -6,7 +6,7 @@ LL |         /// �test� RTL in doc in vec
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}', '\u{2066}'
    = note: `#[deny(text_direction_codepoint_in_literal)]` on by default
 
 error: unicode codepoint changing visible direction of text present in doc comment
@@ -19,7 +19,7 @@ LL | |          */
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}', '\u{2066}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
   --> $DIR/unicode-control-codepoints-macros.rs:33:9
@@ -31,7 +31,7 @@ LL | |          */
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}', '\u{2066}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
   --> $DIR/unicode-control-codepoints-macros.rs:41:9
@@ -41,7 +41,7 @@ LL |         /// �test� RTL in doc in proc macro
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}', '\u{2066}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
   --> $DIR/unicode-control-codepoints-macros.rs:46:9
@@ -51,7 +51,7 @@ LL |         /// �test� RTL in doc in proc macro
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}', '\u{2066}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}', '\u{2066}'
 
 error: aborting due to 5 previous errors
 
diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr
index 7978c1435f609..3b273f8212faa 100644
--- a/tests/ui/parser/unicode-control-codepoints.stderr
+++ b/tests/ui/parser/unicode-control-codepoints.stderr
@@ -232,7 +232,7 @@ LL | /**  '�'); */fn foo() {}
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
   --> $DIR/unicode-control-codepoints.rs:46:1
@@ -244,7 +244,7 @@ LL | |  *  '�'); */fn bar() {}
    |
    = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
    = note: if their presence wasn't intentional, you can remove them
-   = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{202e}'
 
 error: aborting due to 20 previous errors
 
diff --git a/tests/ui/parser/unicode-control-doc-comment-issue-153096.rs b/tests/ui/parser/unicode-control-doc-comment-issue-153096.rs
new file mode 100644
index 0000000000000..a7998daa72fdc
--- /dev/null
+++ b/tests/ui/parser/unicode-control-doc-comment-issue-153096.rs
@@ -0,0 +1,8 @@
+//@ edition: 2024
+
+#[allow(unused)]
+/// ⁨א⁩, ⁨ב⁩, ⁨ג⁩, ⁨ד⁩, ⁨ה⁩
+//~^ ERROR unicode codepoint changing visible direction of text present in doc comment
+fn foo() {}
+
+fn main() {}
diff --git a/tests/ui/parser/unicode-control-doc-comment-issue-153096.stderr b/tests/ui/parser/unicode-control-doc-comment-issue-153096.stderr
new file mode 100644
index 0000000000000..668a7844da635
--- /dev/null
+++ b/tests/ui/parser/unicode-control-doc-comment-issue-153096.stderr
@@ -0,0 +1,13 @@
+error: unicode codepoint changing visible direction of text present in doc comment
+  --> $DIR/unicode-control-doc-comment-issue-153096.rs:4:1
+   |
+LL | /// �א�, �ב�, �ג�, �ד�, �ה�
+   | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ this doc comment contains invisible unicode text flow control codepoints
+   |
+   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
+   = note: if their presence wasn't intentional, you can remove them
+   = note: if you need to keep them and make them explicit in source, rewrite this doc comment as a `#[doc = "..."]` attribute and use Unicode escapes such as '\u{2068}', '\u{2069}', '\u{2068}', '\u{2069}', '\u{2068}', '\u{2069}', '\u{2068}', '\u{2069}', '\u{2068}', '\u{2069}'
+   = note: `#[deny(text_direction_codepoint_in_literal)]` on by default
+
+error: aborting due to 1 previous error
+