Skip to content

[clang-format] Handle Java text blocks #141334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
owenca merged 2 commits into llvm:main from the 61954 branch
May 25, 2025
Merged

[clang-format] Handle Java text blocks #141334

owenca merged 2 commits into llvm:main from the 61954 branch
May 25, 2025

Conversation

owenca
Copy link
Contributor

@owenca owenca commented May 24, 2025

Fix #61954

@llvmbot
Copy link
Member

llvmbot commented May 24, 2025

@llvm/pr-subscribers-clang-format

Author: Owen Pan (owenca)

Changes

Fix #61954


Full diff: https://github.com/llvm/llvm-project/pull/141334.diff

3 Files Affected:

  • (modified) clang/lib/Format/FormatTokenLexer.cpp (+45)
  • (modified) clang/lib/Format/FormatTokenLexer.h (+2)
  • (modified) clang/unittests/Format/FormatTestJava.cpp (+52)
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 864486a9b878d..31c3613c8b083 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -694,6 +694,49 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
   return true;
 }
 
+// Tries to recognize a Java text block. Must be called when the current token
+// is a string literal; it only acts when that token is the empty literal `""`
+// and is immediately followed by a third `"` in the buffer, i.e. the source
+// has `"""`. If a well-formed opening delimiter and a matching closing `"""`
+// are found, resetLexer() is called with the file offset just past the closing
+// delimiter; otherwise the function returns without calling resetLexer().
+void FormatTokenLexer::tryParseJavaTextBlock() {
+  if (FormatTok->TokenText != "\"\"")
+    return;
+
+  const auto *Str = Lex->getBufferLocation();
+  const auto *End = Lex->getBuffer().end();
+
+  if (Str == End || *Str != '\"')
+    return;
+
+  // Skip the `"""` that begins a text block.
+  const auto *S = Str + 1;
+
+  // From JLS 3.10.6 (Text Blocks): the opening delimiter is three double-quote
+  // characters, followed by zero or more space, tab, and form feed characters,
+  // and concluded by a line terminator (LF, CR, or CR LF per JLS 3.4).
+  // The characters are compared directly instead of going through isblank(),
+  // whose behavior is undefined for negative `char` values (non-ASCII bytes).
+  while (S < End && *S != '\n' && *S != '\r') {
+    if (*S != ' ' && *S != '\t' && *S != '\f')
+      return;
+    ++S;
+  }
+
+  // Find the `"""` that ends the text block. `Count` is the number of
+  // consecutive unescaped double quotes seen so far.
+  for (int Count = 0; Count < 3; ++S) {
+    if (S == End)
+      return; // Unterminated text block: leave the lexer untouched.
+
+    switch (*S) {
+    case '\\':
+      // A backslash escapes the character that follows it, so that character
+      // can neither start nor extend the closing delimiter. Consuming it here
+      // (rather than merely biasing the counter) keeps `\\"""` working: the
+      // second backslash is the escaped character, and the quotes after it
+      // close the block.
+      if (S + 1 != End)
+        ++S;
+      Count = 0;
+      break;
+    case '\"':
+      ++Count;
+      break;
+    default:
+      Count = 0;
+    }
+  }
+
+  // Skip the text block: `S` now points just past the closing `"""`.
+  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));
+}
+
 // Tries to parse a JavaScript Regex literal starting at the current token,
 // if that begins with a slash and is in a location where JavaScript allows
 // regex literals. Changes the current token to a regex literal and updates
@@ -1374,6 +1417,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
     ++Column;
     StateStack.push(LexerState::TOKEN_STASHED);
+  } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
+    tryParseJavaTextBlock();
   }
 
   if (Style.isVerilog() && Tokens.size() > 0 &&
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 105847b126e20..026383db1fe6c 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -72,6 +72,8 @@ class FormatTokenLexer {
 
   bool canPrecedeRegexLiteral(FormatToken *Prev);
 
+  void tryParseJavaTextBlock();
+
   // Tries to parse a JavaScript Regex literal starting at the current token,
   // if that begins with a slash and is in a location where JavaScript allows
   // regex literals. Changes the current token to a regex literal and updates
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index e01c1d6d7e684..35ee257d015d3 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -791,6 +791,58 @@ TEST_F(FormatTestJava, AlignCaseArrows) {
                Style);
 }
 
+// Exercises Java text blocks (`"""` ... `"""`). Every snippet is handed to
+// verifyNoChange, i.e. (per the helper's name) it is expected to come out of
+// the formatter unchanged, including the unindented or oddly indented content
+// lines inside the text block.
+TEST_F(FormatTestJava, TextBlock) {
+  // Content at column 0 and the closing delimiter on a line of its own.
+  verifyNoChange("String myStr = \"\"\"\n"
+                 "hello\n"
+                 "there\n"
+                 "\"\"\";");
+
+  // Closing delimiter directly after the last content on the same line.
+  verifyNoChange("String tb = \"\"\"\n"
+                 "            the new\"\"\";");
+
+  // Text block used as a method argument; the closing delimiter is indented
+  // and followed by `);`.
+  verifyNoChange("System.out.println(\"\"\"\n"
+                 "    This is the first line\n"
+                 "    This is the second line\n"
+                 "    \"\"\");");
+
+  // Opening delimiter followed by a trailing space before the newline, inside
+  // a method body with a statement after the text block.
+  verifyNoChange("void writeHTML() {\n"
+                 "  String html = \"\"\" \n"
+                 "                <html>\n"
+                 "                    <p>Hello World.</p>\n"
+                 "                </html>\n"
+                 "\"\"\";\n"
+                 "  writeOutput(html);\n"
+                 "}");
+
+  // Opening delimiter followed by a trailing tab; a method call is chained
+  // onto the closing delimiter.
+  verifyNoChange("String colors = \"\"\"\t\n"
+                 "    red\n"
+                 "    green\n"
+                 "    blue\"\"\".indent(4);");
+
+  // Content containing backslash-escaped `\"""` sequences and an ordinary
+  // quoted string; only the final unescaped `"""` ends the block.
+  verifyNoChange("String code = \"\"\"\n"
+                 "    String source = \\\"\"\"\n"
+                 "        String message = \"Hello, World!\";\n"
+                 "        System.out.println(message);\n"
+                 "        \\\"\"\";\n"
+                 "    \"\"\";");
+
+  // Text block nested in a class and method, with its content and closing
+  // delimiter at column 0.
+  verifyNoChange(
+      "class Outer {\n"
+      "  void printPoetry() {\n"
+      "    String lilacs = \"\"\"\n"
+      "Passing the apple-tree blows of white and pink in the orchards\n"
+      "\"\"\";\n"
+      "    System.out.println(lilacs);\n"
+      "  }\n"
+      "}");
+
+  // Last content line ends with a backslash immediately before the newline
+  // that precedes the closing delimiter.
+  verifyNoChange("String name = \"\"\"\n"
+                 "        red\n"
+                 "        green\n"
+                 "        blue\\\n"
+                 "    \"\"\";");
+}
+
 } // namespace
 } // namespace test
 } // namespace format

@github-project-automation github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status May 24, 2025
@tstellar tstellar moved this from Needs Merge to Needs Backport PR in LLVM Release Status May 25, 2025
@github-project-automation github-project-automation bot moved this from Needs Backport PR to Needs Merge in LLVM Release Status May 25, 2025
@owenca owenca merged commit b7f5950 into llvm:main May 25, 2025
11 checks passed
@owenca owenca deleted the 61954 branch May 25, 2025 22:40
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status May 25, 2025
@owenca
Copy link
Contributor Author

owenca commented May 25, 2025

/cherry-pick b7f5950

@llvmbot
Copy link
Member

llvmbot commented May 25, 2025

/pull-request #141433

@llvm llvm deleted a comment from llvm-ci May 26, 2025
swift-ci pushed a commit to swiftlang/llvm-project that referenced this pull request May 27, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

Java text blocks aren't understood by clang-format
4 participants