Merge pull request #1619 from ahoppen/ahoppen/unterminated-block-comment

ahoppen · web-flow · commit 2179d9fe6335 · 2023-05-02T14:48:41.000-07:00
Emit error for unterminated block comment
diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift
@@ -684,24 +684,25 @@ extension Lexer.Cursor {
 
   /// Returns `true` if the comment spaned multiple lines and `false` otherwise.
   /// Assumes that the curser is currently pointing at the `*` of the opening `/*`.
-  mutating func advanceToEndOfSlashStarComment() -> Bool {
+  mutating func advanceToEndOfSlashStarComment(slashPosition: Lexer.Cursor) -> TriviaResult {
     precondition(self.previous == UInt8(ascii: "/"))
     // Make sure to advance over the * so that we don't incorrectly handle /*/ as
     // the beginning and end of the comment.
     let consumedStar = self.advance(matching: "*")
     precondition(consumedStar)
 
     var depth = 1
-    var isMultiline = false
+    var newlinePresence = NewlinePresence.absent
+    var error: LexingDiagnostic? = nil
 
-    while true {
+    LOOP: while true {
       switch self.advance() {
       case UInt8(ascii: "*"):
         // Check for a '*/'
         if self.advance(matching: "/") {
           depth -= 1
           if depth == 0 {
-            return isMultiline
+            break LOOP
           }
         }
       case UInt8(ascii: "/"):
@@ -711,14 +712,17 @@ extension Lexer.Cursor {
         }
 
       case UInt8(ascii: "\n"), UInt8(ascii: "\r"):
-        isMultiline = true
+        newlinePresence = .present
         continue
       case nil:
-        return isMultiline
+        error = LexingDiagnostic(.unterminatedBlockComment, position: slashPosition)
+        break LOOP
       case .some:
         continue
       }
     }
+
+    return TriviaResult(newlinePresence: newlinePresence, error: error)
   }
 
   /// If this is the opening delimiter of a raw string literal, return the number
@@ -1063,7 +1067,7 @@ extension Lexer.Cursor {
 // MARK: - Trivia
 
 extension Lexer.Cursor {
-  fileprivate enum NewlinePresence {
+  enum NewlinePresence {
     case absent
     case present
   }
@@ -1080,7 +1084,7 @@ extension Lexer.Cursor {
     case escapedNewlineInMultiLineStringLiteral
   }
 
-  fileprivate struct TriviaResult {
+  struct TriviaResult {
     let newlinePresence: NewlinePresence
     let error: LexingDiagnostic?
   }
@@ -1137,7 +1141,11 @@ extension Lexer.Cursor {
           self.advanceToEndOfLine()
           continue
         case UInt8(ascii: "*"):
-          _ = self.advanceToEndOfSlashStarComment()
+          let starSlashResult = self.advanceToEndOfSlashStarComment(slashPosition: start)
+          if starSlashResult.newlinePresence == .present {
+            newlinePresence = .present
+          }
+          error = error ?? starSlashResult.error
           continue
         default:
           break
diff --git a/Sources/SwiftParser/TriviaParser.swift b/Sources/SwiftParser/TriviaParser.swift
@@ -191,7 +191,7 @@ extension Lexer.Cursor {
     // "/**/": .blockComment.
     precondition(self.previous == UInt8(ascii: "/") && self.is(at: "*"))
     let isDocComment = self.input.count > 2 && self.is(offset: 1, at: "*") && self.is(offset: 2, notAt: "/")
-    _ = self.advanceToEndOfSlashStarComment()
+    _ = self.advanceToEndOfSlashStarComment(slashPosition: start)
     let contents = start.text(upTo: self)
     return isDocComment ? .docBlockComment(contents) : .blockComment(contents)
   }
diff --git a/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift
@@ -58,6 +58,7 @@ public enum StaticTokenError: String, DiagnosticMessage {
   case spaceAtEndOfRegexLiteral = "bare slash regex literal may not end with space"
   case multilineRegexClosingNotOnNewline = "multi-line regex closing delimiter must appear on new line"
   case unprintableAsciiCharacter = "unprintable ASCII character found in source file"
+  case unterminatedBlockComment = "unterminated '/*' comment"
 
   public var message: String { self.rawValue }
 
@@ -160,16 +161,17 @@ public extension SwiftSyntax.TokenDiagnostic {
     case .invalidNumberOfHexDigitsInUnicodeEscape: return StaticTokenError.invalidNumberOfHexDigitsInUnicodeEscape
     case .invalidOctalDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .octal(scalarAtErrorOffset))
     case .invalidUtf8: return StaticTokenError.invalidUtf8
-    case .tokenDiagnosticOffsetOverflow: return StaticTokenError.tokenDiagnosticOffsetOverflow
+    case .multilineRegexClosingNotOnNewline: return StaticTokenError.multilineRegexClosingNotOnNewline
     case .nonBreakingSpace: return StaticTokenWarning.nonBreakingSpace
     case .nulCharacter: return StaticTokenWarning.nulCharacter
     case .sourceConflictMarker: return StaticTokenError.sourceConflictMarker
+    case .spaceAtEndOfRegexLiteral: return StaticTokenError.spaceAtEndOfRegexLiteral
+    case .spaceAtStartOfRegexLiteral: return StaticTokenError.spaceAtStartOfRegexLiteral
+    case .tokenDiagnosticOffsetOverflow: return StaticTokenError.tokenDiagnosticOffsetOverflow
     case .unexpectedBlockCommentEnd: return StaticTokenError.unexpectedBlockCommentEnd
     case .unicodeCurlyQuote: return StaticTokenError.unicodeCurlyQuote
-    case .spaceAtStartOfRegexLiteral: return StaticTokenError.spaceAtStartOfRegexLiteral
-    case .spaceAtEndOfRegexLiteral: return StaticTokenError.spaceAtEndOfRegexLiteral
-    case .multilineRegexClosingNotOnNewline: return StaticTokenError.multilineRegexClosingNotOnNewline
     case .unprintableAsciiCharacter: return StaticTokenError.unprintableAsciiCharacter
+    case .unterminatedBlockComment: return StaticTokenError.unterminatedBlockComment
     }
   }
 
diff --git a/Sources/SwiftSyntax/TokenDiagnostic.swift b/Sources/SwiftSyntax/TokenDiagnostic.swift
@@ -41,17 +41,18 @@ public struct TokenDiagnostic: Hashable {
     case invalidNumberOfHexDigitsInUnicodeEscape
     case invalidOctalDigitInIntegerLiteral
     case invalidUtf8
-    /// The lexer dicovered an error but was not able to represent the offset of the error because it would overflow `LexerErrorOffset`.
-    case tokenDiagnosticOffsetOverflow
+    case multilineRegexClosingNotOnNewline
     case nonBreakingSpace
     case nulCharacter
     case sourceConflictMarker
+    case spaceAtEndOfRegexLiteral
+    case spaceAtStartOfRegexLiteral
+    /// The lexer discovered an error but was not able to represent the offset of the error because it would overflow `LexerErrorOffset`.
+    case tokenDiagnosticOffsetOverflow
     case unexpectedBlockCommentEnd
     case unicodeCurlyQuote
     case unprintableAsciiCharacter
-    case spaceAtStartOfRegexLiteral
-    case spaceAtEndOfRegexLiteral
-    case multilineRegexClosingNotOnNewline
+    case unterminatedBlockComment
   }
 
   public let kind: Kind
@@ -118,16 +119,17 @@ public struct TokenDiagnostic: Hashable {
     case .invalidNumberOfHexDigitsInUnicodeEscape: return .error
     case .invalidOctalDigitInIntegerLiteral: return .error
     case .invalidUtf8: return .error
-    case .tokenDiagnosticOffsetOverflow: return .error
+    case .multilineRegexClosingNotOnNewline: return .error
     case .nonBreakingSpace: return .warning
     case .nulCharacter: return .warning
     case .sourceConflictMarker: return .error
+    case .spaceAtEndOfRegexLiteral: return .error
+    case .spaceAtStartOfRegexLiteral: return .error
+    case .tokenDiagnosticOffsetOverflow: return .error
     case .unexpectedBlockCommentEnd: return .error
     case .unicodeCurlyQuote: return .error
     case .unprintableAsciiCharacter: return .error
-    case .spaceAtStartOfRegexLiteral: return .error
-    case .spaceAtEndOfRegexLiteral: return .error
-    case .multilineRegexClosingNotOnNewline: return .error
+    case .unterminatedBlockComment: return .error
     }
   }
 }
diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift
@@ -562,9 +562,9 @@ public class LexerTests: XCTestCase {
       ]
     )
     assertLexemes(
-      "^/*/",
+      "^1️⃣/*/",
       lexemes: [
-        LexemeSpec(.binaryOperator, text: "^", trailing: "/*/")
+        LexemeSpec(.binaryOperator, text: "^", trailing: "/*/", diagnostic: "unterminated '/*' comment")
       ]
     )
   }
@@ -1461,4 +1461,22 @@ public class LexerTests: XCTestCase {
       ]
     )
   }
+
+  func testUnterminatedBlockComment() {
+    assertLexemes(
+      "1️⃣/*",
+      lexemes: [
+        LexemeSpec(.eof, leading: "/*", text: "", diagnostic: "unterminated '/*' comment")
+      ]
+    )
+  }
+
+  func testSlashStartSlash() {
+    assertLexemes(
+      "1️⃣/*/",
+      lexemes: [
+        LexemeSpec(.eof, leading: "/*/", text: "", diagnostic: "unterminated '/*' comment")
+      ]
+    )
+  }
 }
diff --git a/Tests/SwiftParserTest/translated/GenericDisambiguationTests.swift b/Tests/SwiftParserTest/translated/GenericDisambiguationTests.swift
@@ -195,9 +195,6 @@ final class GenericDisambiguationTests: XCTestCase {
   func testGenericDisambiguation12() {
     assertParse(
       """
-      // FIXME: Nested generic types. Need to be able to express $T0<A, B, C> in the
-      // typechecker.
-      /*
       A<B>.C<D>.e()
       """
     )
@@ -216,11 +213,7 @@ final class GenericDisambiguationTests: XCTestCase {
       """
       meta(A<B>.C<D>.self)
       meta2(A<B>.C<D>.self, 0)
-       1️⃣*/
-      """,
-      diagnostics: [
-        DiagnosticSpec(message: "extraneous code '*/' at top level")
-      ]
+      """
     )
   }
 

Original file line number	Diff line number	Diff line change
`@@ -191,7 +191,7 @@ extension Lexer.Cursor {`
`191`	`191`	`// "/**/": .blockComment.`
`192`	`192`	`precondition(self.previous == UInt8(ascii: "/") && self.is(at: "*"))`
`193`	`193`	`let isDocComment = self.input.count > 2 && self.is(offset: 1, at: "*") && self.is(offset: 2, notAt: "/")`
`194`		`- _ = self.advanceToEndOfSlashStarComment()`
	`194`	`+ _ = self.advanceToEndOfSlashStarComment(slashPosition: start)`
`195`	`195`	`let contents = start.text(upTo: self)`
`196`	`196`	`return isDocComment ? .docBlockComment(contents) : .blockComment(contents)`
`197`	`197`	`}`
Original file line number	Diff line number	Diff line change
`@@ -562,9 +562,9 @@ public class LexerTests: XCTestCase {`
`562`	`562`	`]`
`563`	`563`	`)`
`564`	`564`	`assertLexemes(`
`565`		`- "^/*/",`
	`565`	`+ "^1️⃣/*/",`
`566`	`566`	`lexemes: [`
`567`		`- LexemeSpec(.binaryOperator, text: "^", trailing: "/*/")`
	`567`	`+ LexemeSpec(.binaryOperator, text: "^", trailing: "/*/", diagnostic: "unterminated '/*' comment")`
`568`	`568`	`]`
`569`	`569`	`)`
`570`	`570`	`}`
`@@ -1461,4 +1461,22 @@ public class LexerTests: XCTestCase {`
`1461`	`1461`	`]`
`1462`	`1462`	`)`
`1463`	`1463`	`}`
	`1464`	`+`
	`1465`	`+ func testUnterminatedBlockComment() {`
	`1466`	`+ assertLexemes(`
	`1467`	`+ "1️⃣/*",`
	`1468`	`+ lexemes: [`
	`1469`	`+ LexemeSpec(.eof, leading: "/*", text: "", diagnostic: "unterminated '/*' comment")`
	`1470`	`+ ]`
	`1471`	`+ )`
	`1472`	`+ }`
	`1473`	`+`
	`1474`	`+ func testSlashStartSlash() {`
	`1475`	`+ assertLexemes(`
	`1476`	`+ "1️⃣/*/",`
	`1477`	`+ lexemes: [`
	`1478`	`+ LexemeSpec(.eof, leading: "/*/", text: "", diagnostic: "unterminated '/*' comment")`
	`1479`	`+ ]`
	`1480`	`+ )`
	`1481`	`+ }`
`1464`	`1482`	`}`