swiftlang · ahoppen · Jul 11, 2023 · Jul 10, 2023 · ahoppen · Jul 6, 2023
@@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
       /// Parse the source code in the given string as Swift source file. See
       /// `Parser.init` for more details.
       public static func parse(
-        source: String,
-        parseTransition: IncrementalParseTransition? = nil
+        source: String
       ) -> SourceFileSyntax {
         var parser = Parser(source)
         return SourceFileSyntax.parse(from: &parser)
@@ -35,18 +34,66 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
 
     DeclSyntax(
       """
-      /// Parse the source code in the given string as Swift source file. See
+      /// Parse the source code in the given buffer as Swift source file. See
       /// `Parser.init` for more details.
       public static func parse(
         source: UnsafeBufferPointer<UInt8>,
-        maximumNestingLevel: Int? = nil,
-        parseTransition: IncrementalParseTransition? = nil
+        maximumNestingLevel: Int? = nil
       ) -> SourceFileSyntax {
         var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
         return SourceFileSyntax.parse(from: &parser)
       }
       """
     )
+
+    DeclSyntax(
+      """
+      /// Parse the source code in the given string as Swift source file with support
+      /// for incremental parsing.
+      ///
+      /// When parsing a source file for the first time, invoke `parseIncrementally`
+      /// with `parseTransition: nil`. This returns the initial tree as well as
+      /// ``LookaheadRanges``. If an edit is made to the source file, an
+      /// ``IncrementalParseTransition`` can be constructed from the initial tree
+      /// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with
+      /// the post-edit source and that parse transition, the parser will re-use
+      /// nodes that haven’t changed.
+      ///
+      /// - Parameters:
+      ///   - source: The source code to parse
+      ///   - parseTransition: If a similar source file has already been parsed, the
+      ///     ``IncrementalParseTransition`` that contains the previous tree as well
+      ///     as the edits that were performed to it.
+      /// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe
+      ///            how far the parser looked ahead while parsing a node, which is
+      ///            necessary to construct an ``IncrementalParseTransition`` for a
+      ///            subsequent incremental parse
+      public static func parseIncrementally(
+        source: String,
+        parseTransition: IncrementalParseTransition?
+      ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
+        var parser = Parser(source, parseTransition: parseTransition)
+        return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
+      }
+      """
+    )
+
+    DeclSyntax(
+      """
+      /// Parse the source code in the given buffer as Swift source file with support
+      /// for incremental parsing.
+      ///
+      /// See doc comments in ``Parser/parseIncrementally(source:parseTransition:)``
+      public static func parseIncrementally(
+        source: UnsafeBufferPointer<UInt8>,
+        maximumNestingLevel: Int? = nil,
+        parseTransition: IncrementalParseTransition?
+      ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
+        var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
+        return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
+      }
+      """
+    )
   }
 
   DeclSyntax(

@@ -11,6 +11,30 @@
 //===----------------------------------------------------------------------===//
 
 @_spi(RawSyntax) import SwiftSyntax
+
+extension Parser {
+  /// Looks up a node of `kind` starting at the current token in the incremental
+  /// parse lookup. On a hit, advances the lexer past the node and returns it;
+  /// returns `nil` when there is no lookup or the lookup yields no node.
+  mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
+    // Bail out before computing any offsets when there is no parse lookup.
+    guard parseLookup != nil else { return nil }
+    let tokenStart = lexemes.getOffsetToStart(currentToken)
+    guard let cached = parseLookup?.lookUp(tokenStart, kind: kind) else { return nil }
+    lexemes.advance(by: cached.byteSize, currentToken: &currentToken)
+    return cached
+  }
+
+  /// Registers `node` in `lookaheadRanges` together with its lookahead length:
+  /// the distance from the start of `startToken` to the furthest offset
+  /// recorded by the lexeme sequence's lookahead tracker.
+  mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
+    let furthestOffset = lexemes.lookaheadTracker.pointee.furthestOffset
+    let nodeStart = lexemes.getOffsetToStart(startToken)
+    lookaheadRanges.registerNodeForIncrementalParse(node: node, lookaheadLength: furthestOffset - nodeStart)
+  }
+}
+
 /// Accepts the re-used ``Syntax`` nodes that `IncrementalParseTransition`
 /// determined they should be re-used for a parse invocation.
 ///
@@ -20,26 +44,25 @@
 /// This is also used for testing purposes to ensure incremental reparsing
 /// worked as expected.
 public protocol IncrementalParseReusedNodeDelegate {
-  /// Accepts the range and ``Syntax`` node of skipped source region.
+  /// Accepts the ``Syntax`` node of a skipped source region.
   ///
   /// - Parameters:
-  ///   - range: The source region of the currently parsed source.
   ///   - previousNode: The node from the previous tree that is associated with
   ///                   the skipped source region.
-  func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
+  func parserReusedNode(previousNode: Syntax)
 }
 
 /// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
-/// the range and re-used node into an array.
+/// the re-used nodes into an array.
 public final class IncrementalParseReusedNodeCollector:
   IncrementalParseReusedNodeDelegate
 {
-  public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
+  public var nodes: [Syntax] = []
 
   public init() {}
 
-  public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
-    rangeAndNodes.append((range, previousNode))
+  public func parserReusedNode(previousNode: Syntax) {
+    nodes.append(previousNode)
   }
 }
 
@@ -48,6 +71,7 @@ public final class IncrementalParseReusedNodeCollector:
 public final class IncrementalParseTransition {
   fileprivate let previousTree: SourceFileSyntax
   fileprivate let edits: ConcurrentEdits
+  fileprivate let lookaheadRanges: LookaheadRanges
   fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
 
   /// - Parameters:
@@ -59,17 +83,19 @@ public final class IncrementalParseTransition {
   public init(
     previousTree: SourceFileSyntax,
     edits: ConcurrentEdits,
+    lookaheadRanges: LookaheadRanges,
     reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
   ) {
     self.previousTree = previousTree
     self.edits = edits
+    self.lookaheadRanges = lookaheadRanges
     self.reusedDelegate = reusedNodeDelegate
   }
 }
 
 /// Provides a mechanism for the parser to skip regions of an incrementally
 /// updated source that was already parsed during a previous parse invocation.
-public struct IncrementalParseLookup {
+struct IncrementalParseLookup {
   fileprivate let transition: IncrementalParseTransition
   fileprivate var cursor: SyntaxCursor
 
@@ -100,23 +126,21 @@ public struct IncrementalParseLookup {
   /// - Returns: A ``Syntax`` node from the previous parse invocation,
   ///            representing the contents of this region, if it is still valid
   ///            to re-use. `nil` otherwise.
-  @_spi(RawSyntax)
-  public mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
+  fileprivate mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
     guard let prevOffset = translateToPreEditOffset(newOffset) else {
       return nil
     }
     let prevPosition = AbsolutePosition(utf8Offset: prevOffset)
     let node = cursorLookup(prevPosition: prevPosition, kind: kind)
     if let delegate = reusedDelegate, let node {
       delegate.parserReusedNode(
-        range: ByteSourceRange(offset: newOffset, length: node.byteSize),
         previousNode: node
       )
     }
     return node
   }
 
-  mutating fileprivate func cursorLookup(
+  fileprivate mutating func cursorLookup(
     prevPosition: AbsolutePosition,
     kind: SyntaxKind
   ) -> Syntax? {
@@ -148,24 +172,13 @@ public struct IncrementalParseLookup {
       return true
     }
 
-    // Node can also not be reused if an edit has been made in the next token's
-    // text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
-    // a StructDecl inside and `private struc Foo {}` parses as two
-    // CodeBlockItems one for `private` and one for `struc Foo {}`
-    var nextLeafNodeLength: SourceLength = .zero
-    if let nextSibling = cursor.nextSibling {
-      // Fast path check: if next sibling is before all the edits then we can
-      // re-use the node.
-      if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
-        return true
-      }
-      if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
-        nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
-      }
+    guard let nodeAffectRangeLength = transition.lookaheadRanges.lookaheadRanges[node.raw.id] else {
+      return false
     }
+
     let nodeAffectRange = ByteSourceRange(
       offset: node.position.utf8Offset,
-      length: (node.totalLength + nextLeafNodeLength).utf8Length
+      length: nodeAffectRangeLength
     )
 
     for edit in edits.edits {

@@ -32,24 +32,60 @@ extension Lexer {
     /// usually less than 0.1% of the memory allocated by the syntax arena.
     var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
 
-    fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
+    /// The offset, relative to the source buffer’s start, at which `nextToken` ends: its start offset plus its `byteLength`.
+    var offsetToNextTokenEnd: Int {
+      return nextToken.byteLength + getOffsetToStart(nextToken)
+    }
+
+    /// See doc comments in ``LookaheadTracker``
+    ///
+    /// This is an `UnsafeMutablePointer` for two reasons
+    ///  - When `LexemeSequence` gets copied (e.g. when a ``Lookahead`` gets created), it should still reference the same ``LookaheadTracker`` so that any lookahead performed in the ``Lookahead`` also affects the original ``Parser``. It thus needs to be a reference type
+    ///  - ``LookaheadTracker`` is not a class to avoid reference counting it. The ``Parser`` that creates the ``LexemeSequence`` will always outlive any ``Lookahead`` created for it.
+    let lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
+
+    fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor, lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>) {
       self.sourceBufferStart = sourceBufferStart
       self.cursor = cursor
       self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
+      self.lookaheadTracker = lookaheadTracker
     }
 
     @_spi(Testing)
     public mutating func next() -> Lexer.Lexeme? {
       return self.advance()
     }
 
+    /// Pass the end offset of `nextToken` to `recordFurthestOffset` on the shared ``LookaheadTracker``.
+    private func recordNextTokenInLookaheadTracker() {
+      lookaheadTracker.pointee.recordFurthestOffset(offsetToNextTokenEnd)
+    }
+
     mutating func advance() -> Lexer.Lexeme {
       defer {
         self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
       }
+      self.recordNextTokenInLookaheadTracker()
       return self.nextToken
     }
 
+    /// Distance from `sourceBufferStart` to the cursor of `token`, i.e. the offset at which `token` (including its leading trivia) starts.
+    func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
+      sourceBufferStart.distance(to: token.cursor)
+    }
+
+    /// Move the lexer to `offset` past the cursor position of `currentToken`,
+    /// re-lex from there and store the first token found in `currentToken`.
+    ///
+    /// - Important: This should only be used for incremental parsing.
+    mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
+      var skippedCursor = currentToken.cursor
+      skippedCursor.position = skippedCursor.position.advanced(by: offset)
+      self.cursor = skippedCursor
+      self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
+      currentToken = self.advance()
+    }
+
     /// Reset the lexeme sequence to the state we were in when lexing `splitToken`
     /// but after we consumed `consumedPrefix` bytes from `splitToken`.
     /// - Warning: Do not add more usages of this function.
@@ -63,6 +99,7 @@ extension Lexer {
     }
 
     func peek() -> Lexer.Lexeme {
+      self.recordNextTokenInLookaheadTracker()
       return self.nextToken
     }
 
@@ -104,12 +141,13 @@ extension Lexer {
   @_spi(Testing)
   public static func tokenize(
     _ input: UnsafeBufferPointer<UInt8>,
-    from startIndex: Int = 0
+    from startIndex: Int = 0,
+    lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
   ) -> LexemeSequence {
     precondition(input.isEmpty || startIndex < input.endIndex)
     let startChar = startIndex == input.startIndex ? UInt8(ascii: "\0") : input[startIndex - 1]
     let start = Cursor(input: input, previous: UInt8(ascii: "\0"))
     let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar)
-    return LexemeSequence(sourceBufferStart: start, cursor: cursor)
+    return LexemeSequence(sourceBufferStart: start, cursor: cursor, lookaheadTracker: lookaheadTracker)
   }
 }