swiftlang
diff --git a/‎CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift
Lines changed: 30 additions & 5 deletions b/‎CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift
Lines changed: 30 additions & 5 deletions
diff --git a/‎Sources/SwiftParser/IncrementalParseTransition.swift
Lines changed: 30 additions & 22 deletions b/‎Sources/SwiftParser/IncrementalParseTransition.swift
Lines changed: 30 additions & 22 deletions
diff --git a/‎Sources/SwiftParser/Lexer/LexemeSequence.swift
Lines changed: 30 additions & 0 deletions b/‎Sources/SwiftParser/Lexer/LexemeSequence.swift
Lines changed: 30 additions & 0 deletions
diff --git a/‎Sources/SwiftParser/Lookahead.swift
Lines changed: 2 additions & 0 deletions b/‎Sources/SwiftParser/Lookahead.swift
Lines changed: 2 additions & 0 deletions
diff --git a/‎Sources/SwiftParser/Parser.swift
Lines changed: 66 additions & 2 deletions b/‎Sources/SwiftParser/Parser.swift
Lines changed: 66 additions & 2 deletions
diff --git a/‎Sources/SwiftParser/TopLevel.swift
Lines changed: 12 additions & 1 deletion b/‎Sources/SwiftParser/TopLevel.swift
Lines changed: 12 additions & 1 deletion
@@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
       /// Parse the source code in the given string as Swift source file. See
       /// `Parser.init` for more details.
       public static func parse(
-        source: String,
-        parseTransition: IncrementalParseTransition? = nil
+        source: String
       ) -> SourceFileSyntax {
         var parser = Parser(source)
         return SourceFileSyntax.parse(from: &parser)
@@ -35,18 +34,44 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
 
     DeclSyntax(
       """
-      /// Parse the source code in the given string as Swift source file. See
+      /// Parse the source code in the given buffer as Swift source file. See
       /// `Parser.init` for more details.
       public static func parse(
         source: UnsafeBufferPointer<UInt8>,
-        maximumNestingLevel: Int? = nil,
-        parseTransition: IncrementalParseTransition? = nil
+        maximumNestingLevel: Int? = nil
       ) -> SourceFileSyntax {
         var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
         return SourceFileSyntax.parse(from: &parser)
       }
       """
     )
+
+    DeclSyntax(
+      """
+      /// Parse the source code in the given string as Swift source file and return `nodeAffectRangeCollector` alongside the tree to enable incremental parse.
+      public static func parse(
+        source: String,
+        parseTransition: IncrementalParseTransition? = nil
+      ) -> (tree: SourceFileSyntax, nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector) {
+        var parser = Parser(source, parseTransition: parseTransition)
+        return (SourceFileSyntax.parse(from: &parser), parser.parseNodeAffectRange)
+      }
+      """
+    )
+
+    DeclSyntax(
+      """
+      /// Parse the source code in the given buffer as Swift source file and return `nodeAffectRangeCollector` alongside the tree to enable incremental parse.
+      public static func parse(
+        source: UnsafeBufferPointer<UInt8>,
+        maximumNestingLevel: Int? = nil,
+        parseTransition: IncrementalParseTransition? = nil
+      ) -> (tree: SourceFileSyntax, nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector) {
+        var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
+        return (SourceFileSyntax.parse(from: &parser), parser.parseNodeAffectRange)
+      }
+      """
+    )
   }
 
   DeclSyntax(
 
@@ -20,26 +20,39 @@
 /// This is also used for testing purposes to ensure incremental reparsing
 /// worked as expected.
 public protocol IncrementalParseReusedNodeDelegate {
-  /// Accepts the range and ``Syntax`` node of skipped source region.
+  /// Accepts the ``Syntax`` node of a skipped source region.
   ///
   /// - Parameters:
-  ///   - range: The source region of the currently parsed source.
   ///   - previousNode: The node from the previous tree that is associated with
   ///                   the skipped source region.
-  func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
+  func parserReusedNode(previousNode: Syntax)
 }
 
 /// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
 /// the range and re-used node into an array.
 public final class IncrementalParseReusedNodeCollector:
   IncrementalParseReusedNodeDelegate
 {
-  public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
+  public var nodes: [Syntax] = []
 
   public init() {}
 
-  public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
-    rangeAndNodes.append((range, previousNode))
+  public func parserReusedNode(previousNode: Syntax) {
+    nodes.append(previousNode)
+  }
+}
+
+/// Stores the affect range of nodes that may be re-used. A node cannot be re-used when an edit intersects its affect range.
+public struct IncrementalParseNodeAffectRangeCollector {
+  /// Maps a node's ID to the UTF-8 length of source that might affect how the node is parsed.
+  /// The incremental parse lookup reads this to decide whether a node can be reused.
+  fileprivate var nodeAffectRange: [RawSyntax.ID: Int] = [:]
+
+  public init() {}
+
+  @_spi(RawSyntax)
+  public mutating func registerNodeForIncrementalParse(node: RawSyntax, length: Int) {
+    nodeAffectRange.updateValue(length, forKey: node.id)
   }
 }
 
@@ -48,6 +61,7 @@ public final class IncrementalParseReusedNodeCollector:
 public final class IncrementalParseTransition {
   fileprivate let previousTree: SourceFileSyntax
   fileprivate let edits: ConcurrentEdits
+  fileprivate let nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector
   fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
 
   /// - Parameters:
@@ -59,10 +73,12 @@ public final class IncrementalParseTransition {
   public init(
     previousTree: SourceFileSyntax,
     edits: ConcurrentEdits,
+    nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector,
     reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
   ) {
     self.previousTree = previousTree
     self.edits = edits
+    self.nodeAffectRangeCollector = nodeAffectRangeCollector
     self.reusedDelegate = reusedNodeDelegate
   }
 }
@@ -73,6 +89,10 @@ public struct IncrementalParseLookup {
   fileprivate let transition: IncrementalParseTransition
   fileprivate var cursor: SyntaxCursor
 
+  fileprivate var nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector {
+    return transition.nodeAffectRangeCollector
+  }
+
   /// Create a new ``IncrementalParseLookup`` that can look nodes up based on the
   /// given ``IncrementalParseTransition``.
   public init(transition: IncrementalParseTransition) {
@@ -109,7 +129,6 @@ public struct IncrementalParseLookup {
     let node = cursorLookup(prevPosition: prevPosition, kind: kind)
     if let delegate = reusedDelegate, let node {
       delegate.parserReusedNode(
-        range: ByteSourceRange(offset: newOffset, length: node.byteSize),
         previousNode: node
       )
     }
@@ -148,24 +167,13 @@ public struct IncrementalParseLookup {
       return true
     }
 
-    // Node can also not be reused if an edit has been made in the next token's
-    // text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
-    // a StructDecl inside and `private struc Foo {}` parses as two
-    // CodeBlockItems one for `private` and one for `struc Foo {}`
-    var nextLeafNodeLength: SourceLength = .zero
-    if let nextSibling = cursor.nextSibling {
-      // Fast path check: if next sibling is before all the edits then we can
-      // re-use the node.
-      if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
-        return true
-      }
-      if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
-        nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
-      }
+    guard let nodeAffectRangeLength = nodeAffectRangeCollector.nodeAffectRange[node.raw.id] else {
+      return false
     }
+
     let nodeAffectRange = ByteSourceRange(
       offset: node.position.utf8Offset,
-      length: (node.totalLength + nextLeafNodeLength).utf8Length
+      length: nodeAffectRangeLength
     )
 
     for edit in edits.edits {
 
@@ -32,24 +32,54 @@ extension Lexer {
     /// usually less than 0.1% of the memory allocated by the syntax arena.
     var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
 
+    /// The offset, measured from `sourceBufferStart`, at which `nextToken` ends.
+    var offsetToNextTokenEnd: Int {
+      return getOffsetToStart(nextToken) + nextToken.byteLength
+    }
+
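+    /// See doc comments in ``LookaheadTracker``. NOTE(review): `init` allocates this pointer, but no matching `deinitialize`/`deallocate` is visible in this diff — confirm who frees it.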
+    var lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
+
     fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
       self.sourceBufferStart = sourceBufferStart
       self.cursor = cursor
       self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
+      self.lookaheadTracker = .allocate(capacity: 1)
+      self.lookaheadTracker.initialize(to: LookaheadTracker())
     }
 
     @_spi(Testing)
     public mutating func next() -> Lexer.Lexeme? {
       return self.advance()
     }
 
+    /// Report the end offset of `nextToken` to the ``LookaheadTracker``, which keeps the furthest offset it has been given.
+    func recordFurthestOffset() {
+      lookaheadTracker.pointee.recordFurthestOffset(offsetToNextTokenEnd)
+    }
+
     mutating func advance() -> Lexer.Lexeme {
       defer {
         self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
       }
       return self.nextToken
     }
 
+    /// The distance from `sourceBufferStart` to the cursor of `token`.
+    func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
+      sourceBufferStart.distance(to: token.cursor)
+    }
+
+    /// Move the cursor to `offset` bytes past the cursor stored in `currentToken`, lex again from
+    /// there, and replace `currentToken` with the first lexeme found at the new position.
+    mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
+      var restartCursor = currentToken.cursor
+      restartCursor.position = restartCursor.position.advanced(by: offset)
+      self.cursor = restartCursor
+      self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
+      currentToken = self.advance()
+    }
+
     /// Reset the lexeme sequence to the state we were in when lexing `splitToken`
     /// but after we consumed `consumedPrefix` bytes from `splitToken`.
     /// - Warning: Do not add more usages of this function.
 
@@ -33,6 +33,7 @@ extension Parser {
     ) {
       self.lexemes = lexemes
       self.currentToken = currentToken
+      self.lexemes.recordFurthestOffset()
     }
 
     fileprivate init(cloning other: Parser) {
@@ -90,6 +91,7 @@ extension Parser.Lookahead {
   mutating func consumeAnyToken() {
     tokensConsumed += 1
     self.currentToken = self.lexemes.advance()
+    self.lexemes.recordFurthestOffset()
   }
 
   mutating func consumeAnyToken(remapping: RawTokenKind) {
 
@@ -101,6 +101,12 @@ public struct Parser {
   /// When this nesting level is exceeded, the parser should stop parsing.
   let maximumNestingLevel: Int
 
+  /// See comments in ``IncrementalParseLookup``
+  let parseLookup: IncrementalParseLookup?
+
+  /// See comments in ``IncrementalParseNodeAffectRangeCollector``
+  var parseNodeAffectRange = IncrementalParseNodeAffectRangeCollector()
+
   /// A default maximum nesting level that is used if the client didn't
   /// explicitly specify one. Debug builds of the parser comume a lot more stack
   /// space and thus have a lower default maximum nesting level.
@@ -111,7 +117,11 @@ public struct Parser {
   #endif
 
   /// Initializes a ``Parser`` from the given string.
-  public init(_ input: String, maximumNestingLevel: Int? = nil) {
+  public init(
+    _ input: String,
+    maximumNestingLevel: Int? = nil,
+    parseTransition: IncrementalParseTransition? = nil
+  ) {
     self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
 
     self.arena = ParsingSyntaxArena(
@@ -126,6 +136,11 @@ public struct Parser {
 
     self.lexemes = Lexer.tokenize(interned)
     self.currentToken = self.lexemes.advance()
+    if let parseTransition {
+      self.parseLookup = IncrementalParseLookup(transition: parseTransition)
+    } else {
+      self.parseLookup = nil
+    }
   }
 
   /// Initializes a ``Parser`` from the given input buffer.
@@ -142,7 +157,12 @@ public struct Parser {
   ///            arena is created automatically, and `input` copied into the
   ///            arena. If non-`nil`, `input` must be within its registered
   ///            source buffer or allocator.
-  public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: ParsingSyntaxArena? = nil) {
+  public init(
+    _ input: UnsafeBufferPointer<UInt8>,
+    maximumNestingLevel: Int? = nil,
+    parseTransition: IncrementalParseTransition? = nil,
+    arena: ParsingSyntaxArena? = nil
+  ) {
     self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
 
     var sourceBuffer: UnsafeBufferPointer<UInt8>
@@ -159,6 +179,11 @@ public struct Parser {
 
     self.lexemes = Lexer.tokenize(sourceBuffer)
     self.currentToken = self.lexemes.advance()
+    if let parseTransition {
+      self.parseLookup = IncrementalParseLookup(transition: parseTransition)
+    } else {
+      self.parseLookup = nil
+    }
   }
 
   mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
@@ -237,6 +262,7 @@ public struct Parser {
 extension Parser {
   /// Retrieves the token following the current token without consuming it.
   func peek() -> Lexer.Lexeme {
+    lexemes.recordFurthestOffset()
     return self.lexemes.peek()
   }
 }
@@ -629,3 +655,41 @@ extension Parser {
     )
   }
 }
+
+// MARK: Incremental Parsing
+extension Parser {
+  /// Asks `parseLookup` for a re-usable node of `kind` at the current token's offset and, on a
+  /// hit, moves the lexer past that node. Returns `nil` if there is no lookup or no match.
+  mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
+    // NOTE(review): `lookup` is a local copy; any state `lookUp` mutates is dropped on return — confirm intended.
+    guard var lookup = parseLookup else { return nil }
+    let offset = lexemes.getOffsetToStart(currentToken)
+    guard let reused = lookup.lookUp(offset, kind: kind) else { return nil }
+    lexemes.advance(by: reused.byteSize, currentToken: &currentToken)
+    return reused
+  }
+
+  /// Registers the affect range length of `node`, measured from the offset of `startToken`.
+  /// The length reaches the furthest lookahead offset, but is at least the node plus the current token.
+  mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
+    let startOffset = lexemes.getOffsetToStart(startToken)
+    let lookaheadLength = lookaheadFurthestOffset - startOffset
+    let minimumLength = node.byteLength + currentToken.byteLength
+    parseNodeAffectRange.registerNodeForIncrementalParse(node: node, length: max(lookaheadLength, minimumLength))
+  }
+
+  /// The furthest offset recorded so far by the `LookaheadTracker` of `lexemes`.
+  public var lookaheadFurthestOffset: Int {
+    lexemes.lookaheadTracker.pointee.furthestOffset
+  }
+}
+
+/// Tracks the furthest offset from `sourceBufferStart` reached by `Parser.peek()` or a ``Lookahead`` on a ``Lexer/LexemeSequence``.
+struct LookaheadTracker {
+  private(set) var furthestOffset: Int = 0
+
+  public mutating func recordFurthestOffset(_ furthestOffset: Int) {
+    // Several lookaheads may probe different parts of a node, so keep the maximum offset as its possible affect range.
+    if furthestOffset > self.furthestOffset { self.furthestOffset = furthestOffset }
+  }
+}
@@ -151,6 +151,12 @@ extension Parser {
   ///     statement → compiler-control-statement
   ///     statements → statement statements?
   mutating func parseCodeBlockItem(isAtTopLevel: Bool, allowInitDecl: Bool) -> RawCodeBlockItemSyntax? {
+    let startToken = self.currentToken
+    if let syntax = self.loadCurrentSyntaxNodeFromCache(for: .codeBlockItem) {
+      self.registerNodeForIncrementalParse(node: syntax.raw, startToken: startToken)
+      return RawCodeBlockItemSyntax(syntax.raw)
+    }
+
     if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
       return RawCodeBlockItemSyntax(
         remainingTokens,
@@ -183,12 +189,17 @@ extension Parser {
     if item.raw.isEmpty && semi == nil && trailingSemis.isEmpty {
       return nil
     }
-    return RawCodeBlockItemSyntax(
+
+    let result = RawCodeBlockItemSyntax(
       item: item,
       semicolon: semi,
       RawUnexpectedNodesSyntax(trailingSemis, arena: self.arena),
       arena: self.arena
     )
+
+    self.registerNodeForIncrementalParse(node: result.raw, startToken: startToken)
+
+    return result
   }
 
   private mutating func parseStatementItem() -> RawCodeBlockItemSyntax.Item {
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ extension Parser {`
`33`	`33`	`) {`
`34`	`34`	`self.lexemes = lexemes`
`35`	`35`	`self.currentToken = currentToken`
	`36`	`+ self.lexemes.recordFurthestOffset()`
`36`	`37`	`}`
`37`	`38`
`38`	`39`	`fileprivate init(cloning other: Parser) {`
`@@ -90,6 +91,7 @@ extension Parser.Lookahead {`
`90`	`91`	`mutating func consumeAnyToken() {`
`91`	`92`	`tokensConsumed += 1`
`92`	`93`	`self.currentToken = self.lexemes.advance()`
	`94`	`+ self.lexemes.recordFurthestOffset()`
`93`	`95`	`}`
`94`	`96`
`95`	`97`	`mutating func consumeAnyToken(remapping: RawTokenKind) {`