From 1e6406dc6c57f0f728736adcd3068b52839e8fb2 Mon Sep 17 00:00:00 2001 From: stevenwong Date: Mon, 10 Jul 2023 22:40:25 +0800 Subject: [PATCH] Port incremental parse to `CodeBlockItem` Use `LookaheadTracker` to track the furthest offset the parser reached, and use `LookaheadRanges` to record that offset as the end of the source range that might affect the parse of a node. We can then use that information to decide whether a node can be reused. --- .../swiftparser/ParserEntryFile.swift | 57 ++++++++++++-- .../IncrementalParseTransition.swift | 65 +++++++++------- .../SwiftParser/Lexer/LexemeSequence.swift | 44 ++++++++++- Sources/SwiftParser/Parser.swift | 78 ++++++++++++++++++- Sources/SwiftParser/TopLevel.swift | 13 +++- .../SwiftParser/generated/Parser+Entry.swift | 49 ++++++++++-- Sources/SwiftSyntax/Raw/RawSyntax.swift | 14 ++++ .../IncrementalParseTestUtils.swift | 28 +++++-- .../Commands/PerformanceTest.swift | 29 ++++++- Tests/SwiftParserTest/Assertions.swift | 7 +- .../IncrementalParsingTests.swift | 51 ++++++++++-- Tests/SwiftParserTest/LexerTests.swift | 7 +- 12 files changed, 380 insertions(+), 62 deletions(-) diff --git a/CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift b/CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift index 42d1f64c2e5..071bc3f330f 100644 --- a/CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift +++ b/CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift @@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) { /// Parse the source code in the given string as Swift source file. See /// `Parser.init` for more details. public static func parse( - source: String, - parseTransition: IncrementalParseTransition? 
= nil + source: String ) -> SourceFileSyntax { var parser = Parser(source) return SourceFileSyntax.parse(from: &parser) @@ -35,18 +34,66 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) { DeclSyntax( """ - /// Parse the source code in the given string as Swift source file. See + /// Parse the source code in the given buffer as Swift source file. See /// `Parser.init` for more details. public static func parse( source: UnsafeBufferPointer, - maximumNestingLevel: Int? = nil, - parseTransition: IncrementalParseTransition? = nil + maximumNestingLevel: Int? = nil ) -> SourceFileSyntax { var parser = Parser(source, maximumNestingLevel: maximumNestingLevel) return SourceFileSyntax.parse(from: &parser) } """ ) + + DeclSyntax( + """ + /// Parse the source code in the given string as Swift source file with support + /// for incremental parsing. + /// + /// When parsing a source file for the first time, invoke `parseIncrementally` + /// with `parseTransition: nil`. This returns the initial tree as well as + /// ``LookaheadRanges``. If an edit is made to the source file, an + /// ``IncrementalParseTransition`` can be constructed from the initial tree + /// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with + /// the post-edit source and that parse transition, the parser will re-use + /// nodes that haven’t changed. + /// + /// - Parameters: + /// - source: The source code to parse + /// - parseTransition: If a similar source file has already been parsed, the + /// ``IncrementalParseTransition`` that contains the previous tree as well + /// as the edits that were performed to it. 
+ /// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe + /// how far the parser looked ahead while parsing a node, which is + /// necessary to construct an ``IncrementalParseTransition`` for a + /// subsequent incremental parse + public static func parseIncrementally( + source: String, + parseTransition: IncrementalParseTransition? + ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) { + var parser = Parser(source, parseTransition: parseTransition) + return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges) + } + """ + ) + + DeclSyntax( + """ + /// Parse the source code in the given buffer as Swift source file with support + /// for incremental parsing. + /// + /// See doc comments in ``Parser/parseIncrementally(source:parseTransition:)`` + public static func parseIncrementally( + source: UnsafeBufferPointer, + maximumNestingLevel: Int? = nil, + parseTransition: IncrementalParseTransition? + ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) { + var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition) + return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges) + } + """ + ) } DeclSyntax( diff --git a/Sources/SwiftParser/IncrementalParseTransition.swift b/Sources/SwiftParser/IncrementalParseTransition.swift index 155f4b991a7..73eef81b5f9 100644 --- a/Sources/SwiftParser/IncrementalParseTransition.swift +++ b/Sources/SwiftParser/IncrementalParseTransition.swift @@ -11,6 +11,30 @@ //===----------------------------------------------------------------------===// @_spi(RawSyntax) import SwiftSyntax + +extension Parser { + mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? 
{ + guard parseLookup != nil else { + return nil + } + + let currentOffset = self.lexemes.getOffsetToStart(self.currentToken) + if let node = parseLookup!.lookUp(currentOffset, kind: kind) { + self.lexemes.advance(by: node.byteSize, currentToken: &self.currentToken) + return node + } + + return nil + } + + mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) { + lookaheadRanges.registerNodeForIncrementalParse( + node: node, + lookaheadLength: lexemes.lookaheadTracker.pointee.furthestOffset - self.lexemes.getOffsetToStart(startToken) + ) + } +} + /// Accepts the re-used ``Syntax`` nodes that `IncrementalParseTransition` /// determined they should be re-used for a parse invocation. /// @@ -20,13 +44,12 @@ /// This is also used for testing purposes to ensure incremental reparsing /// worked as expected. public protocol IncrementalParseReusedNodeDelegate { - /// Accepts the range and ``Syntax`` node of skipped source region. + /// Accepts ``Syntax`` node of skipped source region. /// /// - Parameters: - /// - range: The source region of the currently parsed source. /// - previousNode: The node from the previous tree that is associated with /// the skipped source region. 
- func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) + func parserReusedNode(previousNode: Syntax) } /// An implementation of `IncrementalParseReusedNodeDelegate` that just collects @@ -34,12 +57,12 @@ public protocol IncrementalParseReusedNodeDelegate { public final class IncrementalParseReusedNodeCollector: IncrementalParseReusedNodeDelegate { - public var rangeAndNodes: [(ByteSourceRange, Syntax)] = [] + public var nodes: [Syntax] = [] public init() {} - public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) { - rangeAndNodes.append((range, previousNode)) + public func parserReusedNode(previousNode: Syntax) { + nodes.append(previousNode) } } @@ -48,6 +71,7 @@ public final class IncrementalParseReusedNodeCollector: public final class IncrementalParseTransition { fileprivate let previousTree: SourceFileSyntax fileprivate let edits: ConcurrentEdits + fileprivate let lookaheadRanges: LookaheadRanges fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate? /// - Parameters: @@ -59,17 +83,19 @@ public final class IncrementalParseTransition { public init( previousTree: SourceFileSyntax, edits: ConcurrentEdits, + lookaheadRanges: LookaheadRanges, reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil ) { self.previousTree = previousTree self.edits = edits + self.lookaheadRanges = lookaheadRanges self.reusedDelegate = reusedNodeDelegate } } /// Provides a mechanism for the parser to skip regions of an incrementally /// updated source that was already parsed during a previous parse invocation. -public struct IncrementalParseLookup { +struct IncrementalParseLookup { fileprivate let transition: IncrementalParseTransition fileprivate var cursor: SyntaxCursor @@ -100,8 +126,7 @@ public struct IncrementalParseLookup { /// - Returns: A ``Syntax`` node from the previous parse invocation, /// representing the contents of this region, if it is still valid /// to re-use. `nil` otherwise. 
- @_spi(RawSyntax) - public mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? { + fileprivate mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? { guard let prevOffset = translateToPreEditOffset(newOffset) else { return nil } @@ -109,14 +134,13 @@ public struct IncrementalParseLookup { let node = cursorLookup(prevPosition: prevPosition, kind: kind) if let delegate = reusedDelegate, let node { delegate.parserReusedNode( - range: ByteSourceRange(offset: newOffset, length: node.byteSize), previousNode: node ) } return node } - mutating fileprivate func cursorLookup( + fileprivate mutating func cursorLookup( prevPosition: AbsolutePosition, kind: SyntaxKind ) -> Syntax? { @@ -148,24 +172,13 @@ public struct IncrementalParseLookup { return true } - // Node can also not be reused if an edit has been made in the next token's - // text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with - // a StructDecl inside and `private struc Foo {}` parses as two - // CodeBlockItems one for `private` and one for `struc Foo {}` - var nextLeafNodeLength: SourceLength = .zero - if let nextSibling = cursor.nextSibling { - // Fast path check: if next sibling is before all the edits then we can - // re-use the node. 
- if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset { - return true - } - if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) { - nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength - } + guard let nodeAffectRangeLength = transition.lookaheadRanges.lookaheadRanges[node.raw.id] else { + return false } + let nodeAffectRange = ByteSourceRange( offset: node.position.utf8Offset, - length: (node.totalLength + nextLeafNodeLength).utf8Length + length: nodeAffectRangeLength ) for edit in edits.edits { diff --git a/Sources/SwiftParser/Lexer/LexemeSequence.swift b/Sources/SwiftParser/Lexer/LexemeSequence.swift index 18b74568c3a..d27cae871c1 100644 --- a/Sources/SwiftParser/Lexer/LexemeSequence.swift +++ b/Sources/SwiftParser/Lexer/LexemeSequence.swift @@ -32,10 +32,23 @@ extension Lexer { /// usually less than 0.1% of the memory allocated by the syntax arena. var lexerStateAllocator = BumpPtrAllocator(slabSize: 256) - fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) { + /// The offset of the trailing trivia end of `nextToken` relative to the source buffer’s start. + var offsetToNextTokenEnd: Int { + self.getOffsetToStart(self.nextToken) + self.nextToken.byteLength + } + + /// See doc comments in ``LookaheadTracker`` + /// + /// This is an `UnsafeMutablePointer` for two reasons + /// - When `LexemeSequence` gets copied (e.g. when a ``Lookahead`` gets created), it should still reference the same ``LookaheadTracker`` so that any lookahead performed in the ``Lookahead`` also affects the original ``Parser``. It thus needs to be a reference type + /// - ``LookaheadTracker`` is not a class to avoid reference counting it. The ``Parser`` that creates the ``LexemeSequence`` will always outlive any ``Lookahead`` created for it. 
+ let lookaheadTracker: UnsafeMutablePointer + + fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor, lookaheadTracker: UnsafeMutablePointer) { self.sourceBufferStart = sourceBufferStart self.cursor = cursor self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator) + self.lookaheadTracker = lookaheadTracker } @_spi(Testing) @@ -43,13 +56,36 @@ extension Lexer { return self.advance() } + /// Record the offset of the end of `nextToken` as the furthest offset in ``LookaheadTracker`` + private func recordNextTokenInLookaheadTracker() { + self.lookaheadTracker.pointee.recordFurthestOffset(self.offsetToNextTokenEnd) + } + mutating func advance() -> Lexer.Lexeme { defer { self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator) } + self.recordNextTokenInLookaheadTracker() return self.nextToken } + /// Get the offset of the leading trivia start of `token` relative to `sourceBufferStart`. + func getOffsetToStart(_ token: Lexer.Lexeme) -> Int { + return self.sourceBufferStart.distance(to: token.cursor) + } + + /// Advance the cursor by `offset` and reset `currentToken` + /// + /// - Important: This should only be used for incremental parsing. + mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) { + self.cursor = currentToken.cursor + self.cursor.position = self.cursor.position.advanced(by: offset) + + self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator) + + currentToken = self.advance() + } + /// Reset the lexeme sequence to the state we were in when lexing `splitToken` /// but after we consumed `consumedPrefix` bytes from `splitToken`. /// - Warning: Do not add more usages of this function. 
@@ -63,6 +99,7 @@ extension Lexer { } func peek() -> Lexer.Lexeme { + self.recordNextTokenInLookaheadTracker() return self.nextToken } @@ -104,12 +141,13 @@ extension Lexer { @_spi(Testing) public static func tokenize( _ input: UnsafeBufferPointer, - from startIndex: Int = 0 + from startIndex: Int = 0, + lookaheadTracker: UnsafeMutablePointer ) -> LexemeSequence { precondition(input.isEmpty || startIndex < input.endIndex) let startChar = startIndex == input.startIndex ? UInt8(ascii: "\0") : input[startIndex - 1] let start = Cursor(input: input, previous: UInt8(ascii: "\0")) let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar) - return LexemeSequence(sourceBufferStart: start, cursor: cursor) + return LexemeSequence(sourceBufferStart: start, cursor: cursor, lookaheadTracker: lookaheadTracker) } } diff --git a/Sources/SwiftParser/Parser.swift b/Sources/SwiftParser/Parser.swift index c8e0e8896dd..69b5e4184d5 100644 --- a/Sources/SwiftParser/Parser.swift +++ b/Sources/SwiftParser/Parser.swift @@ -101,6 +101,15 @@ public struct Parser { /// When this nesting level is exceeded, the parser should stop parsing. let maximumNestingLevel: Int + /// See comments in ``IncrementalParseLookup`` + var parseLookup: IncrementalParseLookup? + + /// See comments in ``LookaheadRanges`` + public internal(set) var lookaheadRanges = LookaheadRanges() + + /// Parser should own a ``LookaheadTracker`` so that we can share one `furthestOffset` in a parse. + let lookaheadTrackerOwner = LookaheadTrackerOwner() + /// A default maximum nesting level that is used if the client didn't /// explicitly specify one. Debug builds of the parser comume a lot more stack /// space and thus have a lower default maximum nesting level. @@ -111,7 +120,11 @@ public struct Parser { #endif /// Initializes a ``Parser`` from the given string. - public init(_ input: String, maximumNestingLevel: Int? = nil) { + public init( + _ input: String, + maximumNestingLevel: Int? 
= nil, + parseTransition: IncrementalParseTransition? = nil + ) { self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel self.arena = ParsingSyntaxArena( @@ -124,8 +137,13 @@ public struct Parser { return arena.internSourceBuffer(buffer) } - self.lexemes = Lexer.tokenize(interned) + self.lexemes = Lexer.tokenize(interned, lookaheadTracker: lookaheadTrackerOwner.lookaheadTracker) self.currentToken = self.lexemes.advance() + if let parseTransition { + self.parseLookup = IncrementalParseLookup(transition: parseTransition) + } else { + self.parseLookup = nil + } } /// Initializes a ``Parser`` from the given input buffer. @@ -142,7 +160,12 @@ public struct Parser { /// arena is created automatically, and `input` copied into the /// arena. If non-`nil`, `input` must be within its registered /// source buffer or allocator. - public init(_ input: UnsafeBufferPointer, maximumNestingLevel: Int? = nil, arena: ParsingSyntaxArena? = nil) { + public init( + _ input: UnsafeBufferPointer, + maximumNestingLevel: Int? = nil, + parseTransition: IncrementalParseTransition? = nil, + arena: ParsingSyntaxArena? = nil + ) { self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel var sourceBuffer: UnsafeBufferPointer @@ -157,8 +180,13 @@ public struct Parser { sourceBuffer = self.arena.internSourceBuffer(input) } - self.lexemes = Lexer.tokenize(sourceBuffer) + self.lexemes = Lexer.tokenize(sourceBuffer, lookaheadTracker: lookaheadTrackerOwner.lookaheadTracker) self.currentToken = self.lexemes.advance() + if let parseTransition { + self.parseLookup = IncrementalParseLookup(transition: parseTransition) + } else { + self.parseLookup = nil + } } mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax { @@ -629,3 +657,45 @@ extension Parser { ) } } + +/// Record the furthest offset to `sourceBufferStart` that is reached by `LexemeSequence.advance()` or `LexemeSequence.peek()`. 
+public struct LookaheadTracker { + private(set) var furthestOffset: Int = 0 + + public init() {} + + mutating func recordFurthestOffset(_ furthestOffset: Int) { + /// We may look ahead multiple times to find different valid parts of a node, so we take the maximum of the lookahead offsets as the range that may affect the node. + self.furthestOffset = max(furthestOffset, self.furthestOffset) + } +} + +/// Owns a ``LookaheadTracker``. +/// +/// Once the `LookaheadTrackerOwner` is deinitialized, the ``LookaheadTracker`` is also destroyed. +class LookaheadTrackerOwner { + var lookaheadTracker: UnsafeMutablePointer + + init() { + self.lookaheadTracker = .allocate(capacity: 1) + self.lookaheadTracker.initialize(to: LookaheadTracker()) + } + + deinit { + self.lookaheadTracker.deallocate() + } +} + +/// Record the lookahead ranges for syntax nodes. +public struct LookaheadRanges { + /// For each node that is recorded for re-use, the number of UTF-8 bytes that the parser looked ahead to parse the node, measured from the start of the node’s leading trivia. + /// + /// This information can be used to determine whether a node can be reused in an incremental parse. A node can only be re-used if no byte in its lookahead range has changed. + var lookaheadRanges: [RawSyntax.ID: Int] = [:] + + public init() {} + + mutating func registerNodeForIncrementalParse(node: RawSyntax, lookaheadLength: Int) { + self.lookaheadRanges[node.id] = lookaheadLength + } +} diff --git a/Sources/SwiftParser/TopLevel.swift b/Sources/SwiftParser/TopLevel.swift index 90baaeb7ea0..cf45a59fbd4 100644 --- a/Sources/SwiftParser/TopLevel.swift +++ b/Sources/SwiftParser/TopLevel.swift @@ -151,6 +151,12 @@ extension Parser { /// statement → compiler-control-statement /// statements → statement statements? mutating func parseCodeBlockItem(isAtTopLevel: Bool, allowInitDecl: Bool) -> RawCodeBlockItemSyntax? 
{ + let startToken = self.currentToken + if let syntax = self.loadCurrentSyntaxNodeFromCache(for: .codeBlockItem) { + self.registerNodeForIncrementalParse(node: syntax.raw, startToken: startToken) + return RawCodeBlockItemSyntax(syntax.raw) + } + if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() { return RawCodeBlockItemSyntax( remainingTokens, @@ -183,12 +189,17 @@ extension Parser { if item.raw.isEmpty && semi == nil && trailingSemis.isEmpty { return nil } - return RawCodeBlockItemSyntax( + + let result = RawCodeBlockItemSyntax( item: item, semicolon: semi, RawUnexpectedNodesSyntax(trailingSemis, arena: self.arena), arena: self.arena ) + + self.registerNodeForIncrementalParse(node: result.raw, startToken: startToken) + + return result } private mutating func parseStatementItem() -> RawCodeBlockItemSyntax.Item { diff --git a/Sources/SwiftParser/generated/Parser+Entry.swift b/Sources/SwiftParser/generated/Parser+Entry.swift index a446e1e570d..10edbaf464a 100644 --- a/Sources/SwiftParser/generated/Parser+Entry.swift +++ b/Sources/SwiftParser/generated/Parser+Entry.swift @@ -18,23 +18,62 @@ extension Parser { /// Parse the source code in the given string as Swift source file. See /// `Parser.init` for more details. public static func parse( - source: String, - parseTransition: IncrementalParseTransition? = nil + source: String ) -> SourceFileSyntax { var parser = Parser(source) return SourceFileSyntax.parse(from: &parser) } - /// Parse the source code in the given string as Swift source file. See + /// Parse the source code in the given buffer as Swift source file. See /// `Parser.init` for more details. public static func parse( source: UnsafeBufferPointer, - maximumNestingLevel: Int? = nil, - parseTransition: IncrementalParseTransition? = nil + maximumNestingLevel: Int? 
= nil ) -> SourceFileSyntax { var parser = Parser(source, maximumNestingLevel: maximumNestingLevel) return SourceFileSyntax.parse(from: &parser) } + + /// Parse the source code in the given string as Swift source file with support + /// for incremental parsing. + /// + /// When parsing a source file for the first time, invoke `parseIncrementally` + /// with `parseTransition: nil`. This returns the initial tree as well as + /// ``LookaheadRanges``. If an edit is made to the source file, an + /// ``IncrementalParseTransition`` can be constructed from the initial tree + /// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with + /// the post-edit source and that parse transition, the parser will re-use + /// nodes that haven’t changed. + /// + /// - Parameters: + /// - source: The source code to parse + /// - parseTransition: If a similar source file has already been parsed, the + /// ``IncrementalParseTransition`` that contains the previous tree as well + /// as the edits that were performed to it. + /// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe + /// how far the parser looked ahead while parsing a node, which is + /// necessary to construct an ``IncrementalParseTransition`` for a + /// subsequent incremental parse + public static func parseIncrementally( + source: String, + parseTransition: IncrementalParseTransition? + ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) { + var parser = Parser(source, parseTransition: parseTransition) + return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges) + } + + /// Parse the source code in the given buffer as Swift source file with support + /// for incremental parsing. + /// + /// See doc comments in ``Parser/parseIncrementally(source:parseTransition:)`` + public static func parseIncrementally( + source: UnsafeBufferPointer, + maximumNestingLevel: Int? = nil, + parseTransition: IncrementalParseTransition? 
+ ) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) { + var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition) + return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges) + } } public protocol SyntaxParseable: SyntaxProtocol { diff --git a/Sources/SwiftSyntax/Raw/RawSyntax.swift b/Sources/SwiftSyntax/Raw/RawSyntax.swift index cf3cb4787ff..42f89a1e1d7 100644 --- a/Sources/SwiftSyntax/Raw/RawSyntax.swift +++ b/Sources/SwiftSyntax/Raw/RawSyntax.swift @@ -918,6 +918,20 @@ extension RawSyntax { } } +extension RawSyntax: Identifiable { + public struct ID: Hashable { + /// The pointer to the start of the `RawSyntax` node. + private var pointer: UnsafeRawPointer + fileprivate init(_ raw: RawSyntax) { + self.pointer = UnsafeRawPointer(raw.pointer) + } + } + + public var id: ID { + return ID(self) + } +} + #if DEBUG /// See `SyntaxMemoryLayout`. var RawSyntaxDataMemoryLayouts: [String: SyntaxMemoryLayout.Value] = [ diff --git a/Sources/_SwiftSyntaxTestSupport/IncrementalParseTestUtils.swift b/Sources/_SwiftSyntaxTestSupport/IncrementalParseTestUtils.swift index 6f7699bc633..5832618c582 100644 --- a/Sources/_SwiftSyntaxTestSupport/IncrementalParseTestUtils.swift +++ b/Sources/_SwiftSyntaxTestSupport/IncrementalParseTestUtils.swift @@ -30,13 +30,18 @@ public func assertIncrementalParse( let originalString = String(originalSource) let editedString = String(editedSource) - let originalTree = Parser.parse(source: originalString) + let (originalTree, lookaheadRanges) = Parser.parseIncrementally(source: originalString, parseTransition: nil) let reusedNodesCollector = IncrementalParseReusedNodeCollector() - let transition = IncrementalParseTransition(previousTree: originalTree, edits: concurrentEdits, reusedNodeDelegate: reusedNodesCollector) + let transition = IncrementalParseTransition( + previousTree: originalTree, + edits: concurrentEdits, + lookaheadRanges: lookaheadRanges, + reusedNodeDelegate: 
reusedNodesCollector + ) let newTree = Parser.parse(source: editedString) - let incrementallyParsedNewTree = Parser.parse(source: editedString, parseTransition: transition) + let (incrementallyParsedNewTree, _) = Parser.parseIncrementally(source: editedString, parseTransition: transition) // Round-trip assertStringsEqualWithDiff( @@ -61,11 +66,11 @@ public func assertIncrementalParse( } // Re-used nodes - if reusedNodesCollector.rangeAndNodes.count != expectedReusedNodes.count { + if reusedNodesCollector.nodes.count != expectedReusedNodes.count { XCTFail( """ - Expected \(expectedReusedNodes.count) re-used nodes but received \(reusedNodesCollector.rangeAndNodes.count): - \(reusedNodesCollector.rangeAndNodes.map {$0.1.description}.joined(separator: "\n")) + Expected \(expectedReusedNodes.count) re-used nodes but received \(reusedNodesCollector.nodes.count): + \(reusedNodesCollector.nodes.map {$0.description}.joined(separator: "\n")) """, file: file, line: line @@ -79,11 +84,11 @@ public func assertIncrementalParse( continue } - guard let reusedNode = reusedNodesCollector.rangeAndNodes.first(where: { $0.0 == range })?.1 else { + guard let reusedNode = reusedNodesCollector.nodes.first(where: { $0.byteRangeAfterTrimmingTrivia == range }) else { XCTFail( """ Fail to match the range of \(expectedReusedNode.source) in: - \(reusedNodesCollector.rangeAndNodes.map({"\($0.0): \($0.1.description)"}).joined(separator: "\n")) + \(reusedNodesCollector.nodes.map({"\($0.byteRangeAfterTrimmingTrivia): \($0.description)"}).joined(separator: "\n")) """, file: expectedReusedNode.file, line: expectedReusedNode.line @@ -215,3 +220,10 @@ public func applyEdits( } return String(bytes: bytes, encoding: .utf8)! } + +fileprivate extension Syntax { + /// The byte source range of this node excluding leading and trailing trivia. 
+ var byteRangeAfterTrimmingTrivia: ByteSourceRange { + return ByteSourceRange(offset: positionAfterSkippingLeadingTrivia.utf8Offset, length: byteSizeAfterTrimmingTrivia) + } +} diff --git a/Sources/swift-parser-cli/Commands/PerformanceTest.swift b/Sources/swift-parser-cli/Commands/PerformanceTest.swift index 5b3c0afa046..3463eda3a8c 100644 --- a/Sources/swift-parser-cli/Commands/PerformanceTest.swift +++ b/Sources/swift-parser-cli/Commands/PerformanceTest.swift @@ -14,6 +14,7 @@ import _InstructionCounter import ArgumentParser import Foundation import SwiftParser +import SwiftSyntax struct PerformanceTest: ParsableCommand { static var configuration = CommandConfiguration( @@ -22,6 +23,9 @@ struct PerformanceTest: ParsableCommand { "Parse all .swift files in '--directory' and its subdirectories '--iteration' times and output the average time (in milliseconds) one iteration took." ) + @Flag(name: .long, help: "Parse files incrementally") + var incrementalParse: Bool = false + @Option(help: "The directory in which all .swift files should be parsed") var directory: String @@ -37,12 +41,35 @@ struct PerformanceTest: ParsableCommand { .filter { $0.pathExtension == "swift" } .map { try Data(contentsOf: $0) } + var fileTransition: [Data: IncrementalParseTransition] = [:] + + if self.incrementalParse { + /// The initial parse for incremental parsing + for file in files { + file.withUnsafeBytes { buf in + let (tree, lookaheadRanges) = Parser.parseIncrementally( + source: buf.bindMemory(to: UInt8.self), + parseTransition: nil + ) + + fileTransition[file] = IncrementalParseTransition( + previousTree: tree, + edits: ConcurrentEdits(fromSequential: []), + lookaheadRanges: lookaheadRanges + ) + } + } + } + let start = Date() let startInstructions = getInstructionsExecuted() for _ in 0...allocate(capacity: 1) + defer { + lookaheadTracker.deallocate() + } + lookaheadTracker.initialize(to: LookaheadTracker()) source.withUTF8 { buf in var lexemes = [Lexer.Lexeme]() - for token in 
Lexer.tokenize(buf, from: 0) { + for token in Lexer.tokenize(buf, from: 0, lookaheadTracker: lookaheadTracker) { lexemes.append(token) if token.rawTokenKind == .eof { diff --git a/Tests/SwiftParserTest/IncrementalParsingTests.swift b/Tests/SwiftParserTest/IncrementalParsingTests.swift index 2299d8fd838..2c25ab5986d 100644 --- a/Tests/SwiftParserTest/IncrementalParsingTests.swift +++ b/Tests/SwiftParserTest/IncrementalParsingTests.swift @@ -25,8 +25,7 @@ public class IncrementalParsingTests: XCTestCase { ) } - public func testReusedNode() throws { - try XCTSkipIf(true, "Swift parser does not handle node reuse yet") + public func testReusedNode() { assertIncrementalParse( """ struct A⏩️⏸️A⏪️ {} @@ -143,7 +142,7 @@ public class IncrementalParsingTests: XCTestCase { public func testInsertTextIdentifier() { assertIncrementalParse( """ - self = ⏩️⏸️_ _⏪️foo(1)[object1, object2] + o bar(1) + self = ⏩️⏸️_ _⏪️foo(1)[object1, object2] + bar(1) """ ) } @@ -272,8 +271,7 @@ public class IncrementalParsingTests: XCTestCase { ) } - public func testNextTokenCalculation() throws { - try XCTSkipIf(true, "Swift parser does not handle node reuse yet") + public func testNextTokenCalculation() { assertIncrementalParse( """ let a = "hello" @@ -285,8 +283,7 @@ public class IncrementalParsingTests: XCTestCase { ) } - public func testReplace() throws { - try XCTSkipIf(true, "Swift parser does not handle node reuse yet") + public func testReplace() { assertIncrementalParse( """ func foo() { @@ -428,4 +425,44 @@ public class IncrementalParsingTests: XCTestCase { """ ) } + + public func testTrailingClosure() { + assertIncrementalParse( + """ + foo() {} + trailingClosure: ⏩️switch x { + default: break + }⏸️{}⏪️ + """ + ) + } + + public func testMultiFunctionCall() { + assertIncrementalParse( + """ + foo() {} + foo1() {} + foo2() {} + ⏩️⏸️foo3() {}⏪️ + """, + reusedNodes: [ + ReusedNodeSpec("foo() {}", kind: .codeBlockItem), + ReusedNodeSpec("foo1() {}", kind: .codeBlockItem), + ] + ) + } + + 
public func testDeclFollowedByLabeledStmt() { + assertIncrementalParse( + """ + class foo {} + trailingClosure: ⏩️switch x { + default: break + }⏸️{}⏪️ + """, + reusedNodes: [ + ReusedNodeSpec("class foo {}", kind: .codeBlockItem) + ] + ) + } } diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift index 1566fe67658..bfb28f434a0 100644 --- a/Tests/SwiftParserTest/LexerTests.swift +++ b/Tests/SwiftParserTest/LexerTests.swift @@ -15,9 +15,14 @@ import XCTest @_spi(RawSyntax) @_spi(Testing) import SwiftParser fileprivate func lex(_ sourceBytes: [UInt8], body: ([Lexer.Lexeme]) throws -> Void) rethrows { + let lookaheadTracker = UnsafeMutablePointer.allocate(capacity: 1) + defer { + lookaheadTracker.deallocate() + } + lookaheadTracker.initialize(to: LookaheadTracker()) try sourceBytes.withUnsafeBufferPointer { (buf) in var lexemes = [Lexer.Lexeme]() - for token in Lexer.tokenize(buf, from: 0) { + for token in Lexer.tokenize(buf, from: 0, lookaheadTracker: lookaheadTracker) { lexemes.append(token) if token.rawTokenKind == .eof {