Skip to content

Commit 44689a8

Browse files
committed
[SyntaxData] Alternative allocation strategy
* Lazily allocate layout buffers * Tail allocate a storage for the layout buffer base address
1 parent aeab8c6 commit 44689a8

File tree

1 file changed

+42
-43
lines changed

1 file changed

+42
-43
lines changed

Sources/SwiftSyntax/Syntax.swift

Lines changed: 42 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ struct SyntaxData: Sendable {
345345
/// This 4 bytes fits nicely after the 12 bytes `absoluteInfo`.
346346
let childCount: UInt32
347347

348-
// If the childCount > 0, the layout buffer (`SyntaxDataArenaReference? * childCount`) is tail allocated.
348+
// If the childCount > 0, a pointer to the layout buffer (`UnsafePointer<SyntaxDataReference?>?`) is tail allocated.
349349
}
350350

351351
/// `SyntaxDataArena` manages the entire data of a "red" tree.
@@ -388,45 +388,39 @@ final class SyntaxDataArena: @unchecked Sendable {
388388
return SyntaxDataReferenceBuffer()
389389
}
390390

391-
// The storage to the pointer to the buffer is allocated next to the SyntaxData.
392-
let baseAddress = parent.advanced(by: 1)
391+
// The storage of the buffer address is allocated next to the SyntaxData.
392+
let baseAddressRef = parent.advanced(by: 1)
393393
.unsafeRawPointer
394-
.assumingMemoryBound(to: SyntaxDataReference?.self)
395-
let buffer = UnsafeBufferPointer(start: baseAddress, count: childCount)
396-
397-
// The _last_ element is initially filled with `~0` indicating not populated.
398-
@inline(__always) func isPopulated() -> Bool {
399-
baseAddress
400-
.advanced(by: childCount - 1)
401-
.withMemoryRebound(to: UInt.self, capacity: 1) { pointer in
402-
pointer.pointee != ~0
403-
}
404-
}
394+
.assumingMemoryBound(to: UnsafePointer<SyntaxDataReference?>?.self)
405395

406396
// If the buffer is already populated, return it.
407-
if isPopulated() {
408-
return SyntaxDataReferenceBuffer(buffer)
397+
if let baseAddress = baseAddressRef.pointee {
398+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
409399
}
410400

411401
mutex.lock()
412402
defer { mutex.unlock() }
413403

414-
// Recheck before populating, maybe some other thread has populated the buffer
415-
// during acquiring the lock.
416-
if !isPopulated() {
417-
populateDataLayoutImpl(parent)
404+
// Recheck: another thread may have populated the buffer while we were acquiring the lock.
405+
if let baseAddress = baseAddressRef.pointee {
406+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
418407
}
419408

409+
let buffer = createLayoutDataImpl(parent)
410+
// Remember the base address of the created buffer.
411+
UnsafeMutablePointer(mutating: baseAddressRef).pointee = buffer.baseAddress
412+
420413
return SyntaxDataReferenceBuffer(buffer)
421414
}
422415

423-
/// Fill the layout buffer of the node.
424-
private func populateDataLayoutImpl(_ parent: SyntaxDataReference) {
425-
let baseAddress = parent.advanced(by: 1)
426-
.unsafeRawPointer
427-
.assumingMemoryBound(to: SyntaxDataReference?.self)
416+
/// Create the layout buffer of the node.
417+
private func createLayoutDataImpl(_ parent: SyntaxDataReference) -> UnsafeBufferPointer<SyntaxDataReference?> {
418+
let allocated = self.allocator.allocate(
419+
SyntaxDataReference?.self,
420+
count: Int(truncatingIfNeeded: parent.pointee.childCount)
421+
)
428422

429-
var ptr = UnsafeMutablePointer(mutating: baseAddress)
423+
var ptr = allocated.baseAddress!
430424
var absoluteInfo = parent.pointee.absoluteInfo.advancedToFirstChild()
431425
for raw in parent.pointee.raw.layoutView!.children {
432426
let dataRef = raw.map {
@@ -436,25 +430,29 @@ final class SyntaxDataArena: @unchecked Sendable {
436430
absoluteInfo = absoluteInfo.advancedBySibling(raw)
437431
ptr += 1
438432
}
433+
return UnsafeBufferPointer(allocated)
439434
}
440435

441436
/// Calculate the recommended slab size of `BumpPtrAllocator`.
442437
///
443-
/// Estimate the total allocation size assuming the client visits every nodes.
444-
/// Return the estimated size, or 4096 if it's larger than 4096.
438+
/// Estimate the total allocation size assuming the client visits every node in
439+
/// the tree. Return the estimated size, or 4096 if it's larger than 4096.
445440
///
446-
/// Each node consumes `SyntaxData` size at least. In addition to that, each syntax collection
447-
/// element consumes `SyntaxDataReference` in the parent's layout. For non-collection layout
448-
/// nodes, the layout is usually sparse, so we can't calculate the exact memory consumption
449-
/// until we see the syntax kind. But 4 slots per each node looks like an enough estimation.
441+
/// Each node consumes at least the size of `SyntaxData`. A non-empty layout node tail
442+
/// allocates pointer storage for the base address of the layout buffer.
443+
///
444+
/// For layout buffers, each child element consumes a `SyntaxDataReference` in
445+
/// the parent's layout. But for non-collection layout nodes, the layout is usually
446+
/// sparse, so we can't calculate the exact memory size until we see the RawSyntax.
447+
/// That being said, `SyntaxData` + 4 pointer sizes looks like a good enough estimate.
450448
private static func slabSize(for raw: RawSyntax) -> Int {
451449
let dataSize = MemoryLayout<SyntaxData>.stride
452-
let slotSize = MemoryLayout<SyntaxDataReference?>.stride
450+
let pointerSize = MemoryLayout<UnsafeRawPointer>.stride
453451

454452
let nodeCount = raw.totalNodes
455453
var totalSize = dataSize
456-
if nodeCount > 1 {
457-
totalSize += (dataSize + slotSize * 4) * (nodeCount &- 1)
454+
if nodeCount != 0 {
455+
totalSize += (dataSize + pointerSize * 4) * (nodeCount &- 1)
458456
}
459457
// Power of 2 might look nicer, but 'BumpPtrAllocator' doesn't require that.
460458
return min(totalSize, 4096)
@@ -469,9 +467,13 @@ final class SyntaxDataArena: @unchecked Sendable {
469467
) -> SyntaxDataReference {
470468
let childCount = raw.layoutView?.children.count ?? 0
471469

472-
// Allocate 'SyntaxData' + buffer for child data.
470+
// Allocate 'SyntaxData' + storage for the reference to the children data.
473471
// NOTE: If you change the memory layout, revisit 'slabSize(for:)' too.
474-
let totalSize = MemoryLayout<SyntaxData>.stride &+ MemoryLayout<SyntaxDataReference?>.stride * childCount
472+
var totalSize = MemoryLayout<SyntaxData>.stride
473+
if childCount != 0 {
474+
// Tail allocate the storage for the pointer to the lazily allocated layout data.
475+
totalSize &+= MemoryLayout<UnsafePointer<SyntaxDataReference?>?>.size
476+
}
475477
let alignment = MemoryLayout<SyntaxData>.alignment
476478
let allocated = allocator.allocate(byteCount: totalSize, alignment: alignment).baseAddress!
477479

@@ -487,14 +489,11 @@ final class SyntaxDataArena: @unchecked Sendable {
487489
)
488490

489491
if childCount != 0 {
490-
// Fill the _last_ element with '~0' to indicate it's not populated.
492+
// Initialize the tail allocated storage with 'nil'.
491493
allocated
492494
.advanced(by: MemoryLayout<SyntaxData>.stride)
493-
.bindMemory(to: SyntaxDataReference?.self, capacity: childCount)
494-
.advanced(by: childCount - 1)
495-
.withMemoryRebound(to: UInt.self, capacity: 1) {
496-
$0.initialize(to: ~0)
497-
}
495+
.bindMemory(to: UnsafePointer<SyntaxDataReference?>?.self, capacity: 1)
496+
.initialize(to: nil)
498497
}
499498

500499
return SyntaxDataReference(UnsafePointer(dataRef))

0 commit comments

Comments
 (0)