Skip to content

Commit 16cfac8

Browse files
committed
[SyntaxData] Alternative allocation strategy
* Lazily allocate layout buffers * Tail allocate a storage for the layout buffer base address
1 parent 12dea90 commit 16cfac8

File tree

1 file changed

+40
-41
lines changed

1 file changed

+40
-41
lines changed

Sources/SwiftSyntax/Syntax.swift

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -389,45 +389,39 @@ final class SyntaxDataArena: @unchecked Sendable {
389389
return SyntaxDataReferenceBuffer()
390390
}
391391

392-
// The storage to the pointer to the buffer is allocated next to the SyntaxData.
393-
let baseAddress = parent.advanced(by: 1)
392+
// The storage of the buffer address is allocated next to the SyntaxData.
393+
let baseAddressRef = parent.advanced(by: 1)
394394
.unsafeRawPointer
395-
.assumingMemoryBound(to: SyntaxDataReference?.self)
396-
let buffer = UnsafeBufferPointer(start: baseAddress, count: childCount)
397-
398-
// The _last_ element is initially filled with `~0` indicating not populated.
399-
@inline(__always) func isPopulated() -> Bool {
400-
baseAddress
401-
.advanced(by: childCount - 1)
402-
.withMemoryRebound(to: UInt.self, capacity: 1) { pointer in
403-
pointer.pointee != ~0
404-
}
405-
}
395+
.assumingMemoryBound(to: UnsafePointer<SyntaxDataReference?>?.self)
406396

407397
// If the buffer is already populated, return it.
408-
if isPopulated() {
409-
return SyntaxDataReferenceBuffer(buffer)
398+
if let baseAddress = baseAddressRef.pointee {
399+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
410400
}
411401

412402
mutex.lock()
413403
defer { mutex.unlock() }
414404

415-
// Recheck before populating, maybe some other thread has populated the buffer
416-
// during acquiring the lock.
417-
if !isPopulated() {
418-
populateDataLayoutImpl(parent)
405+
// Recheck, maybe some other thread has populated the buffer during acquiring the lock.
406+
if let baseAddress = baseAddressRef.pointee {
407+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
419408
}
420409

410+
let buffer = createLayoutDataImpl(parent)
411+
// Remember the base address of the created buffer.
412+
UnsafeMutablePointer(mutating: baseAddressRef).pointee = buffer.baseAddress
413+
421414
return SyntaxDataReferenceBuffer(buffer)
422415
}
423416

424-
/// Fill the layout buffer of the node.
425-
private func populateDataLayoutImpl(_ parent: SyntaxDataReference) {
426-
let baseAddress = parent.advanced(by: 1)
427-
.unsafeRawPointer
428-
.assumingMemoryBound(to: SyntaxDataReference?.self)
417+
/// Create the layout buffer of the node.
418+
private func createLayoutDataImpl(_ parent: SyntaxDataReference) -> UnsafeBufferPointer<SyntaxDataReference?> {
419+
let allocated = self.allocator.allocate(
420+
SyntaxDataReference?.self,
421+
count: Int(truncatingIfNeeded: parent.pointee.childCount)
422+
)
429423

430-
var ptr = UnsafeMutablePointer(mutating: baseAddress)
424+
var ptr = allocated.baseAddress!
431425
var absoluteInfo = parent.pointee.absoluteInfo.advancedToFirstChild()
432426
for raw in parent.pointee.raw.layoutView!.children {
433427
let dataRef = raw.map {
@@ -437,25 +431,29 @@ final class SyntaxDataArena: @unchecked Sendable {
437431
absoluteInfo = absoluteInfo.advancedBySibling(raw)
438432
ptr += 1
439433
}
434+
return UnsafeBufferPointer(allocated)
440435
}
441436

442437
/// Calculate the recommended slab size of `BumpPtrAllocator`.
443438
///
444-
/// Estimate the total allocation size assuming the client visits every nodes.
445-
/// Return the estimated size, or 4096 if it's larger than 4096.
439+
/// Estimate the total allocation size assuming the client visits every node in
440+
/// the tree. Return the estimated size, or 4096 if it's larger than 4096.
446441
///
447-
/// Each node consumes `SyntaxData` size at least. In addition to that, each syntax collection
448-
/// element consumes `SyntaxDataReference` in the parent's layout. For non-collection layout
449-
/// nodes, the layout is usually sparse, so we can't calculate the exact memory consumption
450-
/// until we see the syntax kind. But 4 slots per each node looks like an enough estimation.
442+
/// Each node consumes at least `SyntaxData` size. A non-empty layout node tail
443+
/// allocates a pointer storage for the base address of the layout buffer.
444+
///
445+
/// For layout buffers, each child element consumes a `SyntaxDataReference` in
446+
/// the parent's layout. But for non-collection layout nodes, the layout is usually
447+
/// sparse, so we can't calculate the exact memory size until we see the RawSyntax.
448+
/// That being said, `SyntaxData` + 4 pointer sizes looks like a sufficient estimate.
451449
private static func slabSize(for raw: RawSyntax) -> Int {
452450
let dataSize = MemoryLayout<SyntaxData>.stride
453-
let slotSize = MemoryLayout<SyntaxDataReference?>.stride
451+
let pointerSize = MemoryLayout<UnsafeRawPointer>.stride
454452

455453
let nodeCount = raw.totalNodes
456454
var totalSize = dataSize
457-
if nodeCount > 1 {
458-
totalSize += (dataSize + slotSize * 4) * (nodeCount &- 1)
455+
if nodeCount != 0 {
456+
totalSize += (dataSize + pointerSize * 4) * (nodeCount &- 1)
459457
}
460458
// Power of 2 might look nicer, but 'BumpPtrAllocator' doesn't require that.
461459
return min(totalSize, 4096)
@@ -472,7 +470,11 @@ final class SyntaxDataArena: @unchecked Sendable {
472470

473471
// Allocate 'SyntaxData' + storage for the pointer to the lazily allocated child layout buffer.
474472
// NOTE: If you change the memory layout, revisit 'slabSize(for:)' too.
475-
let totalSize = MemoryLayout<SyntaxData>.stride &+ MemoryLayout<SyntaxDataReference?>.stride * childCount
473+
var totalSize = MemoryLayout<SyntaxData>.stride
474+
if childCount != 0 {
475+
// Tail allocate the storage for the pointer to the lazily allocated layout data.
476+
totalSize &+= MemoryLayout<UnsafePointer<SyntaxDataReference?>?>.size
477+
}
476478
let alignment = MemoryLayout<SyntaxData>.alignment
477479
let allocated = allocator.allocate(byteCount: totalSize, alignment: alignment).baseAddress!
478480

@@ -488,14 +490,11 @@ final class SyntaxDataArena: @unchecked Sendable {
488490
)
489491

490492
if childCount != 0 {
491-
// Fill the _last_ element with '~0' to indicate it's not populated.
493+
// Initialize the tail allocated storage with nil.
492494
allocated
493495
.advanced(by: MemoryLayout<SyntaxData>.stride)
494-
.bindMemory(to: SyntaxDataReference?.self, capacity: childCount)
495-
.advanced(by: childCount - 1)
496-
.withMemoryRebound(to: UInt.self, capacity: 1) {
497-
$0.initialize(to: ~0)
498-
}
496+
.bindMemory(to: UnsafePointer<SyntaxDataReference?>?.self, capacity: 1)
497+
.initialize(to: nil)
499498
}
500499

501500
return SyntaxDataReference(UnsafePointer(dataRef))

0 commit comments

Comments
 (0)