Skip to content

Commit 68cf832

Browse files
committed
[SyntaxData] Alternative allocation strategy
* Lazily allocate layout buffers * Tail allocate a storage for the layout buffer base address
1 parent 899b1d5 commit 68cf832

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

Sources/SwiftSyntax/Syntax.swift

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ struct SyntaxData: Sendable {
344344
/// This is a cached information, equals to `raw.layoutView?.children.count ?? 0`.
345345
let childCount: UInt32
346346

347-
// If the childCount > 0, the layout buffer (`SyntaxDataArenaReference? * childCount`) is tail allocated.
347+
// If the childCount > 0, a pointer to the layout buffer (`UnsafePointer<SyntaxDataReference?>?`) is tail allocated.
348348
}
349349

350350
/// `SyntaxDataArena` manages the entire data of a "red" tree.
@@ -387,45 +387,39 @@ final class SyntaxDataArena: @unchecked Sendable {
387387
return SyntaxDataReferenceBuffer()
388388
}
389389

390-
// The storage to the pointer to the buffer is allocated next to the SyntaxData.
391-
let baseAddress = parent.advanced(by: 1)
390+
// The storage of the buffer address is allocated next to the SyntaxData.
391+
let baseAddressRef = parent.advanced(by: 1)
392392
.unsafeRawPointer
393-
.assumingMemoryBound(to: SyntaxDataReference?.self)
394-
let buffer = UnsafeBufferPointer(start: baseAddress, count: childCount)
395-
396-
// The _last_ element is initially filled with `~0` indicating not populated.
397-
@inline(__always) func isPopulated() -> Bool {
398-
baseAddress
399-
.advanced(by: childCount - 1)
400-
.withMemoryRebound(to: UInt.self, capacity: 1) { pointer in
401-
pointer.pointee != ~0
402-
}
403-
}
393+
.assumingMemoryBound(to: UnsafePointer<SyntaxDataReference?>?.self)
404394

405395
// If the buffer is already populated, return it.
406-
if isPopulated() {
407-
return SyntaxDataReferenceBuffer(buffer)
396+
if let baseAddress = baseAddressRef.pointee {
397+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
408398
}
409399

410400
mutex.lock()
411401
defer { mutex.unlock() }
412402

413-
// Recheck before populating, maybe some other thread has populated the buffer
414-
// during acquiring the lock.
415-
if !isPopulated() {
416-
populateDataLayoutImpl(parent)
403+
// Recheck, maybe some other thread has populated the buffer during acquiring the lock.
404+
if let baseAddress = baseAddressRef.pointee {
405+
return SyntaxDataReferenceBuffer(UnsafeBufferPointer(start: baseAddress, count: childCount))
417406
}
418407

408+
let buffer = createLayoutDataImpl(parent)
409+
// Remember the base address of the created buffer.
410+
UnsafeMutablePointer(mutating: baseAddressRef).pointee = buffer.baseAddress
411+
419412
return SyntaxDataReferenceBuffer(buffer)
420413
}
421414

422-
/// Fill the layout buffer of the node.
423-
private func populateDataLayoutImpl(_ parent: SyntaxDataReference) {
424-
let baseAddress = parent.advanced(by: 1)
425-
.unsafeRawPointer
426-
.assumingMemoryBound(to: SyntaxDataReference?.self)
415+
/// Create the layout buffer of the node.
416+
private func createLayoutDataImpl(_ parent: SyntaxDataReference) -> UnsafeBufferPointer<SyntaxDataReference?> {
417+
let allocated = self.allocator.allocate(
418+
SyntaxDataReference?.self,
419+
count: Int(truncatingIfNeeded: parent.pointee.childCount)
420+
)
427421

428-
var ptr = UnsafeMutablePointer(mutating: baseAddress)
422+
var ptr = allocated.baseAddress!
429423
var absoluteInfo = parent.pointee.absoluteInfo.advancedToFirstChild()
430424
for raw in parent.pointee.raw.layoutView!.children {
431425
let dataRef = raw.map {
@@ -435,25 +429,29 @@ final class SyntaxDataArena: @unchecked Sendable {
435429
absoluteInfo = absoluteInfo.advancedBySibling(raw)
436430
ptr += 1
437431
}
432+
return UnsafeBufferPointer(allocated)
438433
}
439434

440435
/// Calculate the recommended slab size of `BumpPtrAllocator`.
441436
///
442-
/// Estimate the total allocation size assuming the client visits every nodes.
443-
/// Return the estimated size, or 4096 if it's larger than 4096.
437+
/// Estimate the total allocation size assuming the client visits every node in
438+
/// the tree. Return the estimated size, or 4096 if it's larger than 4096.
444439
///
445-
/// Each node consumes `SyntaxData` size at least. In addition to that, each syntax collection
446-
/// element consumes `SyntaxDataReference` in the parent's layout. For non-collection layout
447-
/// nodes, the layout is usually sparse, so we can't calculate the exact memory consumption
448-
/// until we see the syntax kind. But 4 slots per each node looks like an enough estimation.
440+
/// Each node consumes at least `SyntaxData` size. A non-empty layout node tail
441+
/// allocates a pointer storage for the base address of the layout buffer.
442+
///
443+
/// For layout buffers, each child element consumes a `SyntaxDataReference` in
444+
/// the parent's layout. But for non-collection layout nodes, the layout is usually
445+
/// sparse, so we can't calculate the exact memory size until we see the RawSyntax.
446+
/// That being said, `SyntaxData` + 4 pointer sizes looks like a sufficient estimate.
449447
private static func slabSize(for raw: RawSyntax) -> Int {
450448
let dataSize = MemoryLayout<SyntaxData>.stride
451-
let slotSize = MemoryLayout<SyntaxDataReference?>.stride
449+
let pointerSize = MemoryLayout<UnsafeRawPointer>.stride
452450

453451
let nodeCount = raw.totalNodes
454452
var totalSize = dataSize
455-
if nodeCount > 1 {
456-
totalSize += (dataSize + slotSize * 4) * (nodeCount &- 1)
453+
if nodeCount != 0 {
454+
totalSize += (dataSize + pointerSize * 4) * (nodeCount &- 1)
457455
}
458456
// Power of 2 might look nicer, but 'BumpPtrAllocator' doesn't require that.
459457
return min(totalSize, 4096)
@@ -470,7 +468,11 @@ final class SyntaxDataArena: @unchecked Sendable {
470468

471469
// Allocate 'SyntaxData' + buffer for child data.
472470
// NOTE: If you change the memory layout, revisit 'slabSize(for:)' too.
473-
let totalSize = MemoryLayout<SyntaxData>.stride &+ MemoryLayout<SyntaxDataReference?>.stride * childCount
471+
var totalSize = MemoryLayout<SyntaxData>.stride
472+
if childCount != 0 {
473+
// Tail allocate the storage for the pointer to the lazily allocated layout data.
474+
totalSize &+= MemoryLayout<UnsafePointer<SyntaxDataReference?>?>.size
475+
}
474476
let alignment = MemoryLayout<SyntaxData>.alignment
475477
let allocated = allocator.allocate(byteCount: totalSize, alignment: alignment).baseAddress!
476478

@@ -486,14 +488,11 @@ final class SyntaxDataArena: @unchecked Sendable {
486488
)
487489

488490
if childCount != 0 {
489-
// Fill the _last_ element with '~0' to indicate it's not populated.
491+
// Initialize the tail allocated storage with nil.
490492
allocated
491493
.advanced(by: MemoryLayout<SyntaxData>.stride)
492-
.bindMemory(to: SyntaxDataReference?.self, capacity: childCount)
493-
.advanced(by: childCount - 1)
494-
.withMemoryRebound(to: UInt.self, capacity: 1) {
495-
$0.initialize(to: ~0)
496-
}
494+
.bindMemory(to: UnsafePointer<SyntaxDataReference?>?.self, capacity: 1)
495+
.initialize(to: nil)
497496
}
498497

499498
return SyntaxDataReference(UnsafePointer(dataRef))

0 commit comments

Comments
 (0)