Skip to content

Commit d76a5d2

Browse files
committed
feat: add config option to control fanout size
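Adds a `shardFanoutBytes` option to the importer to allow configuring the size of the HAMT prefix used for shard entries (the value is passed to the HAMT as its `bits` setting, defaulting to 8), and adds a test that exports a shard built with a non-default fanout.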
1 parent d269d97 commit d76a5d2

File tree

6 files changed

+57
-9
lines changed

6 files changed

+57
-9
lines changed

packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de
3333
throw errCode(err, 'ERR_NOT_UNIXFS')
3434
}
3535

36-
if (!dir.fanout) {
36+
if (dir.fanout == null) {
3737
throw errCode(new Error('missing fanout'), 'ERR_NOT_UNIXFS')
3838
}
3939

packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import * as dagPb from '@ipld/dag-pb'
44
import { expect } from 'aegir/chai'
55
import { MemoryBlockstore } from 'blockstore-core'
66
import { UnixFS } from 'ipfs-unixfs'
7-
import { importer } from 'ipfs-unixfs-importer'
7+
import { importer, type ImportCandidate } from 'ipfs-unixfs-importer'
88
import all from 'it-all'
99
import randomBytes from 'it-buffer-stream'
1010
import last from 'it-last'
@@ -255,4 +255,40 @@ describe('exporter sharded', function () {
255255

256256
expect(exported.name).to.deep.equal('file-1')
257257
})
258+
259+
it('exports a shard with a different fanout size', async () => {
260+
const files: ImportCandidate[] = [{
261+
path: '/baz.txt',
262+
content: Uint8Array.from([0, 1, 2, 3, 4])
263+
}, {
264+
path: '/foo.txt',
265+
content: Uint8Array.from([0, 1, 2, 3, 4])
266+
}, {
267+
path: '/bar.txt',
268+
content: Uint8Array.from([0, 1, 2, 3, 4])
269+
}]
270+
271+
const result = await last(importer(files, block, {
272+
shardSplitThresholdBytes: 0,
273+
shardFanoutBytes: 4,
274+
wrapWithDirectory: true
275+
}))
276+
277+
if (result == null) {
278+
throw new Error('Import failed')
279+
}
280+
281+
const { cid } = result
282+
const dir = await exporter(cid, block)
283+
284+
expect(dir).to.have.nested.property('unixfs.fanout', 16n)
285+
286+
const contents = await all(dir.content())
287+
288+
expect(contents.map(entry => ({
289+
path: `/${entry.name}`,
290+
content: entry.node
291+
})))
292+
.to.deep.equal(files)
293+
})
258294
})

packages/ipfs-unixfs-importer/src/dir-sharded.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,19 @@ async function hamtHashFn (buf: Uint8Array): Promise<Uint8Array> {
1919

2020
const HAMT_HASH_CODE = BigInt(0x22)
2121

22+
export interface DirShardedOptions extends PersistOptions {
23+
shardFanoutBytes: number
24+
}
25+
2226
class DirSharded extends Dir {
2327
private readonly _bucket: Bucket<InProgressImportResult | Dir>
2428

25-
constructor (props: DirProps, options: PersistOptions) {
29+
constructor (props: DirProps, options: DirShardedOptions) {
2630
super(props, options)
2731

2832
this._bucket = createHAMT({
2933
hashFn: hamtHashFn,
30-
bits: 8
34+
bits: options.shardFanoutBytes ?? 8
3135
})
3236
}
3337

@@ -88,6 +92,7 @@ export default DirSharded
8892

8993
async function * flush (bucket: Bucket<Dir | InProgressImportResult>, blockstore: Blockstore, shardRoot: DirSharded | null, options: PersistOptions): AsyncIterable<ImportResult> {
9094
const children = bucket._children
95+
const padLength = (bucket.tableSize() - 1).toString(16).length
9196
const links: PBLink[] = []
9297
let childrenSize = 0n
9398

@@ -98,7 +103,7 @@ async function * flush (bucket: Bucket<Dir | InProgressImportResult>, blockstore
98103
continue
99104
}
100105

101-
const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
106+
const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0')
102107

103108
if (child instanceof Bucket) {
104109
let shard

packages/ipfs-unixfs-importer/src/flat-to-shard.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import { DirFlat } from './dir-flat.js'
2-
import DirSharded from './dir-sharded.js'
2+
import DirSharded, { type DirShardedOptions } from './dir-sharded.js'
33
import type { Dir } from './dir.js'
4-
import type { PersistOptions } from './utils/persist.js'
54

6-
export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: PersistOptions): Promise<DirSharded> {
5+
export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: DirShardedOptions): Promise<DirSharded> {
76
let newDir = dir as DirSharded
87

98
if (dir instanceof DirFlat && dir.estimateNodeSize() > threshold) {
@@ -31,7 +30,7 @@ export async function flatToShard (child: Dir | null, dir: Dir, threshold: numbe
3130
return newDir
3231
}
3332

34-
async function convertToShard (oldDir: DirFlat, options: PersistOptions): Promise<DirSharded> {
33+
async function convertToShard (oldDir: DirFlat, options: DirShardedOptions): Promise<DirSharded> {
3534
const newDir = new DirSharded({
3635
root: oldDir.root,
3736
dir: true,

packages/ipfs-unixfs-importer/src/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ export interface ImporterOptions extends ProgressOptions<ImporterProgressEvents>
123123
*/
124124
shardSplitThresholdBytes?: number
125125

126+
/**
127+
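* The number of hash bits used as the HAMT prefix for shard entries; despite the option's name the value is passed as the HAMT `bits` setting, so the resulting fanout is 2^shardFanoutBytes (e.g. 4 gives a fanout of 16). Default: 8 (a fanout of 256)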
128+
*/
129+
shardFanoutBytes?: number
130+
126131
/**
127132
* How many files to import concurrently. For large numbers of small files this
128133
* should be high (e.g. 50). Default: 10
@@ -241,6 +246,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri
241246

242247
const wrapWithDirectory = options.wrapWithDirectory ?? false
243248
const shardSplitThresholdBytes = options.shardSplitThresholdBytes ?? 262144
249+
const shardFanoutBytes = options.shardFanoutBytes ?? 8
244250
const cidVersion = options.cidVersion ?? 1
245251
const rawLeaves = options.rawLeaves ?? true
246252
const leafType = options.leafType ?? 'file'
@@ -269,6 +275,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri
269275
const buildTree: TreeBuilder = options.treeBuilder ?? defaultTreeBuilder({
270276
wrapWithDirectory,
271277
shardSplitThresholdBytes,
278+
shardFanoutBytes,
272279
cidVersion,
273280
onProgress: options.onProgress
274281
})

packages/ipfs-unixfs-importer/src/tree-builder.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type { PersistOptions } from './utils/persist.js'
77

88
export interface AddToTreeOptions extends PersistOptions {
99
shardSplitThresholdBytes: number
10+
shardFanoutBytes: number
1011
}
1112

1213
async function addToTree (elem: InProgressImportResult, tree: Dir, options: AddToTreeOptions): Promise<Dir> {

0 commit comments

Comments
 (0)