diff --git a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts index fb35d04a..9e59d7c9 100644 --- a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts +++ b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts @@ -33,7 +33,7 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de throw errCode(err, 'ERR_NOT_UNIXFS') } - if (!dir.fanout) { + if (dir.fanout == null) { throw errCode(new Error('missing fanout'), 'ERR_NOT_UNIXFS') } diff --git a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts index 7d1b1be6..95a8c50e 100644 --- a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts +++ b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts @@ -4,7 +4,7 @@ import * as dagPb from '@ipld/dag-pb' import { expect } from 'aegir/chai' import { MemoryBlockstore } from 'blockstore-core' import { UnixFS } from 'ipfs-unixfs' -import { importer } from 'ipfs-unixfs-importer' +import { importer, type ImportCandidate } from 'ipfs-unixfs-importer' import all from 'it-all' import randomBytes from 'it-buffer-stream' import last from 'it-last' @@ -255,4 +255,40 @@ describe('exporter sharded', function () { expect(exported.name).to.deep.equal('file-1') }) + + it('exports a shard with a different fanout size', async () => { + const files: ImportCandidate[] = [{ + path: '/baz.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }, { + path: '/foo.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }, { + path: '/bar.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }] + + const result = await last(importer(files, block, { + shardSplitThresholdBytes: 0, + shardFanoutBits: 4, // 2**4 = 16 children max + wrapWithDirectory: true + })) + + if (result == null) { + throw new 
Error('Import failed') + } + + const { cid } = result + const dir = await exporter(cid, block) + + expect(dir).to.have.nested.property('unixfs.fanout', 16n) + + const contents = await all(dir.content()) + + expect(contents.map(entry => ({ + path: `/${entry.name}`, + content: entry.node + }))) + .to.deep.equal(files) + }) }) diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.ts b/packages/ipfs-unixfs-importer/src/dir-sharded.ts index c30fbfbb..7bee99b0 100644 --- a/packages/ipfs-unixfs-importer/src/dir-sharded.ts +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.ts @@ -18,16 +18,21 @@ async function hamtHashFn (buf: Uint8Array): Promise { } const HAMT_HASH_CODE = BigInt(0x22) +const DEFAULT_FANOUT_BITS = 8 + +export interface DirShardedOptions extends PersistOptions { + shardFanoutBits: number +} class DirSharded extends Dir { private readonly _bucket: Bucket - constructor (props: DirProps, options: PersistOptions) { + constructor (props: DirProps, options: DirShardedOptions) { super(props, options) this._bucket = createHAMT({ hashFn: hamtHashFn, - bits: 8 + bits: options.shardFanoutBits ?? 
DEFAULT_FANOUT_BITS }) } @@ -88,6 +93,7 @@ export default DirSharded async function * flush (bucket: Bucket, blockstore: Blockstore, shardRoot: DirSharded | null, options: PersistOptions): AsyncIterable { const children = bucket._children + const padLength = (bucket.tableSize() - 1).toString(16).length const links: PBLink[] = [] let childrenSize = 0n @@ -98,7 +104,7 @@ async function * flush (bucket: Bucket, blockstore continue } - const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0') + const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0') if (child instanceof Bucket) { let shard @@ -191,6 +197,7 @@ function isDir (obj: any): obj is Dir { function calculateSize (bucket: Bucket, shardRoot: DirSharded | null, options: PersistOptions): number { const children = bucket._children + const padLength = (bucket.tableSize() - 1).toString(16).length const links: PBLink[] = [] for (let i = 0; i < children.length; i++) { @@ -200,7 +207,7 @@ function calculateSize (bucket: Bucket, shardRoot: DirSharded | null, optio continue } - const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0') + const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0') if (child instanceof Bucket) { const size = calculateSize(child, null, options) diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.ts b/packages/ipfs-unixfs-importer/src/flat-to-shard.ts index a5b4d419..f7e58959 100644 --- a/packages/ipfs-unixfs-importer/src/flat-to-shard.ts +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.ts @@ -1,9 +1,8 @@ import { DirFlat } from './dir-flat.js' -import DirSharded from './dir-sharded.js' +import DirSharded, { type DirShardedOptions } from './dir-sharded.js' import type { Dir } from './dir.js' -import type { PersistOptions } from './utils/persist.js' -export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: PersistOptions): Promise { +export async function flatToShard (child: Dir | null, dir: Dir, 
threshold: number, options: DirShardedOptions): Promise { let newDir = dir as DirSharded if (dir instanceof DirFlat && dir.estimateNodeSize() > threshold) { @@ -31,7 +30,7 @@ export async function flatToShard (child: Dir | null, dir: Dir, threshold: numbe return newDir } -async function convertToShard (oldDir: DirFlat, options: PersistOptions): Promise { +async function convertToShard (oldDir: DirFlat, options: DirShardedOptions): Promise { const newDir = new DirSharded({ root: oldDir.root, dir: true, diff --git a/packages/ipfs-unixfs-importer/src/index.ts b/packages/ipfs-unixfs-importer/src/index.ts index 88270154..d0dc4a7a 100644 --- a/packages/ipfs-unixfs-importer/src/index.ts +++ b/packages/ipfs-unixfs-importer/src/index.ts @@ -123,6 +123,13 @@ export interface ImporterOptions extends ProgressOptions */ shardSplitThresholdBytes?: number + /** + * The number of bits of a hash digest used at each level of sharding to + * generate the child index. 2**shardFanoutBits will dictate the maximum number of + * children for any shard in the HAMT. Default: 8 + */ + shardFanoutBits?: number + /** + * How many files to import concurrently. For large numbers of small files this + * should be high (e.g. 50). Default: 10 @@ -241,6 +248,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const wrapWithDirectory = options.wrapWithDirectory ?? false const shardSplitThresholdBytes = options.shardSplitThresholdBytes ?? 262144 + const shardFanoutBits = options.shardFanoutBits ?? 8 const cidVersion = options.cidVersion ?? 1 const rawLeaves = options.rawLeaves ?? true const leafType = options.leafType ?? 'file' @@ -269,6 +277,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const buildTree: TreeBuilder = options.treeBuilder ?? 
defaultTreeBuilder({ wrapWithDirectory, shardSplitThresholdBytes, + shardFanoutBits, cidVersion, onProgress: options.onProgress }) diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.ts b/packages/ipfs-unixfs-importer/src/tree-builder.ts index 1c2e457d..f84d6fb5 100644 --- a/packages/ipfs-unixfs-importer/src/tree-builder.ts +++ b/packages/ipfs-unixfs-importer/src/tree-builder.ts @@ -7,6 +7,7 @@ import type { PersistOptions } from './utils/persist.js' export interface AddToTreeOptions extends PersistOptions { shardSplitThresholdBytes: number + shardFanoutBits: number } async function addToTree (elem: InProgressImportResult, tree: Dir, options: AddToTreeOptions): Promise {