From d76a5d27426d99c50020c3c383a6269a2597d014 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 24 Aug 2023 18:36:06 +0100 Subject: [PATCH 1/2] feat: add config option to control fanout size Adds a `shardFanoutBytes` option to the importer to allow configuring the number of bytes used for the HAMT prefix, also a test. --- .../content/hamt-sharded-directory.ts | 2 +- .../test/exporter-sharded.spec.ts | 38 ++++++++++++++++++- .../ipfs-unixfs-importer/src/dir-sharded.ts | 11 ++++-- .../ipfs-unixfs-importer/src/flat-to-shard.ts | 7 ++-- packages/ipfs-unixfs-importer/src/index.ts | 7 ++++ .../ipfs-unixfs-importer/src/tree-builder.ts | 1 + 6 files changed, 57 insertions(+), 9 deletions(-) diff --git a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts index fb35d04a..9e59d7c9 100644 --- a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts +++ b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts @@ -33,7 +33,7 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de throw errCode(err, 'ERR_NOT_UNIXFS') } - if (!dir.fanout) { + if (dir.fanout == null) { throw errCode(new Error('missing fanout'), 'ERR_NOT_UNIXFS') } diff --git a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts index 7d1b1be6..9da7a83d 100644 --- a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts +++ b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts @@ -4,7 +4,7 @@ import * as dagPb from '@ipld/dag-pb' import { expect } from 'aegir/chai' import { MemoryBlockstore } from 'blockstore-core' import { UnixFS } from 'ipfs-unixfs' -import { importer } from 'ipfs-unixfs-importer' +import { importer, type ImportCandidate } from 'ipfs-unixfs-importer' import all from 'it-all' import randomBytes from 'it-buffer-stream' import last from 'it-last' @@ -255,4 +255,40 @@ describe('exporter sharded', function () { expect(exported.name).to.deep.equal('file-1') }) + + it('exports a shard with a different fanout size', async () => { + const files: ImportCandidate[] = [{ + path: '/baz.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }, { + path: '/foo.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }, { + path: '/bar.txt', + content: Uint8Array.from([0, 1, 2, 3, 4]) + }] + + const result = await last(importer(files, block, { + shardSplitThresholdBytes: 0, + shardFanoutBytes: 4, + wrapWithDirectory: true + })) + + if (result == null) { + throw new Error('Import failed') + } + + const { cid } = result + const dir = await exporter(cid, block) + + expect(dir).to.have.nested.property('unixfs.fanout', 16n) + + const contents = await all(dir.content()) + + expect(contents.map(entry => ({ + path: `/${entry.name}`, + content: entry.node + }))) + .to.deep.equal(files) + }) }) diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.ts b/packages/ipfs-unixfs-importer/src/dir-sharded.ts index c30fbfbb..7b0914dd 100644 --- a/packages/ipfs-unixfs-importer/src/dir-sharded.ts +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.ts @@ -19,15 +19,19 @@ async function hamtHashFn (buf: Uint8Array): Promise { const HAMT_HASH_CODE = BigInt(0x22) +export interface DirShardedOptions extends PersistOptions { + shardFanoutBytes: number +} + class DirSharded extends Dir { private readonly _bucket: Bucket - constructor (props: DirProps, options: PersistOptions) { + constructor (props: DirProps, options: DirShardedOptions) { super(props, options) this._bucket = createHAMT({ hashFn: hamtHashFn, - bits: 8 + bits: options.shardFanoutBytes ?? 8 }) } @@ -88,6 +92,7 @@ export default DirSharded async function * flush (bucket: Bucket, blockstore: Blockstore, shardRoot: DirSharded | null, options: PersistOptions): AsyncIterable { const children = bucket._children + const padLength = (bucket.tableSize() - 1).toString(16).length const links: PBLink[] = [] let childrenSize = 0n @@ -98,7 +103,7 @@ async function * flush (bucket: Bucket, blockstore continue } - const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0') + const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0') if (child instanceof Bucket) { let shard diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.ts b/packages/ipfs-unixfs-importer/src/flat-to-shard.ts index a5b4d419..f7e58959 100644 --- a/packages/ipfs-unixfs-importer/src/flat-to-shard.ts +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.ts @@ -1,9 +1,8 @@ import { DirFlat } from './dir-flat.js' -import DirSharded from './dir-sharded.js' +import DirSharded, { type DirShardedOptions } from './dir-sharded.js' import type { Dir } from './dir.js' -import type { PersistOptions } from './utils/persist.js' -export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: PersistOptions): Promise { +export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: DirShardedOptions): Promise { let newDir = dir as DirSharded if (dir instanceof DirFlat && dir.estimateNodeSize() > threshold) { @@ -31,7 +30,7 @@ export async function flatToShard (child: Dir | null, dir: Dir, threshold: numbe return newDir } -async function convertToShard (oldDir: DirFlat, options: PersistOptions): Promise { +async function convertToShard (oldDir: DirFlat, options: DirShardedOptions): Promise { const newDir = new DirSharded({ root: oldDir.root, dir: true, diff --git a/packages/ipfs-unixfs-importer/src/index.ts b/packages/ipfs-unixfs-importer/src/index.ts index 88270154..5db54f81 100644 --- a/packages/ipfs-unixfs-importer/src/index.ts +++ b/packages/ipfs-unixfs-importer/src/index.ts @@ -123,6 +123,11 @@ export interface ImporterOptions extends ProgressOptions */ shardSplitThresholdBytes?: number + /** + * The maximum number of bytes used as a HAMT prefix for shard entries. Default: 256 + */ + shardFanoutBytes?: number + /** * How many files to import concurrently. For large numbers of small files this * should be high (e.g. 50). Default: 10 @@ -241,6 +246,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const wrapWithDirectory = options.wrapWithDirectory ?? false const shardSplitThresholdBytes = options.shardSplitThresholdBytes ?? 262144 + const shardFanoutBytes = options.shardFanoutBytes ?? 8 const cidVersion = options.cidVersion ?? 1 const rawLeaves = options.rawLeaves ?? true const leafType = options.leafType ?? 'file' @@ -269,6 +275,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const buildTree: TreeBuilder = options.treeBuilder ?? defaultTreeBuilder({ wrapWithDirectory, shardSplitThresholdBytes, + shardFanoutBytes, cidVersion, onProgress: options.onProgress }) diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.ts b/packages/ipfs-unixfs-importer/src/tree-builder.ts index 1c2e457d..674775e1 100644 --- a/packages/ipfs-unixfs-importer/src/tree-builder.ts +++ b/packages/ipfs-unixfs-importer/src/tree-builder.ts @@ -7,6 +7,7 @@ import type { PersistOptions } from './utils/persist.js' export interface AddToTreeOptions extends PersistOptions { shardSplitThresholdBytes: number + shardFanoutBytes: number } async function addToTree (elem: InProgressImportResult, tree: Dir, options: AddToTreeOptions): Promise { From 9693f3fb2531b88786aecd15c2192f85bae0ece7 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Fri, 25 Aug 2023 16:19:08 +1000 Subject: [PATCH 2/2] fix: use fanout "bits" (#357) --- .../ipfs-unixfs-exporter/test/exporter-sharded.spec.ts | 2 +- packages/ipfs-unixfs-importer/src/dir-sharded.ts | 8 +++++--- packages/ipfs-unixfs-importer/src/index.ts | 10 ++++++---- packages/ipfs-unixfs-importer/src/tree-builder.ts | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts index 9da7a83d..95a8c50e 100644 --- a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts +++ b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts @@ -270,7 +270,7 @@ describe('exporter sharded', function () { const result = await last(importer(files, block, { shardSplitThresholdBytes: 0, - shardFanoutBytes: 4, + shardFanoutBits: 4, // 2**4 = 16 children max wrapWithDirectory: true })) diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.ts b/packages/ipfs-unixfs-importer/src/dir-sharded.ts index 7b0914dd..7bee99b0 100644 --- a/packages/ipfs-unixfs-importer/src/dir-sharded.ts +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.ts @@ -18,9 +18,10 @@ async function hamtHashFn (buf: Uint8Array): Promise { } const HAMT_HASH_CODE = BigInt(0x22) +const DEFAULT_FANOUT_BITS = 8 export interface DirShardedOptions extends PersistOptions { - shardFanoutBytes: number + shardFanoutBits: number } class DirSharded extends Dir { @@ -31,7 +32,7 @@ class DirSharded extends Dir { this._bucket = createHAMT({ hashFn: hamtHashFn, - bits: options.shardFanoutBytes ?? 8 + bits: options.shardFanoutBits ?? DEFAULT_FANOUT_BITS }) } @@ -196,6 +197,7 @@ function isDir (obj: any): obj is Dir { function calculateSize (bucket: Bucket, shardRoot: DirSharded | null, options: PersistOptions): number { const children = bucket._children + const padLength = (bucket.tableSize() - 1).toString(16).length const links: PBLink[] = [] for (let i = 0; i < children.length; i++) { @@ -205,7 +207,7 @@ function calculateSize (bucket: Bucket, shardRoot: DirSharded | null, optio continue } - const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0') + const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0') if (child instanceof Bucket) { const size = calculateSize(child, null, options) diff --git a/packages/ipfs-unixfs-importer/src/index.ts b/packages/ipfs-unixfs-importer/src/index.ts index 5db54f81..d0dc4a7a 100644 --- a/packages/ipfs-unixfs-importer/src/index.ts +++ b/packages/ipfs-unixfs-importer/src/index.ts @@ -124,9 +124,11 @@ export interface ImporterOptions extends ProgressOptions shardSplitThresholdBytes?: number /** - * The maximum number of bytes used as a HAMT prefix for shard entries. Default: 256 + * The number of bits of a hash digest used at each level of sharding to + * the child index. 2**shardFanoutBits will dictate the maximum number of + * children for any shard in the HAMT. Default: 8 */ - shardFanoutBytes?: number + shardFanoutBits?: number /** * How many files to import concurrently. For large numbers of small files this @@ -246,7 +248,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const wrapWithDirectory = options.wrapWithDirectory ?? false const shardSplitThresholdBytes = options.shardSplitThresholdBytes ?? 262144 - const shardFanoutBytes = options.shardFanoutBytes ?? 8 + const shardFanoutBits = options.shardFanoutBits ?? 8 const cidVersion = options.cidVersion ?? 1 const rawLeaves = options.rawLeaves ?? true const leafType = options.leafType ?? 'file' @@ -275,7 +277,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri const buildTree: TreeBuilder = options.treeBuilder ?? defaultTreeBuilder({ wrapWithDirectory, shardSplitThresholdBytes, - shardFanoutBytes, + shardFanoutBits, cidVersion, onProgress: options.onProgress }) diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.ts b/packages/ipfs-unixfs-importer/src/tree-builder.ts index 674775e1..f84d6fb5 100644 --- a/packages/ipfs-unixfs-importer/src/tree-builder.ts +++ b/packages/ipfs-unixfs-importer/src/tree-builder.ts @@ -7,7 +7,7 @@ import type { PersistOptions } from './utils/persist.js' export interface AddToTreeOptions extends PersistOptions { shardSplitThresholdBytes: number - shardFanoutBytes: number + shardFanoutBits: number } async function addToTree (elem: InProgressImportResult, tree: Dir, options: AddToTreeOptions): Promise {