From 8f303e9017c56bdf84de7266405f3a02656c5868 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 15 Sep 2022 11:41:54 +0100 Subject: [PATCH 1/5] fix: OOM on large DAGs Storing a set of seen CIDs to short-cut DAG traversal while ensuring we have all the blocks in a DAG in the blockstore can cause OOMs for very large DAGs. --- packages/ipfs-repo/src/pin-manager.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/packages/ipfs-repo/src/pin-manager.js b/packages/ipfs-repo/src/pin-manager.js index 9decacc..b151308 100644 --- a/packages/ipfs-repo/src/pin-manager.js +++ b/packages/ipfs-repo/src/pin-manager.js @@ -274,19 +274,11 @@ export class PinManager { * @param {AbortOptions} options */ async fetchCompleteDag (cid, options) { - const seen = new Set() - /** * @param {CID} cid * @param {AbortOptions} options */ const walkDag = async (cid, options) => { - if (seen.has(cid.toString())) { - return - } - - seen.add(cid.toString()) - const bytes = await this.blockstore.get(cid, options) const codec = await this.loadCodec(cid.code) const block = createUnsafe({ bytes, cid, codec }) From 223d59c963a911cfa7849d12f83363d4649f7557 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 15 Sep 2022 11:58:13 +0100 Subject: [PATCH 2/5] chore: fix unrelated typescript error --- packages/ipfs-repo-migrations/src/index.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/ipfs-repo-migrations/src/index.js b/packages/ipfs-repo-migrations/src/index.js index 0d7f12d..5c87773 100644 --- a/packages/ipfs-repo-migrations/src/index.js +++ b/packages/ipfs-repo-migrations/src/index.js @@ -43,8 +43,11 @@ export function getLatestMigrationVersion (migrations) { * @param {number} toVersion - Version to which the repo should be migrated. * @param {MigrationOptions} [options] - Options for migration */ -export async function migrate (path, backends, repoOptions, toVersion, { ignoreLock = false, onProgress, isDryRun = false, migrations }) { - migrations = migrations || defaultMigrations +export async function migrate (path, backends, repoOptions, toVersion, options = {}) { + const ignoreLock = options.ignoreLock ?? false + const onProgress = options.onProgress + const isDryRun = options.isDryRun ?? false + const migrations = options.migrations ?? defaultMigrations if (!path) { throw new errors.RequiredParameterError('Path argument is required!') @@ -143,8 +146,11 @@ export async function migrate (path, backends, repoOptions, toVersion, { ignoreL * @param {number} toVersion - Version to which the repo will be reverted. * @param {MigrationOptions} [options] - Options for the reversion */ -export async function revert (path, backends, repoOptions, toVersion, { ignoreLock = false, onProgress, isDryRun = false, migrations }) { - migrations = migrations || defaultMigrations +export async function revert (path, backends, repoOptions, toVersion, options = {}) { + const ignoreLock = options.ignoreLock ?? false + const onProgress = options.onProgress + const isDryRun = options.isDryRun ?? false + const migrations = options.migrations ?? defaultMigrations if (!path) { throw new errors.RequiredParameterError('Path argument is required!') From a2b6122a9a5b1db20256fa1819985a0bcb4e6d52 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 15 Sep 2022 11:59:43 +0100 Subject: [PATCH 3/5] chore: linting --- packages/ipfs-repo-migrations/src/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ipfs-repo-migrations/src/index.js b/packages/ipfs-repo-migrations/src/index.js index 5c87773..899232f 100644 --- a/packages/ipfs-repo-migrations/src/index.js +++ b/packages/ipfs-repo-migrations/src/index.js @@ -1,4 +1,4 @@ -/* eslint complexity: ["error", 27] */ +/* eslint complexity: ["error", 28] */ import defaultMigrations from '../migrations/index.js' import * as repoVersion from './repo/version.js' From 4b1faadcd41bd7bf22842eb0b91e0698ebe88170 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 20 Sep 2022 14:27:17 +0100 Subject: [PATCH 4/5] fix: use lru cache instead of no cache at all Refactor to use a lru cache instead of an unbounded set and add a test to make sure the cache actually caches things. --- packages/ipfs-repo/package.json | 1 + packages/ipfs-repo/src/pin-manager.js | 35 ++++++++++++++++++++------- packages/ipfs-repo/test/pins-test.js | 27 +++++++++++++++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/packages/ipfs-repo/package.json b/packages/ipfs-repo/package.json index af7d8fa..d5e0574 100644 --- a/packages/ipfs-repo/package.json +++ b/packages/ipfs-repo/package.json @@ -204,6 +204,7 @@ "multiformats": "^9.0.4", "p-queue": "^7.3.0", "proper-lockfile": "^4.0.0", + "quick-lru": "^6.1.1", "sort-keys": "^5.0.0", "uint8arrays": "^3.0.0" }, diff --git a/packages/ipfs-repo/src/pin-manager.js b/packages/ipfs-repo/src/pin-manager.js index b151308..ff8f8de 100644 --- a/packages/ipfs-repo/src/pin-manager.js +++ b/packages/ipfs-repo/src/pin-manager.js @@ -13,6 +13,16 @@ import { } from './utils/blockstore.js' import { walkDag } from './utils/walk-dag.js' import { PinTypes } from './pin-types.js' +import QuickLRU from 'quick-lru' + +/** + * @typedef {import('./types').PinType} PinType + * @typedef {import('./types').PinQueryType} PinQueryType + * @typedef {import('multiformats/codecs/interface').BlockCodec} BlockCodec + * @typedef {import('./types').PinOptions} PinOptions + * @typedef {import('./types').AbortOptions} AbortOptions + * @typedef {import('./types').Pins} Pins + */ /** * @typedef {object} PinInternal @@ -23,14 +33,13 @@ import { PinTypes } from './pin-types.js' */ /** - * @typedef {import('./types').PinType} PinType - * @typedef {import('./types').PinQueryType} PinQueryType - * @typedef {import('multiformats/codecs/interface').BlockCodec} BlockCodec - * @typedef {import('./types').PinOptions} PinOptions - * @typedef {import('./types').AbortOptions} AbortOptions - * @typedef {import('./types').Pins} Pins + * @typedef {object} FetchCompleteDagOptions + * @property {AbortSignal} [signal] + * @property {number} [cidCacheMaxSize] */ +const CID_CACHE_MAX_SIZE = 2048 + /** * @param {string} type */ @@ -95,7 +104,7 @@ export class PinManager { /** * @param {CID} cid - * @param {PinOptions & AbortOptions} [options] + * @param {PinOptions & FetchCompleteDagOptions & AbortOptions} [options] */ async pinRecursively (cid, options = {}) { await this.fetchCompleteDag(cid, options) @@ -271,14 +280,22 @@ export class PinManager { /** * @param {CID} cid - * @param {AbortOptions} options + * @param {FetchCompleteDagOptions} [options] */ - async fetchCompleteDag (cid, options) { + async fetchCompleteDag (cid, options = {}) { + const seen = new QuickLRU({ maxSize: options.cidCacheMaxSize ?? CID_CACHE_MAX_SIZE }) + /** * @param {CID} cid * @param {AbortOptions} options */ const walkDag = async (cid, options) => { + if (seen.has(cid.toString())) { + return + } + + seen.set(cid.toString(), true) + const bytes = await this.blockstore.get(cid, options) const codec = await this.loadCodec(cid.code) const block = createUnsafe({ bytes, cid, codec }) diff --git a/packages/ipfs-repo/test/pins-test.js b/packages/ipfs-repo/test/pins-test.js index 67a52b4..0e46082 100644 --- a/packages/ipfs-repo/test/pins-test.js +++ b/packages/ipfs-repo/test/pins-test.js @@ -9,6 +9,7 @@ import { CID } from 'multiformats/cid' import all from 'it-all' import { PinTypes } from '../src/pin-types.js' import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' +import Sinon from 'sinon' /** * @param {import('@ipld/dag-pb').PBNode} node @@ -105,6 +106,32 @@ export default (repo) => { expect(pins.filter(p => p.cid.toString() === cid.toString())) .to.have.deep.nested.property('[0].metadata', metadata) }) + + it('does not traverse the same linked node twice', async () => { + // @ts-expect-error blockstore property is private + const getSpy = Sinon.spy(repo.pins.blockstore, 'get') + + const { cid: childCid, buf: childBuf } = await createDagPbNode() + await repo.blocks.put(childCid, childBuf) + + // create a root block with duplicate links to the same block + const { cid: rootCid, buf: rootBuf } = await createDagPbNode({ Links: [{ + Name: 'child-1', + Tsize: childBuf.byteLength, + Hash: childCid + }, { + Name: 'child-2', + Tsize: childBuf.byteLength, + Hash: childCid + }]}) + await repo.blocks.put(rootCid, rootBuf) + + await repo.pins.pinRecursively(rootCid) + + expect(getSpy.callCount).to.equal(2, 'should only have loaded the child block once') + expect(getSpy.getCall(0).args[0]).to.deep.equal(rootCid) + expect(getSpy.getCall(1).args[0]).to.deep.equal(childCid) + }) }) describe('.unpin', () => { From ff85fc264c4aa985f1385e2236580945e4e883ee Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 20 Sep 2022 14:53:41 +0100 Subject: [PATCH 5/5] chore: fix linting --- packages/ipfs-repo/test/pins-test.js | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/ipfs-repo/test/pins-test.js b/packages/ipfs-repo/test/pins-test.js index 0e46082..6351ea8 100644 --- a/packages/ipfs-repo/test/pins-test.js +++ b/packages/ipfs-repo/test/pins-test.js @@ -115,15 +115,17 @@ export default (repo) => { await repo.blocks.put(childCid, childBuf) // create a root block with duplicate links to the same block - const { cid: rootCid, buf: rootBuf } = await createDagPbNode({ Links: [{ - Name: 'child-1', - Tsize: childBuf.byteLength, - Hash: childCid - }, { - Name: 'child-2', - Tsize: childBuf.byteLength, - Hash: childCid - }]}) + const { cid: rootCid, buf: rootBuf } = await createDagPbNode({ + Links: [{ + Name: 'child-1', + Tsize: childBuf.byteLength, + Hash: childCid + }, { + Name: 'child-2', + Tsize: childBuf.byteLength, + Hash: childCid + }] + }) await repo.blocks.put(rootCid, rootBuf) await repo.pins.pinRecursively(rootCid)