Skip to content
This repository was archived by the owner on Aug 12, 2020. It is now read-only.

Commit be07987

Browse files
authored
Merge pull request #223 from dordille/rabin-chunker
feat(importer): add rabin fingerprinting chunk algorithm
2 parents dfc9f20 + 6f1f568 commit be07987

File tree

7 files changed

+124
-7
lines changed

7 files changed

+124
-7
lines changed

package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
"leadMaintainer": "Alex Potsides <alex.potsides@protocol.ai>",
66
"main": "src/index.js",
77
"browser": {
8-
"fs": false
8+
"fs": false,
9+
"rabin": false
910
},
1011
"scripts": {
1112
"test": "aegir test",
@@ -72,7 +73,9 @@
7273
"pull-through": "^1.0.18",
7374
"pull-traverse": "^1.0.3",
7475
"pull-write": "^1.1.4",
75-
"sparse-array": "^1.3.1"
76+
"rabin": "^1.6.0",
77+
"sparse-array": "^1.3.1",
78+
"stream-to-pull-stream": "^1.7.2"
7679
},
7780
"contributors": [
7881
"Alan Shaw <alan@tableflip.io>",

src/builder/builder.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
1616

1717
const defaultOptions = {
1818
chunkerOptions: {
19-
maxChunkSize: 262144
19+
maxChunkSize: 262144,
20+
avgChunkSize: 262144
2021
},
2122
rawLeaves: false,
2223
hashAlg: 'sha2-256',

src/chunker/index.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
'use strict'
2+
3+
const chunkers = {
4+
fixed: require('../chunker/fixed-size'),
5+
rabin: require('../chunker/rabin')
6+
}
7+
8+
module.exports = chunkers

src/chunker/rabin.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
'use strict'
2+
3+
const createRabin = require('rabin')
4+
const toPull = require('stream-to-pull-stream')
5+
6+
module.exports = (options) => {
7+
let min, max, avg
8+
if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) {
9+
avg = options.avgChunkSize
10+
min = options.minChunkSize
11+
max = options.maxChunkSize
12+
} else {
13+
avg = options.avgChunkSize
14+
min = avg / 3
15+
max = avg + (avg / 2)
16+
}
17+
18+
const sizepow = Math.floor(Math.log2(avg))
19+
const rabin = createRabin({
20+
min: min,
21+
max: max,
22+
bits: sizepow,
23+
window: options.window || 16,
24+
polynomial: options.polynomial || '0x3DF305DFB2A805'
25+
})
26+
27+
return toPull.duplex(rabin)
28+
}

src/importer/index.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ const assert = require('assert')
88
const setImmediate = require('async/setImmediate')
99
const DAGBuilder = require('../builder')
1010
const createTreeBuilder = require('./tree-builder')
11-
12-
const chunkers = {
13-
fixed: require('../chunker/fixed-size')
14-
}
11+
const chunkers = require('../chunker')
1512

1613
const defaultOptions = {
1714
chunker: 'fixed',

test/chunker-rabin.js

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/* eslint-env mocha */
2+
'use strict'
3+
4+
const chunker = require('./../src/chunker/rabin')
5+
const chai = require('chai')
6+
chai.use(require('dirty-chai'))
7+
const expect = chai.expect
8+
const pull = require('pull-stream')
9+
const loadFixture = require('aegir/fixtures')
10+
11+
const rawFile = loadFixture('test/fixtures/1MiB.txt')
12+
13+
// Exercises the rabin chunker with explicit and derived size bounds and
// checks that every emitted chunk falls inside those bounds.
describe('chunker: rabin', function () {
  // Regular function (not an arrow) so that `this` is the mocha suite
  this.timeout(30000)

  it('chunks non flat buffers', (done) => {
    const b1 = Buffer.alloc(2 * 256)
    const b2 = Buffer.alloc(1 * 256)
    const b3 = Buffer.alloc(5 * 256)

    b1.fill('a')
    b2.fill('b')
    b3.fill('c')

    pull(
      pull.values([b1, b2, b3]),
      chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}),
      pull.collect((err, chunks) => {
        expect(err).to.not.exist()
        chunks.forEach((chunk) => {
          expect(chunk).to.have.length.gte(48)
          expect(chunk).to.have.length.lte(192)
        })
        done()
      })
    )
  })

  it('uses default min and max chunk size when only avgChunkSize is specified', (done) => {
    const b1 = Buffer.alloc(10 * 256)
    b1.fill('a')
    pull(
      pull.values([b1]),
      chunker({avgChunkSize: 256}),
      pull.collect((err, chunks) => {
        expect(err).to.not.exist()
        chunks.forEach((chunk) => {
          expect(chunk).to.have.length.gte(256 / 3)
          // The chunker derives its default max as avg + (avg / 2)
          expect(chunk).to.have.length.lte(256 + (256 / 2))
        })
        done()
      })
    )
  })

  it('256 KiB avg chunks of non scalar filesize', (done) => {
    const KiB256 = 262144
    // Append a few bytes so the input is not an exact multiple of the chunk size
    const file = Buffer.concat([rawFile, Buffer.from('hello')])
    const opts = {
      minChunkSize: KiB256 / 3,
      avgChunkSize: KiB256,
      maxChunkSize: KiB256 + (KiB256 / 2)
    }
    pull(
      pull.values([file]),
      chunker(opts),
      pull.collect((err, chunks) => {
        expect(err).to.not.exist()

        chunks.forEach((chunk) => {
          expect(chunk).to.have.length.gte(opts.minChunkSize)
          expect(chunk).to.have.length.lte(opts.maxChunkSize)
        })

        done()
      })
    )
  })
})

test/node.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ describe('IPFS UnixFS Engine', () => {
4444

4545
// Chunkers
4646
require('./chunker-fixed-size')
47+
require('./chunker-rabin')
4748

4849
// Graph Builders
4950
require('./builder')(repo)

0 commit comments

Comments
 (0)