Skip to content

Commit bbf0ec6

Browse files
committed
fix: more Tsize fixes, fix HAMT and make it match go-unixfs output
1 parent 981d684 commit bbf0ec6

File tree

3 files changed

+58
-12
lines changed

3 files changed

+58
-12
lines changed

data/builder/dir_test.go

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,13 +45,11 @@ func TestBuildUnixFSDirectory(t *testing.T) {
4545
t.Fatal(err)
4646
}
4747

48-
dl, tsize, err := BuildUnixFSDirectory(entries, &ls)
48+
dl, _, err := BuildUnixFSDirectory(entries, &ls)
4949
if err != nil {
5050
t.Fatal(err)
5151
}
5252

53-
require.GreaterOrEqual(t, tsize, uint64(0)) // TODO: set properly
54-
5553
pbn, err := ls.Load(ipld.LinkContext{}, dl, dagpb.Type.PBNode)
5654
if err != nil {
5755
t.Fatal(err)
@@ -108,8 +106,36 @@ func TestBuildUnixFSRecursive(t *testing.T) {
108106

109107
lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
110108
require.NoError(t, err)
111-
require.Equal(t, lnk.String(), fixture.expectedLnk.String())
112-
require.Equal(t, sz, uint64(245))
109+
require.Equal(t, fixture.expectedLnk.String(), lnk.String())
110+
require.Equal(t, uint64(245), sz)
111+
}
112+
113+
func TestBuildUnixFSRecursiveSharded(t *testing.T) {
114+
// only the top CID is of interest, but this tree is correct and can be used for future validation
115+
fixture := fentry{
116+
"rootDir",
117+
"",
118+
mustCidDecode("bafybeiendaawtta62lx2p2e2hecgywmqeq6ekrn2pfypxjkmdzmaeituhe"),
119+
make([]fentry, 0),
120+
}
121+
122+
for i := 0; i < 2048; i++ {
123+
name := fmt.Sprintf("long name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are included in the directory entry %d", i)
124+
fixture.children = append(fixture.children, fentry{name, name, cid.Undef, nil})
125+
}
126+
127+
ls := cidlink.DefaultLinkSystem()
128+
storage := cidlink.Memory{}
129+
ls.StorageReadOpener = storage.OpenRead
130+
ls.StorageWriteOpener = storage.OpenWrite
131+
132+
dir := t.TempDir()
133+
makeFixture(t, dir, fixture)
134+
135+
lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
136+
require.NoError(t, err)
137+
require.Equal(t, fixture.expectedLnk.String(), lnk.String())
138+
require.Equal(t, uint64(778128), sz)
113139
}
114140

115141
type fentry struct {

data/builder/directory.go

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,7 @@ func BuildUnixFSRecursive(root string, ls *ipld.LinkSystem) (ipld.Link, uint64,
4848
}
4949
lnks = append(lnks, entry)
5050
}
51-
outLnk, sz, err := BuildUnixFSDirectory(lnks, ls)
52-
return outLnk, tsize + sz, err
51+
return BuildUnixFSDirectory(lnks, ls)
5352
case m.Type() == fs.ModeSymlink:
5453
content, err := os.Readlink(root)
5554
if err != nil {
@@ -126,7 +125,9 @@ func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Lin
126125
return nil, 0, err
127126
}
128127
// sorting happens in codec-dagpb
128+
var totalSize uint64
129129
for _, e := range entries {
130+
totalSize += uint64(e.Tsize.Must().Int())
130131
if err := lnks.AssembleValue().AssignNode(e); err != nil {
131132
return nil, 0, err
132133
}
@@ -138,5 +139,9 @@ func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Lin
138139
return nil, 0, err
139140
}
140141
node := pbb.Build()
141-
return sizedStore(ls, fileLinkProto, node)
142+
lnk, sz, err := sizedStore(ls, fileLinkProto, node)
143+
if err != nil {
144+
return nil, 0, err
145+
}
146+
return lnk, totalSize + sz, err
142147
}

data/builder/dirshard.go

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,9 @@ func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink
5656
hamtEntries := make([]hamtLink, 0, len(entries))
5757
for _, e := range entries {
5858
name := e.Name.Must().String()
59-
sum := h.Sum([]byte(name))
59+
h.Reset()
60+
h.Write([]byte(name))
61+
sum := h.Sum(nil)
6062
hamtEntries = append(hamtEntries, hamtLink{
6163
sum,
6264
e,
@@ -97,9 +99,11 @@ func (s *shard) add(lnk hamtLink) error {
9799

98100
current, ok := s.children[bucket]
99101
if !ok {
102+
// no bucket, make one with this entry
100103
s.children[bucket] = entry{nil, &lnk}
101104
return nil
102105
} else if current.shard != nil {
106+
// existing shard, add this link to the shard
103107
return current.shard.add(lnk)
104108
}
105109
// make a shard for current and lnk
@@ -114,15 +118,18 @@ func (s *shard) add(lnk hamtLink) error {
114118
},
115119
nil,
116120
}
121+
// add existing link from this bucket to the new shard
117122
if err := newShard.add(*current.hamtLink); err != nil {
118123
return err
119124
}
125+
// replace bucket with shard
120126
s.children[bucket] = newShard
127+
// add new link to the new shard
121128
return newShard.add(lnk)
122129
}
123130

124131
func (s *shard) formatLinkName(name string, idx int) string {
125-
return fmt.Sprintf("%*X%s", s.width, idx, name)
132+
return fmt.Sprintf("%0*X%s", s.width, idx, name)
126133
}
127134

128135
// bitmap calculates the bitmap of which links in the shard are set.
@@ -169,21 +176,25 @@ func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
169176
return nil, 0, err
170177
}
171178
// sorting happens in codec-dagpb
179+
var totalSize uint64
172180
for idx, e := range s.children {
173181
var lnk dagpb.PBLink
174182
if e.shard != nil {
175183
ipldLnk, sz, err := e.shard.serialize(ls)
176184
if err != nil {
177185
return nil, 0, err
178186
}
187+
totalSize += sz
179188
fullName := s.formatLinkName("", idx)
180189
lnk, err = BuildUnixFSDirectoryEntry(fullName, int64(sz), ipldLnk)
181190
if err != nil {
182191
return nil, 0, err
183192
}
184193
} else {
185194
fullName := s.formatLinkName(e.Name.Must().String(), idx)
186-
lnk, err = BuildUnixFSDirectoryEntry(fullName, e.Tsize.Must().Int(), e.Hash.Link())
195+
sz := e.Tsize.Must().Int()
196+
totalSize += uint64(sz)
197+
lnk, err = BuildUnixFSDirectoryEntry(fullName, sz, e.Hash.Link())
187198
}
188199
if err != nil {
189200
return nil, 0, err
@@ -200,5 +211,9 @@ func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
200211
return nil, 0, err
201212
}
202213
node := pbb.Build()
203-
return sizedStore(ls, fileLinkProto, node)
214+
lnk, sz, err := sizedStore(ls, fileLinkProto, node)
215+
if err != nil {
216+
return nil, 0, err
217+
}
218+
return lnk, totalSize + sz, nil
204219
}

0 commit comments

Comments
 (0)