Skip to content

Commit 981d684

Browse files
committed
fix: encode Tsize correctly everywhere (using wrapped LinkSystem)
1 parent 75fdb68 commit 981d684

File tree

5 files changed

+171
-73
lines changed

5 files changed

+171
-73
lines changed

data/builder/dir_test.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@ package builder
33
import (
44
"bytes"
55
"fmt"
6+
"os"
7+
"path/filepath"
68
"testing"
79

10+
"github.com/ipfs/go-cid"
811
"github.com/ipfs/go-unixfsnode"
912
dagpb "github.com/ipld/go-codec-dagpb"
1013
"github.com/ipld/go-ipld-prime"
1114
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
15+
"github.com/stretchr/testify/require"
1216
)
1317

1418
func mkEntries(cnt int, ls *ipld.LinkSystem) ([]dagpb.PBLink, error) {
@@ -41,11 +45,13 @@ func TestBuildUnixFSDirectory(t *testing.T) {
4145
t.Fatal(err)
4246
}
4347

44-
dl, err := BuildUnixFSDirectory(entries, &ls)
48+
dl, tsize, err := BuildUnixFSDirectory(entries, &ls)
4549
if err != nil {
4650
t.Fatal(err)
4751
}
4852

53+
require.GreaterOrEqual(t, tsize, uint64(0)) // TODO: set properly
54+
4955
pbn, err := ls.Load(ipld.LinkContext{}, dl, dagpb.Type.PBNode)
5056
if err != nil {
5157
t.Fatal(err)
@@ -70,3 +76,65 @@ func TestBuildUnixFSDirectory(t *testing.T) {
7076
}
7177
}
7278
}
79+
80+
func TestBuildUnixFSRecursive(t *testing.T) {
81+
// only the top CID is of interest, but this tree is correct and can be used for future validation
82+
fixture := fentry{
83+
"rootDir",
84+
"",
85+
mustCidDecode("bafybeihswl3f7pa7fueyayewcvr3clkdz7oetv4jolyejgw26p6l3qzlbm"),
86+
[]fentry{
87+
{"a", "aaa", mustCidDecode("bafkreieygsdw3t5qlsywpjocjfj6xjmmjlejwgw7k7zi6l45bgxra7xi6a"), nil},
88+
{
89+
"b",
90+
"",
91+
mustCidDecode("bafybeibohj54uixf2mso4t53suyarv6cfuxt6b5cj6qjsqaa2ezfxnu5pu"),
92+
[]fentry{
93+
{"1", "111", mustCidDecode("bafkreihw4cq6flcbsrnjvj77rkfkudhlyevdxteydkjjvvopqefasdqrvy"), nil},
94+
{"2", "222", mustCidDecode("bafkreie3q4kremt4bhhjdxletm7znjr3oqeo6jt4rtcxcaiu4yuxgdfwd4"), nil},
95+
},
96+
},
97+
{"c", "ccc", mustCidDecode("bafkreide3ksevvet74uks3x7vnxhp4ltfi6zpwbsifmbwn6324fhusia7y"), nil},
98+
},
99+
}
100+
101+
ls := cidlink.DefaultLinkSystem()
102+
storage := cidlink.Memory{}
103+
ls.StorageReadOpener = storage.OpenRead
104+
ls.StorageWriteOpener = storage.OpenWrite
105+
106+
dir := t.TempDir()
107+
makeFixture(t, dir, fixture)
108+
109+
lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
110+
require.NoError(t, err)
111+
require.Equal(t, lnk.String(), fixture.expectedLnk.String())
112+
require.Equal(t, sz, uint64(245))
113+
}
114+
115+
type fentry struct {
116+
name string
117+
content string
118+
expectedLnk cid.Cid
119+
children []fentry
120+
}
121+
122+
func makeFixture(t *testing.T, dir string, fixture fentry) {
123+
path := filepath.Join(dir, fixture.name)
124+
if fixture.children != nil {
125+
require.NoError(t, os.Mkdir(path, 0755))
126+
for _, c := range fixture.children {
127+
makeFixture(t, path, c)
128+
}
129+
} else {
130+
os.WriteFile(path, []byte(fixture.content), 0644)
131+
}
132+
}
133+
134+
func mustCidDecode(s string) cid.Cid {
135+
c, err := cid.Decode(s)
136+
if err != nil {
137+
panic(err)
138+
}
139+
return c
140+
}

data/builder/directory.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ func BuildUnixFSRecursive(root string, ls *ipld.LinkSystem) (ipld.Link, uint64,
3030
m := info.Mode()
3131
switch {
3232
case m.IsDir():
33+
var tsize uint64
3334
entries, err := os.ReadDir(root)
3435
if err != nil {
3536
return nil, 0, err
@@ -40,27 +41,36 @@ func BuildUnixFSRecursive(root string, ls *ipld.LinkSystem) (ipld.Link, uint64,
4041
if err != nil {
4142
return nil, 0, err
4243
}
44+
tsize += sz
4345
entry, err := BuildUnixFSDirectoryEntry(e.Name(), int64(sz), lnk)
4446
if err != nil {
4547
return nil, 0, err
4648
}
4749
lnks = append(lnks, entry)
4850
}
49-
outLnk, err := BuildUnixFSDirectory(lnks, ls)
50-
return outLnk, 0, err
51+
outLnk, sz, err := BuildUnixFSDirectory(lnks, ls)
52+
return outLnk, tsize + sz, err
5153
case m.Type() == fs.ModeSymlink:
5254
content, err := os.Readlink(root)
5355
if err != nil {
5456
return nil, 0, err
5557
}
56-
return BuildUnixFSSymlink(content, ls)
58+
outLnk, sz, err := BuildUnixFSSymlink(content, ls)
59+
if err != nil {
60+
return nil, 0, err
61+
}
62+
return outLnk, sz, nil
5763
case m.IsRegular():
5864
fp, err := os.Open(root)
5965
if err != nil {
6066
return nil, 0, err
6167
}
6268
defer fp.Close()
63-
return BuildUnixFSFile(fp, "", ls)
69+
outLnk, sz, err := BuildUnixFSFile(fp, "", ls)
70+
if err != nil {
71+
return nil, 0, err
72+
}
73+
return outLnk, sz, nil
6474
default:
6575
return nil, 0, fmt.Errorf("cannot encode non regular file: %s", root)
6676
}
@@ -87,46 +97,46 @@ func estimateDirSize(entries []dagpb.PBLink) int {
8797
}
8898

8999
// BuildUnixFSDirectory creates a directory link over a collection of entries.
90-
func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, error) {
100+
func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
91101
if estimateDirSize(entries) > shardSplitThreshold {
92102
return BuildUnixFSShardedDirectory(defaultShardWidth, multihash.MURMUR3X64_64, entries, ls)
93103
}
94104
ufd, err := BuildUnixFS(func(b *Builder) {
95105
DataType(b, data.Data_Directory)
96106
})
97107
if err != nil {
98-
return nil, err
108+
return nil, 0, err
99109
}
100110
pbb := dagpb.Type.PBNode.NewBuilder()
101111
pbm, err := pbb.BeginMap(2)
102112
if err != nil {
103-
return nil, err
113+
return nil, 0, err
104114
}
105115
if err = pbm.AssembleKey().AssignString("Data"); err != nil {
106-
return nil, err
116+
return nil, 0, err
107117
}
108118
if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil {
109-
return nil, err
119+
return nil, 0, err
110120
}
111121
if err = pbm.AssembleKey().AssignString("Links"); err != nil {
112-
return nil, err
122+
return nil, 0, err
113123
}
114124
lnks, err := pbm.AssembleValue().BeginList(int64(len(entries)))
115125
if err != nil {
116-
return nil, err
126+
return nil, 0, err
117127
}
118128
// sorting happens in codec-dagpb
119129
for _, e := range entries {
120130
if err := lnks.AssembleValue().AssignNode(e); err != nil {
121-
return nil, err
131+
return nil, 0, err
122132
}
123133
}
124134
if err := lnks.Finish(); err != nil {
125-
return nil, err
135+
return nil, 0, err
126136
}
127137
if err := pbm.Finish(); err != nil {
128-
return nil, err
138+
return nil, 0, err
129139
}
130140
node := pbb.Build()
131-
return ls.Store(ipld.LinkContext{}, fileLinkProto, node)
141+
return sizedStore(ls, fileLinkProto, node)
132142
}

data/builder/dirshard.go

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ type hamtLink struct {
3939

4040
// BuildUnixFSShardedDirectory will build a hamt of unixfs hamt shards encoding a directory with more entries
4141
// than is typically allowed to fit in a standard IPFS single-block unixFS directory.
42-
func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, error) {
42+
func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
4343
// hash the entries
4444
var h hash.Hash
4545
var err error
@@ -50,7 +50,7 @@ func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink
5050
} else {
5151
h, err = multihash.GetHasher(hasher)
5252
if err != nil {
53-
return nil, err
53+
return nil, 0, err
5454
}
5555
}
5656
hamtEntries := make([]hamtLink, 0, len(entries))
@@ -65,7 +65,7 @@ func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink
6565

6666
sizeLg2, err := logtwo(size)
6767
if err != nil {
68-
return nil, err
68+
return nil, 0, err
6969
}
7070

7171
sharder := shard{
@@ -81,7 +81,7 @@ func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink
8181
for _, entry := range hamtEntries {
8282
err := sharder.add(entry)
8383
if err != nil {
84-
return nil, err
84+
return nil, 0, err
8585
}
8686
}
8787

@@ -138,67 +138,67 @@ func (s *shard) bitmap() []byte {
138138

139139
// serialize stores the concrete representation of this shard in the link system and
140140
// returns a link to it.
141-
func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, error) {
141+
func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
142142
ufd, err := BuildUnixFS(func(b *Builder) {
143143
DataType(b, data.Data_HAMTShard)
144144
HashType(b, s.hasher)
145145
Data(b, s.bitmap())
146146
Fanout(b, uint64(s.size))
147147
})
148148
if err != nil {
149-
return nil, err
149+
return nil, 0, err
150150
}
151151
pbb := dagpb.Type.PBNode.NewBuilder()
152152
pbm, err := pbb.BeginMap(2)
153153
if err != nil {
154-
return nil, err
154+
return nil, 0, err
155155
}
156156
if err = pbm.AssembleKey().AssignString("Data"); err != nil {
157-
return nil, err
157+
return nil, 0, err
158158
}
159159
if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil {
160-
return nil, err
160+
return nil, 0, err
161161
}
162162
if err = pbm.AssembleKey().AssignString("Links"); err != nil {
163-
return nil, err
163+
return nil, 0, err
164164
}
165165

166166
lnkBuilder := dagpb.Type.PBLinks.NewBuilder()
167167
lnks, err := lnkBuilder.BeginList(int64(len(s.children)))
168168
if err != nil {
169-
return nil, err
169+
return nil, 0, err
170170
}
171171
// sorting happens in codec-dagpb
172172
for idx, e := range s.children {
173173
var lnk dagpb.PBLink
174174
if e.shard != nil {
175-
ipldLnk, err := e.shard.serialize(ls)
175+
ipldLnk, sz, err := e.shard.serialize(ls)
176176
if err != nil {
177-
return nil, err
177+
return nil, 0, err
178178
}
179179
fullName := s.formatLinkName("", idx)
180-
lnk, err = BuildUnixFSDirectoryEntry(fullName, 0, ipldLnk)
180+
lnk, err = BuildUnixFSDirectoryEntry(fullName, int64(sz), ipldLnk)
181181
if err != nil {
182-
return nil, err
182+
return nil, 0, err
183183
}
184184
} else {
185185
fullName := s.formatLinkName(e.Name.Must().String(), idx)
186186
lnk, err = BuildUnixFSDirectoryEntry(fullName, e.Tsize.Must().Int(), e.Hash.Link())
187187
}
188188
if err != nil {
189-
return nil, err
189+
return nil, 0, err
190190
}
191191
if err := lnks.AssembleValue().AssignNode(lnk); err != nil {
192-
return nil, err
192+
return nil, 0, err
193193
}
194194
}
195195
if err := lnks.Finish(); err != nil {
196-
return nil, err
196+
return nil, 0, err
197197
}
198198
pbm.AssembleValue().AssignNode(lnkBuilder.Build())
199199
if err := pbm.Finish(); err != nil {
200-
return nil, err
200+
return nil, 0, err
201201
}
202202
node := pbb.Build()
203-
return ls.Store(ipld.LinkContext{}, fileLinkProto, node)
203+
return sizedStore(ls, fileLinkProto, node)
204204
}

data/builder/file.go

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ func fileTreeRecursive(depth int, children []ipld.Link, childLen []uint64, src c
8686
return nil, 0, err
8787
}
8888
node := basicnode.NewBytes(leaf)
89-
link, err := ls.Store(ipld.LinkContext{}, leafLinkProto, node)
90-
return link, uint64(len(leaf)), err
89+
return sizedStore(ls, leafLinkProto, node)
9190
}
9291
// depth > 1.
9392
totalSize := uint64(0)
@@ -166,25 +165,11 @@ func fileTreeRecursive(depth int, children []ipld.Link, childLen []uint64, src c
166165
}
167166
pbn := dpbb.Build()
168167

169-
link, err := ls.Store(ipld.LinkContext{}, fileLinkProto, pbn)
168+
link, sz, err := sizedStore(ls, fileLinkProto, pbn)
170169
if err != nil {
171170
return nil, 0, err
172171
}
173-
// calculate the dagpb node's size and add as overhead.
174-
cl, ok := link.(cidlink.Link)
175-
if !ok {
176-
return nil, 0, fmt.Errorf("unexpected non-cid linksystem")
177-
}
178-
rawlnk := cid.NewCidV1(uint64(multicodec.Raw), cl.Cid.Hash())
179-
rn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: rawlnk}, basicnode.Prototype__Bytes{})
180-
if err != nil {
181-
return nil, 0, fmt.Errorf("could not re-interpret dagpb node as bytes: %w", err)
182-
}
183-
rnb, err := rn.AsBytes()
184-
if err != nil {
185-
return nil, 0, fmt.Errorf("could not parse dagpb node as bytes: %w", err)
186-
}
187-
return link, totalSize + uint64(len(rnb)), nil
172+
return link, totalSize + sz, nil
188173
}
189174

190175
// BuildUnixFSDirectoryEntry creates the link to a file or directory as it appears within a unixfs directory.
@@ -256,25 +241,7 @@ func BuildUnixFSSymlink(content string, ls *ipld.LinkSystem) (ipld.Link, uint64,
256241
}
257242
pbn := dpbb.Build()
258243

259-
link, err := ls.Store(ipld.LinkContext{}, fileLinkProto, pbn)
260-
if err != nil {
261-
return nil, 0, err
262-
}
263-
// calculate the size and add as overhead.
264-
cl, ok := link.(cidlink.Link)
265-
if !ok {
266-
return nil, 0, fmt.Errorf("unexpected non-cid linksystem")
267-
}
268-
rawlnk := cid.NewCidV1(uint64(multicodec.Raw), cl.Cid.Hash())
269-
rn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: rawlnk}, basicnode.Prototype__Bytes{})
270-
if err != nil {
271-
return nil, 0, fmt.Errorf("could not re-interpret dagpb node as bytes: %w", err)
272-
}
273-
rnb, err := rn.AsBytes()
274-
if err != nil {
275-
return nil, 0, fmt.Errorf("could not re-interpret dagpb node as bytes: %w", err)
276-
}
277-
return link, uint64(len(rnb)), nil
244+
return sizedStore(ls, fileLinkProto, pbn)
278245
}
279246

280247
// Constants below are from

0 commit comments

Comments
 (0)