Skip to content
This repository was archived by the owner on Jan 20, 2023. It is now read-only.

Commit 1ee0185

Browse files
committed
Check expected invariants while unmarshaling
We expect some things of our digests: - The centroids should be in order of increasing mean. - The digest's countTotal field should be the sum of the counts of the centroids (overflowing is forbidden). - Centroids should have non-negative counts. Fuzzing detected cases where these are not true.
1 parent 40d829c commit 1ee0185

File tree

3 files changed

+96
-10
lines changed

3 files changed

+96
-10
lines changed

fuzz.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,41 @@
22

33
package tdigest
44

5+
import (
6+
"bytes"
7+
"fmt"
8+
"log"
9+
10+
"github.com/davecgh/go-spew/spew"
11+
)
12+
513
func Fuzz(data []byte) int {
614
v := new(TDigest)
715
err := v.UnmarshalBinary(data)
816
if err != nil {
917
return 0
1018
}
11-
_, err = v.MarshalBinary()
19+
20+
remarshaled, err := v.MarshalBinary()
1221
if err != nil {
1322
panic(err)
1423
}
15-
return 1
1624

25+
if !bytes.HasPrefix(data, remarshaled) {
26+
panic(fmt.Sprintf("not equal: \n%v\nvs\n%v", data, remarshaled))
27+
}
28+
29+
for q := float64(0.1); q <= 1.0; q += 0.05 {
30+
prev, this := v.Quantile(q-0.1), v.Quantile(q)
31+
if prev-this > 1e-100 { // Floating point math makes this slightly imprecise.
32+
log.Printf("v: %s", spew.Sprint(v))
33+
log.Printf("q: %v", q)
34+
log.Printf("prev: %v", prev)
35+
log.Printf("this: %v", this)
36+
panic("quantiles should only increase")
37+
}
38+
}
39+
40+
v.Add(1, 1)
41+
return 1
1742
}

fuzz_test.go

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,44 @@
11
package tdigest
22

3-
import "testing"
3+
import (
4+
"bytes"
5+
"testing"
6+
7+
"github.com/davecgh/go-spew/spew"
8+
)
49

510
func TestFuzzPanicRegressions(t *testing.T) {
611
// This test contains a list of byte sequences discovered by
712
// github.com/dvyukov/go-fuzz which, at one time, caused tdigest to panic. The
813
// test just makes sure that they no longer cause a panic.
914
testcase := func(crasher []byte) func(*testing.T) {
10-
return func(*testing.T) {
15+
return func(t *testing.T) {
1116
v := new(TDigest)
1217
err := v.UnmarshalBinary(crasher)
1318
if err != nil {
1419
return
1520
}
16-
_, err = v.MarshalBinary()
21+
remarshaled, err := v.MarshalBinary()
1722
if err != nil {
18-
panic(err)
23+
t.Fatalf("marshal error: %v", err)
24+
}
25+
26+
if !bytes.HasPrefix(crasher, remarshaled) {
27+
t.Fatalf("not equal: \n%v\nvs\n%v", crasher, remarshaled)
28+
}
29+
30+
for q := float64(0.1); q <= 1.0; q += 0.05 {
31+
prev, this := v.Quantile(q-0.1), v.Quantile(q)
32+
if prev-this > 1e-100 { // Floating point math makes this slightly imprecise.
33+
t.Logf("v: %s", spew.Sprint(v))
34+
t.Logf("q: %v", q)
35+
t.Logf("prev: %v", prev)
36+
t.Logf("this: %v", this)
37+
t.Fatal("quantiles should only increase")
38+
}
1939
}
40+
41+
v.Add(1, 1)
2042
}
2143
}
2244
t.Run("fuzz1", testcase([]byte{
@@ -32,5 +54,31 @@ func TestFuzzPanicRegressions(t *testing.T) {
3254
0x37, 0x35, 0x37, 0x36, 0x37, 0x37, 0x37, 0x38,
3355
0x37, 0x39, 0x28,
3456
}))
35-
57+
t.Run("fuzz3", testcase([]byte{
58+
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
59+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
60+
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
61+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
62+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
63+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
64+
0x30, 0xbf,
65+
}))
66+
t.Run("fuzz4", testcase([]byte{
67+
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
68+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
69+
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
70+
0x30, 0x63, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
71+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
72+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
73+
0x30, 0x4e,
74+
}))
75+
t.Run("fuzz5", testcase([]byte{
76+
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
77+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
78+
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
79+
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
80+
0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
81+
0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
82+
0x92, 0x00,
83+
}))
3684
}

serde.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/binary"
66
"fmt"
77
"io"
8+
"math"
89
)
910

1011
const (
@@ -39,19 +40,19 @@ func unmarshalBinary(d *TDigest, p []byte) error {
3940
r := &binaryReader{r: bytes.NewReader(p)}
4041
r.readValue(&mv)
4142
if mv != magic {
42-
return fmt.Errorf("invalid header magic value, data might be corrupted: %d", mv)
43+
return fmt.Errorf("data corruption detected: invalid header magic value %d", mv)
4344
}
4445
r.readValue(&ev)
4546
if ev != encodingVersion {
46-
return fmt.Errorf("invalid encoding version: %d", ev)
47+
return fmt.Errorf("data corruption detected: invalid encoding version %d", ev)
4748
}
4849
r.readValue(&d.compression)
4950
r.readValue(&n)
5051
if r.err != nil {
5152
return r.err
5253
}
5354
if n < 0 {
54-
return fmt.Errorf("invalid n, cannot be negative: %v", n)
55+
return fmt.Errorf("data corruption detected: number of centroids cannot be negative, have %v", n)
5556
}
5657
if n > 1<<20 {
5758
return fmt.Errorf("invalid n, cannot be greater than 2^20: %v", n)
@@ -64,7 +65,19 @@ func unmarshalBinary(d *TDigest, p []byte) error {
6465
if r.err != nil {
6566
return r.err
6667
}
68+
if c.count < 0 {
69+
return fmt.Errorf("data corruption detected: negative count: %d", c.count)
70+
}
71+
if i > 0 {
72+
prev := d.centroids[i-1]
73+
if c.mean < prev.mean {
74+
return fmt.Errorf("data corruption detected: centroid %d has lower mean (%v) than preceding centroid %d (%v)", i, c.mean, i-1, prev.mean)
75+
}
76+
}
6777
d.centroids[i] = c
78+
if c.count > math.MaxInt64-d.countTotal {
79+
return fmt.Errorf("data corruption detected: centroid total size overflow")
80+
}
6881
d.countTotal += c.count
6982
}
7083

0 commit comments

Comments
 (0)