Skip to content

Commit aa95895

Browse files
authored
Fix sdk/log record attr value limit (#6032)
Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
1 parent 58fdf2a commit aa95895

File tree

3 files changed

+188
-76
lines changed

3 files changed

+188
-76
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
3333
- Fix inconsistent request body closing in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`. (#5954)
3434
- Fix invalid exemplar keys in `go.opentelemetry.io/otel/exporters/prometheus`. (#5995)
3535
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/trace`. (#5997)
36+
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/log`. (#6032)
3637

3738
<!-- Released section -->
3839
<!-- Don't change this section unless doing release -->

sdk/log/record.go

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
406406
case log.KindString:
407407
s := val.AsString()
408408
if len(s) > r.attributeValueLengthLimit {
409-
val = log.StringValue(truncate(s, r.attributeValueLengthLimit))
409+
val = log.StringValue(truncate(r.attributeValueLengthLimit, s))
410410
}
411411
case log.KindSlice:
412412
sl := val.AsSlice()
@@ -427,40 +427,78 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
427427
return val
428428
}
429429

430-
// truncate returns a copy of str truncated to have a length of at most n
431-
// characters. If the length of str is less than n, str itself is returned.
430+
// truncate returns a truncated version of s such that it contains less than
431+
// the limit number of characters. Truncation is applied by returning the limit
432+
// number of valid characters contained in s.
432433
//
433-
// The truncate of str ensures that no valid UTF-8 code point is split. The
434-
// copy returned will be less than n if a characters straddles the length
435-
// limit.
434+
// If limit is negative, it returns the original string.
436435
//
437-
// No truncation is performed if n is less than zero.
438-
func truncate(str string, n int) string {
439-
if n < 0 {
440-
return str
436+
// UTF-8 is supported. When truncating, all invalid characters are dropped
437+
// before applying truncation.
438+
//
439+
// If s already contains less than the limit number of bytes, it is returned
440+
// unchanged. No invalid characters are removed.
441+
func truncate(limit int, s string) string {
442+
// This prioritize performance in the following order based on the most
443+
// common expected use-cases.
444+
//
445+
// - Short values less than the default limit (128).
446+
// - Strings with valid encodings that exceed the limit.
447+
// - No limit.
448+
// - Strings with invalid encodings that exceed the limit.
449+
if limit < 0 || len(s) <= limit {
450+
return s
441451
}
442452

443-
// cut returns a copy of the s truncated to not exceed a length of n. If
444-
// invalid UTF-8 is encountered, s is returned with false. Otherwise, the
445-
// truncated copy will be returned with true.
446-
cut := func(s string) (string, bool) {
447-
var i int
448-
for i = 0; i < n; {
449-
r, size := utf8.DecodeRuneInString(s[i:])
450-
if r == utf8.RuneError {
451-
return s, false
453+
// Optimistically, assume all valid UTF-8.
454+
var b strings.Builder
455+
count := 0
456+
for i, c := range s {
457+
if c != utf8.RuneError {
458+
count++
459+
if count > limit {
460+
return s[:i]
452461
}
453-
if i+size > n {
454-
break
455-
}
456-
i += size
462+
continue
463+
}
464+
465+
_, size := utf8.DecodeRuneInString(s[i:])
466+
if size == 1 {
467+
// Invalid encoding.
468+
b.Grow(len(s) - 1)
469+
_, _ = b.WriteString(s[:i])
470+
s = s[i:]
471+
break
457472
}
458-
return s[:i], true
459473
}
460474

461-
cp, ok := cut(str)
462-
if !ok {
463-
cp, _ = cut(strings.ToValidUTF8(str, ""))
475+
// Fast-path, no invalid input.
476+
if b.Cap() == 0 {
477+
return s
464478
}
465-
return cp
479+
480+
// Truncate while validating UTF-8.
481+
for i := 0; i < len(s) && count < limit; {
482+
c := s[i]
483+
if c < utf8.RuneSelf {
484+
// Optimization for single byte runes (common case).
485+
_ = b.WriteByte(c)
486+
i++
487+
count++
488+
continue
489+
}
490+
491+
_, size := utf8.DecodeRuneInString(s[i:])
492+
if size == 1 {
493+
// We checked for all 1-byte runes above, this is a RuneError.
494+
i++
495+
continue
496+
}
497+
498+
_, _ = b.WriteString(s[i : i+size])
499+
i += size
500+
count++
501+
}
502+
503+
return b.String()
466504
}

sdk/log/record_test.go

Lines changed: 121 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -570,74 +570,147 @@ func assertKV(t *testing.T, r Record, kv log.KeyValue) {
570570
}
571571

572572
func TestTruncate(t *testing.T) {
573-
testcases := []struct {
574-
input, want string
575-
limit int
573+
type group struct {
574+
limit int
575+
input string
576+
expected string
577+
}
578+
579+
tests := []struct {
580+
name string
581+
groups []group
576582
}{
583+
// Edge case: limit is negative, no truncation should occur
577584
{
578-
input: "value",
579-
want: "value",
580-
limit: -1,
581-
},
582-
{
583-
input: "value",
584-
want: "",
585-
limit: 0,
586-
},
587-
{
588-
input: "value",
589-
want: "v",
590-
limit: 1,
591-
},
592-
{
593-
input: "value",
594-
want: "va",
595-
limit: 2,
585+
name: "NoTruncation",
586+
groups: []group{
587+
{-1, "No truncation!", "No truncation!"},
588+
},
596589
},
590+
591+
// Edge case: string is already shorter than the limit, no truncation
592+
// should occur
597593
{
598-
input: "value",
599-
want: "val",
600-
limit: 3,
594+
name: "ShortText",
595+
groups: []group{
596+
{10, "Short text", "Short text"},
597+
{15, "Short text", "Short text"},
598+
{100, "Short text", "Short text"},
599+
},
601600
},
601+
602+
// Edge case: truncation happens with ASCII characters only
602603
{
603-
input: "value",
604-
want: "valu",
605-
limit: 4,
604+
name: "ASCIIOnly",
605+
groups: []group{
606+
{1, "Hello World!", "H"},
607+
{5, "Hello World!", "Hello"},
608+
{12, "Hello World!", "Hello World!"},
609+
},
606610
},
611+
612+
// Truncation including multi-byte characters (UTF-8)
607613
{
608-
input: "value",
609-
want: "value",
610-
limit: 5,
614+
name: "ValidUTF-8",
615+
groups: []group{
616+
{7, "Hello, 世界", "Hello, "},
617+
{8, "Hello, 世界", "Hello, 世"},
618+
{2, "こんにちは", "こん"},
619+
{3, "こんにちは", "こんに"},
620+
{5, "こんにちは", "こんにちは"},
621+
{12, "こんにちは", "こんにちは"},
622+
},
611623
},
624+
625+
// Truncation with invalid UTF-8 characters
612626
{
613-
input: "value",
614-
want: "value",
615-
limit: 6,
627+
name: "InvalidUTF-8",
628+
groups: []group{
629+
{11, "Invalid\x80text", "Invalidtext"},
630+
// Do not modify invalid text if equal to limit.
631+
{11, "Valid text\x80", "Valid text\x80"},
632+
// Do not modify invalid text if under limit.
633+
{15, "Valid text\x80", "Valid text\x80"},
634+
{5, "Hello\x80World", "Hello"},
635+
{11, "Hello\x80World\x80!", "HelloWorld!"},
636+
{15, "Hello\x80World\x80Test", "HelloWorldTest"},
637+
{15, "Hello\x80\x80\x80World\x80Test", "HelloWorldTest"},
638+
{15, "\x80\x80\x80Hello\x80\x80\x80World\x80Test\x80\x80", "HelloWorldTest"},
639+
},
616640
},
641+
642+
// Truncation with mixed validn and invalid UTF-8 characters
617643
{
618-
input: "€€€€", // 3 bytes each
619-
want: "€€€",
620-
limit: 10,
644+
name: "MixedUTF-8",
645+
groups: []group{
646+
{6, "€"[0:2] + "hello€€", "hello€"},
647+
{6, "€" + "€"[0:2] + "hello", "€hello"},
648+
{11, "Valid text\x80📜", "Valid text📜"},
649+
{11, "Valid text📜\x80", "Valid text📜"},
650+
{14, "😊 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
651+
{14, "😊\x80 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
652+
{14, "😊\x80 Hello\x80World🌍\x80🚀", "😊 HelloWorld🌍🚀"},
653+
{14, "😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
654+
{14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
655+
},
621656
},
657+
658+
// Edge case: empty string, should return empty string
622659
{
623-
input: "€"[0:2] + "hello€€", // corrupted first rune, then over limit
624-
want: "hello€",
625-
limit: 10,
660+
name: "Empty",
661+
groups: []group{
662+
{5, "", ""},
663+
},
626664
},
665+
666+
// Edge case: limit is 0, should return an empty string
627667
{
628-
input: "€"[0:2] + "hello", // corrupted first rune, then not over limit
629-
want: "hello",
630-
limit: 10,
668+
name: "Zero",
669+
groups: []group{
670+
{0, "Some text", ""},
671+
{0, "", ""},
672+
},
631673
},
632674
}
633675

634-
for _, tc := range testcases {
635-
name := fmt.Sprintf("%s/%d", tc.input, tc.limit)
636-
t.Run(name, func(t *testing.T) {
637-
t.Log(tc.input, len(tc.input), tc.limit)
638-
assert.Equal(t, tc.want, truncate(tc.input, tc.limit))
639-
})
676+
for _, tt := range tests {
677+
for _, g := range tt.groups {
678+
t.Run(tt.name, func(t *testing.T) {
679+
t.Parallel()
680+
681+
got := truncate(g.limit, g.input)
682+
assert.Equalf(
683+
t, g.expected, got,
684+
"input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)",
685+
g.input, []rune(g.input),
686+
got, []rune(got),
687+
g.expected, []rune(g.expected),
688+
)
689+
})
690+
}
691+
}
692+
}
693+
694+
func BenchmarkTruncate(b *testing.B) {
695+
run := func(limit int, input string) func(b *testing.B) {
696+
return func(b *testing.B) {
697+
b.ReportAllocs()
698+
b.RunParallel(func(pb *testing.PB) {
699+
var out string
700+
for pb.Next() {
701+
out = truncate(limit, input)
702+
}
703+
_ = out
704+
})
705+
}
640706
}
707+
b.Run("Unlimited", run(-1, "hello 😊 world 🌍🚀"))
708+
b.Run("Zero", run(0, "Some text"))
709+
b.Run("Short", run(10, "Short Text"))
710+
b.Run("ASCII", run(5, "Hello, World!"))
711+
b.Run("ValidUTF-8", run(10, "hello 😊 world 🌍🚀"))
712+
b.Run("InvalidUTF-8", run(6, "€"[0:2]+"hello€€"))
713+
b.Run("MixedUTF-8", run(14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80"))
641714
}
642715

643716
func BenchmarkWalkAttributes(b *testing.B) {

0 commit comments

Comments
 (0)