Skip to content

Commit ae19a78

Browse files
authored
Add test for s16 audio format (#576)
1 parent 05454ef commit ae19a78

File tree

5 files changed

+174
-1
lines changed

5 files changed

+174
-1
lines changed

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1716,7 +1716,10 @@ void VideoDecoder::createSwrContext(
17161716
TORCH_CHECK(
17171717
status == AVSUCCESS,
17181718
"Couldn't initialize SwrContext: ",
1719-
getFFMPEGErrorStringFromErrorCode(status));
1719+
getFFMPEGErrorStringFromErrorCode(status),
1720+
". If the error says 'Invalid argument', it's likely that you are using "
1721+
"a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
1722+
"valid scenarios. Try to upgrade FFmpeg?");
17201723
streamInfo.swrContext.reset(swrContext);
17211724
}
17221725

test/decoders/test_decoders.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
NASA_AUDIO,
2626
NASA_AUDIO_MP3,
2727
NASA_VIDEO,
28+
SINE_MONO_S16,
2829
SINE_MONO_S32,
2930
)
3031

@@ -1088,3 +1089,24 @@ def test_format_conversion(self):
10881089

10891090
reference_frames = asset.get_frame_data_by_range(start=0, stop=asset.num_frames)
10901091
torch.testing.assert_close(all_samples.data, reference_frames)
1092+
1093+
def test_s16_ffmpeg4_bug(self):
1094+
# s16 fails on FFmpeg4 but can be decoded on other versions.
1095+
# Debugging logs show that we're hitting:
1096+
# [SWR @ 0x560a7abdaf80] Input channel count and layout are unset
1097+
# which seems to point to:
1098+
# https://github.com/FFmpeg/FFmpeg/blob/40a6963fbd0c47be358a3760480180b7b532e1e9/libswresample/swresample.c#L293-L305
1099+
# ¯\_(ツ)_/¯
1100+
1101+
asset = SINE_MONO_S16
1102+
decoder = AudioDecoder(asset.path)
1103+
assert decoder.metadata.sample_rate == asset.sample_rate
1104+
assert decoder.metadata.sample_format == asset.sample_format
1105+
1106+
cm = (
1107+
pytest.raises(RuntimeError, match="Invalid argument")
1108+
if get_ffmpeg_major_version() == 4
1109+
else contextlib.nullcontext()
1110+
)
1111+
with cm:
1112+
decoder.get_samples_played_in_range(start_seconds=0)

test/resources/sine_mono_s16.wav

125 KB
Binary file not shown.

test/resources/sine_mono_s16.wav.stream0.all_frames_info.json

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
[
2+
{
3+
"duration_time": "0.128000",
4+
"pts_time": "0.000000"
5+
},
6+
{
7+
"duration_time": "0.128000",
8+
"pts_time": "0.128000"
9+
},
10+
{
11+
"duration_time": "0.128000",
12+
"pts_time": "0.256000"
13+
},
14+
{
15+
"duration_time": "0.128000",
16+
"pts_time": "0.384000"
17+
},
18+
{
19+
"duration_time": "0.128000",
20+
"pts_time": "0.512000"
21+
},
22+
{
23+
"duration_time": "0.128000",
24+
"pts_time": "0.640000"
25+
},
26+
{
27+
"duration_time": "0.128000",
28+
"pts_time": "0.768000"
29+
},
30+
{
31+
"duration_time": "0.128000",
32+
"pts_time": "0.896000"
33+
},
34+
{
35+
"duration_time": "0.128000",
36+
"pts_time": "1.024000"
37+
},
38+
{
39+
"duration_time": "0.128000",
40+
"pts_time": "1.152000"
41+
},
42+
{
43+
"duration_time": "0.128000",
44+
"pts_time": "1.280000"
45+
},
46+
{
47+
"duration_time": "0.128000",
48+
"pts_time": "1.408000"
49+
},
50+
{
51+
"duration_time": "0.128000",
52+
"pts_time": "1.536000"
53+
},
54+
{
55+
"duration_time": "0.128000",
56+
"pts_time": "1.664000"
57+
},
58+
{
59+
"duration_time": "0.128000",
60+
"pts_time": "1.792000"
61+
},
62+
{
63+
"duration_time": "0.128000",
64+
"pts_time": "1.920000"
65+
},
66+
{
67+
"duration_time": "0.128000",
68+
"pts_time": "2.048000"
69+
},
70+
{
71+
"duration_time": "0.128000",
72+
"pts_time": "2.176000"
73+
},
74+
{
75+
"duration_time": "0.128000",
76+
"pts_time": "2.304000"
77+
},
78+
{
79+
"duration_time": "0.128000",
80+
"pts_time": "2.432000"
81+
},
82+
{
83+
"duration_time": "0.128000",
84+
"pts_time": "2.560000"
85+
},
86+
{
87+
"duration_time": "0.128000",
88+
"pts_time": "2.688000"
89+
},
90+
{
91+
"duration_time": "0.128000",
92+
"pts_time": "2.816000"
93+
},
94+
{
95+
"duration_time": "0.128000",
96+
"pts_time": "2.944000"
97+
},
98+
{
99+
"duration_time": "0.128000",
100+
"pts_time": "3.072000"
101+
},
102+
{
103+
"duration_time": "0.128000",
104+
"pts_time": "3.200000"
105+
},
106+
{
107+
"duration_time": "0.128000",
108+
"pts_time": "3.328000"
109+
},
110+
{
111+
"duration_time": "0.128000",
112+
"pts_time": "3.456000"
113+
},
114+
{
115+
"duration_time": "0.128000",
116+
"pts_time": "3.584000"
117+
},
118+
{
119+
"duration_time": "0.128000",
120+
"pts_time": "3.712000"
121+
},
122+
{
123+
"duration_time": "0.128000",
124+
"pts_time": "3.840000"
125+
},
126+
{
127+
"duration_time": "0.032000",
128+
"pts_time": "3.968000"
129+
}
130+
]

test/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,24 @@ def sample_format(self) -> str:
462462
},
463463
)
464464

465+
466+
# Same sample rate as SINE_MONO_S32, but encoded as s16 instead of s32. Generated with:
467+
# ffmpeg -i test/resources/sine_mono_s32.wav -ar 16000 -c:a pcm_s16le test/resources/sine_mono_s16.wav
468+
SINE_MONO_S16 = TestAudio(
469+
filename="sine_mono_s16.wav",
470+
default_stream_index=0,
471+
frames={}, # Automatically loaded from json file
472+
stream_infos={
473+
0: TestAudioStreamInfo(
474+
sample_rate=16_000,
475+
num_channels=1,
476+
duration_seconds=4,
477+
num_frames=63,
478+
sample_format="s16",
479+
)
480+
},
481+
)
482+
465483
H265_VIDEO = TestVideo(
466484
filename="h265_video.mp4",
467485
default_stream_index=0,

0 commit comments

Comments
 (0)