Fix incorrect PTS reported for the first decoded frame (#565)

NicolasHug · web-flow · commit 28e150312bb5 · 2025-03-17T13:24:15.000Z
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -467,6 +467,7 @@ void VideoDecoder::addStream(
   TORCH_CHECK_EQ(retVal, AVSUCCESS);
 
   streamInfo.codecContext->thread_count = ffmpegThreadCount.value_or(0);
+  streamInfo.codecContext->pkt_timebase = streamInfo.stream->time_base;
 
   // TODO_CODE_QUALITY same as above.
   if (mediaType == AVMEDIA_TYPE_VIDEO && device.type() == torch::kCUDA) {
diff --git a/test/decoders/test_decoders.py b/test/decoders/test_decoders.py
@@ -990,13 +990,7 @@ def test_get_all_samples(self, asset, stop_seconds):
         torch.testing.assert_close(samples.data, reference_frames)
         assert samples.sample_rate == asset.sample_rate
 
-        # TODO there's a bug with NASA_AUDIO_MP3: https://github.com/pytorch/torchcodec/issues/553
-        expected_pts = (
-            0.072
-            if asset is NASA_AUDIO_MP3
-            else asset.get_frame_info(idx=0).pts_seconds
-        )
-        assert samples.pts_seconds == expected_pts
+        assert samples.pts_seconds == asset.get_frame_info(idx=0).pts_seconds
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
     def test_at_frame_boundaries(self, asset):
@@ -1060,12 +1054,8 @@ def test_start_equals_stop(self, asset):
         assert samples.data.shape == (0, 0)
 
     def test_frame_start_is_not_zero(self):
-        # For NASA_AUDIO_MP3, the first frame is not at 0, it's at 0.072 [1].
+        # For NASA_AUDIO_MP3, the first frame is not at 0, it's at 0.138125.
         # So if we request start = 0.05, we shouldn't be truncating anything.
-        #
-        # [1] well, really it's at 0.138125, not 0.072 (see
-        # https://github.com/pytorch/torchcodec/issues/553), but for the purpose
-        # of this test it doesn't matter.
 
         asset = NASA_AUDIO_MP3
         start_seconds = 0.05  # this is less than the first frame's pts
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -826,6 +826,8 @@ def get_reference_frames(start_seconds, stop_seconds):
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
     def test_pts(self, asset):
+        # Non-regression test for
+        # https://github.com/pytorch/torchcodec/issues/553
         decoder = create_from_file(str(asset.path), seek_mode="approximate")
         add_audio_stream(decoder)
 
@@ -840,15 +842,7 @@ def test_pts(self, asset):
                 frames, asset.get_frame_data_by_index(frame_index)
             )
 
-            if asset is NASA_AUDIO_MP3 and frame_index == 0:
-                # TODO This is a bug. The 0.138125 is correct while 0.072 is
-                # incorrect, even though it comes from the decoded AVFrame's pts
-                # field.
-                # See https://github.com/pytorch/torchcodec/issues/553
-                assert pts_seconds == 0.072
-                assert start_seconds == 0.138125
-            else:
-                assert pts_seconds == start_seconds
+            assert pts_seconds == start_seconds
 
 
 if __name__ == "__main__":