From bd7c6ae2500546a087f37189ca136ef9586c250a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 27 Mar 2025 10:47:06 +0000 Subject: [PATCH 1/2] Fix implicit int64 to double conversion in audio seeks --- src/torchcodec/decoders/_core/VideoDecoder.cpp | 10 +++++----- src/torchcodec/decoders/_core/VideoDecoder.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 3d51b8a3..9d450201 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -912,7 +912,7 @@ VideoDecoder::AudioFramesOutput VideoDecoder::getFramesPlayedInRangeAudio( // If we need to seek backwards, then we have to seek back to the beginning // of the stream. // See [Audio Decoding Design]. - setCursorPtsInSecondsInternal(INT64_MIN); + setCursor(INT64_MIN); } // TODO-AUDIO Pre-allocate a long-enough tensor instead of creating a vec + @@ -971,13 +971,13 @@ void VideoDecoder::setCursorPtsInSeconds(double seconds) { // We don't allow public audio decoding APIs to seek, see [Audio Decoding // Design] validateActiveStream(AVMEDIA_TYPE_VIDEO); - setCursorPtsInSecondsInternal(seconds); + setCursor( + secondsToClosestPts(seconds, streamInfos_[activeStreamIndex_].timeBase)); } -void VideoDecoder::setCursorPtsInSecondsInternal(double seconds) { +void VideoDecoder::setCursor(int64_t pts) { cursorWasJustSet_ = true; - cursor_ = - secondsToClosestPts(seconds, streamInfos_[activeStreamIndex_].timeBase); + cursor_ = pts; } /* diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h index 4d3e2f2c..b367aa7b 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.h +++ b/src/torchcodec/decoders/_core/VideoDecoder.h @@ -367,7 +367,7 @@ class VideoDecoder { // DECODING APIS AND RELATED UTILS // -------------------------------------------------------------------------- - void 
setCursorPtsInSecondsInternal(double seconds); + void setCursor(int64_t pts); bool canWeAvoidSeeking() const; void maybeSeekToBeforeDesiredPts(); From 9dbf49f532f180da0ad51c2bc5ab5a9757f60d1c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 27 Mar 2025 11:31:06 +0000 Subject: [PATCH 2/2] Fix stopPts logic --- .../decoders/_core/VideoDecoder.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 9d450201..c759a47f 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -890,16 +890,15 @@ VideoDecoder::AudioFramesOutput VideoDecoder::getFramesPlayedInRangeAudio( std::optional<double> stopSecondsOptional) { validateActiveStream(AVMEDIA_TYPE_AUDIO); - double stopSeconds = - stopSecondsOptional.value_or(std::numeric_limits<double>::max()); - - TORCH_CHECK( - startSeconds <= stopSeconds, - "Start seconds (" + std::to_string(startSeconds) + - ") must be less than or equal to stop seconds (" + - std::to_string(stopSeconds) + ")."); + if (stopSecondsOptional.has_value()) { + TORCH_CHECK( + startSeconds <= *stopSecondsOptional, + "Start seconds (" + std::to_string(startSeconds) + + ") must be less than or equal to stop seconds (" + + std::to_string(*stopSecondsOptional) + ")."); + } - if (startSeconds == stopSeconds) { + if (stopSecondsOptional.has_value() && startSeconds == *stopSecondsOptional) { // For consistency with video return AudioFramesOutput{torch::empty({0, 0}), 0.0}; } @@ -921,7 +920,9 @@ VideoDecoder::AudioFramesOutput VideoDecoder::getFramesPlayedInRangeAudio( std::vector<torch::Tensor> frames; std::optional<double> firstFramePtsSeconds = std::nullopt; - auto stopPts = secondsToClosestPts(stopSeconds, streamInfo.timeBase); + auto stopPts = stopSecondsOptional.has_value() + ? secondsToClosestPts(*stopSecondsOptional, streamInfo.timeBase) + : INT64_MAX; auto finished = false; while (!finished) { try {