Remove multi-stream related code #483
Merged
Changes from all commits (4 commits):
cfadb8c: Remove multi-stream related code (NicolasHug)
72d3cf3: Merge branch 'main' of github.com:pytorch/torchcodec into demuxxxx (NicolasHug)
780469b: Merge branch 'main' of github.com:pytorch/torchcodec into demuxxxx (NicolasHug)
f352cfc: Use NO_ACTIVE_STREAM (NicolasHug)
@@ -435,11 +435,9 @@ int VideoDecoder::getBestStreamIndex(AVMediaType mediaType) {
 void VideoDecoder::addVideoStreamDecoder(
     int preferredStreamIndex,
     const VideoStreamOptions& videoStreamOptions) {
-  if (activeStreamIndices_.count(preferredStreamIndex) > 0) {
-    throw std::invalid_argument(
-        "Stream with index " + std::to_string(preferredStreamIndex) +
-        " is already active.");
-  }
+  TORCH_CHECK(
+      activeStreamIndex_ == NO_ACTIVE_STREAM,
+      "Can only add one single stream.");
   TORCH_CHECK(formatContext_.get() != nullptr);

   AVCodecOnlyUseForCallingAVFindBestStream avCodec = nullptr;
@@ -506,7 +504,7 @@ void VideoDecoder::addVideoStreamDecoder(
   }

   codecContext->time_base = streamInfo.stream->time_base;
-  activeStreamIndices_.insert(streamIndex);
+  activeStreamIndex_ = streamIndex;
   updateMetadataWithCodecContext(streamInfo.streamIndex, codecContext);
   streamInfo.videoStreamOptions = videoStreamOptions;
@@ -726,53 +724,39 @@ bool VideoDecoder::canWeAvoidSeekingForStream(
 // AVFormatContext if it is needed. We can skip seeking in certain cases. See
 // the comment of canWeAvoidSeeking() for details.
 void VideoDecoder::maybeSeekToBeforeDesiredPts() {
-  if (activeStreamIndices_.size() == 0) {
+  if (activeStreamIndex_ == NO_ACTIVE_STREAM) {
     return;
   }
-  for (int streamIndex : activeStreamIndices_) {
-    StreamInfo& streamInfo = streamInfos_[streamIndex];
-    // clang-format off: clang format clashes
-    streamInfo.discardFramesBeforePts = secondsToClosestPts(*desiredPtsSeconds_, streamInfo.timeBase);
-    // clang-format on
-  }
+  StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
+  streamInfo.discardFramesBeforePts =
+      secondsToClosestPts(*desiredPtsSeconds_, streamInfo.timeBase);

   decodeStats_.numSeeksAttempted++;
   // See comment for canWeAvoidSeeking() for details on why this optimization
   // works.
-  bool mustSeek = false;
-  for (int streamIndex : activeStreamIndices_) {
-    StreamInfo& streamInfo = streamInfos_[streamIndex];
-    int64_t desiredPtsForStream = *desiredPtsSeconds_ * streamInfo.timeBase.den;
-    if (!canWeAvoidSeekingForStream(
-            streamInfo, streamInfo.currentPts, desiredPtsForStream)) {
-      mustSeek = true;
-      break;
-    }
-  }
-  if (!mustSeek) {
+  int64_t desiredPtsForStream = *desiredPtsSeconds_ * streamInfo.timeBase.den;
+  if (canWeAvoidSeekingForStream(
+          streamInfo, streamInfo.currentPts, desiredPtsForStream)) {
     decodeStats_.numSeeksSkipped++;
     return;
   }
-  int firstActiveStreamIndex = *activeStreamIndices_.begin();
-  const auto& firstStreamInfo = streamInfos_[firstActiveStreamIndex];
   int64_t desiredPts =
-      secondsToClosestPts(*desiredPtsSeconds_, firstStreamInfo.timeBase);
+      secondsToClosestPts(*desiredPtsSeconds_, streamInfo.timeBase);

   // For some encodings like H265, FFMPEG sometimes seeks past the point we
   // set as the max_ts. So we use our own index to give it the exact pts of
   // the key frame that we want to seek to.
   // See https://github.com/pytorch/torchcodec/issues/179 for more details.
   // See https://trac.ffmpeg.org/ticket/11137 for the underlying ffmpeg bug.
-  if (!firstStreamInfo.keyFrames.empty()) {
+  if (!streamInfo.keyFrames.empty()) {
     int desiredKeyFrameIndex = getKeyFrameIndexForPtsUsingScannedIndex(
-        firstStreamInfo.keyFrames, desiredPts);
+        streamInfo.keyFrames, desiredPts);
     desiredKeyFrameIndex = std::max(desiredKeyFrameIndex, 0);
-    desiredPts = firstStreamInfo.keyFrames[desiredKeyFrameIndex].pts;
+    desiredPts = streamInfo.keyFrames[desiredKeyFrameIndex].pts;
   }

   int ffmepgStatus = avformat_seek_file(
       formatContext_.get(),
-      firstStreamInfo.streamIndex,
+      streamInfo.streamIndex,
       INT64_MIN,
       desiredPts,
       desiredPts,
@@ -783,15 +767,12 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
         getFFMPEGErrorStringFromErrorCode(ffmepgStatus));
   }
   decodeStats_.numFlushes++;
-  for (int streamIndex : activeStreamIndices_) {
-    StreamInfo& streamInfo = streamInfos_[streamIndex];
-    avcodec_flush_buffers(streamInfo.codecContext.get());
-  }
+  avcodec_flush_buffers(streamInfo.codecContext.get());
 }

 VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
-    std::function<bool(int, AVFrame*)> filterFunction) {
-  if (activeStreamIndices_.size() == 0) {
+    std::function<bool(AVFrame*)> filterFunction) {
+  if (activeStreamIndex_ == NO_ACTIVE_STREAM) {
     throw std::runtime_error("No active streams configured.");
   }
@@ -803,44 +784,25 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
     desiredPtsSeconds_ = std::nullopt;
   }

+  StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
+
   // Need to get the next frame or error from PopFrame.
   UniqueAVFrame avFrame(av_frame_alloc());
   AutoAVPacket autoAVPacket;
   int ffmpegStatus = AVSUCCESS;
   bool reachedEOF = false;
-  int frameStreamIndex = -1;
   while (true) {
-    frameStreamIndex = -1;
-    bool gotPermanentErrorOnAnyActiveStream = false;
-
-    // Get a frame on an active stream. Note that we don't know ahead of time
-    // which streams have frames to receive, so we linearly try the active
-    // streams.
-    for (int streamIndex : activeStreamIndices_) {
-      StreamInfo& streamInfo = streamInfos_[streamIndex];
-      ffmpegStatus =
-          avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
-
-      if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN)) {
-        gotPermanentErrorOnAnyActiveStream = true;
-        break;
-      }
-
-      if (ffmpegStatus == AVSUCCESS) {
-        frameStreamIndex = streamIndex;
-        break;
-      }
-    }
-
-    if (gotPermanentErrorOnAnyActiveStream) {
+    ffmpegStatus =
+        avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
+
+    if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN)) {
       // Non-retriable error
       break;
     }

     decodeStats_.numFramesReceivedByDecoder++;

     // Is this the kind of frame we're looking for?
-    if (ffmpegStatus == AVSUCCESS &&
-        filterFunction(frameStreamIndex, avFrame.get())) {
+    if (ffmpegStatus == AVSUCCESS && filterFunction(avFrame.get())) {
       // Yes, this is the frame we'll return; break out of the decoding loop.
       break;
     } else if (ffmpegStatus == AVSUCCESS) {
@@ -865,18 +827,15 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
     decodeStats_.numPacketsRead++;

     if (ffmpegStatus == AVERROR_EOF) {
-      // End of file reached. We must drain all codecs by sending a nullptr
+      // End of file reached. We must drain the codec by sending a nullptr
       // packet.
-      for (int streamIndex : activeStreamIndices_) {
-        StreamInfo& streamInfo = streamInfos_[streamIndex];
-        ffmpegStatus = avcodec_send_packet(
-            streamInfo.codecContext.get(),
-            /*avpkt=*/nullptr);
-        if (ffmpegStatus < AVSUCCESS) {
-          throw std::runtime_error(
-              "Could not flush decoder: " +
-              getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
-        }
+      ffmpegStatus = avcodec_send_packet(
+          streamInfo.codecContext.get(),
+          /*avpkt=*/nullptr);
+      if (ffmpegStatus < AVSUCCESS) {
+        throw std::runtime_error(
+            "Could not flush decoder: " +
+            getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
       }

       // We've reached the end of file so we can't read any more packets from
@@ -892,15 +851,14 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
           getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
     }

-    if (activeStreamIndices_.count(packet->stream_index) == 0) {
-      // This packet is not for any of the active streams.
+    if (packet->stream_index != activeStreamIndex_) {
       continue;
     }

     // We got a valid packet. Send it to the decoder, and we'll receive it in
     // the next iteration.
-    ffmpegStatus = avcodec_send_packet(
-        streamInfos_[packet->stream_index].codecContext.get(), packet.get());
+    ffmpegStatus =
+        avcodec_send_packet(streamInfo.codecContext.get(), packet.get());
     if (ffmpegStatus < AVSUCCESS) {
       throw std::runtime_error(
           "Could not push packet to decoder: " +
@@ -927,11 +885,10 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
   // haven't received as frames. Eventually we will either hit AVERROR_EOF from
   // av_receive_frame() or the user will have seeked to a different location in
   // the file and that will flush the decoder.
-  StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
-  activeStreamInfo.currentPts = avFrame->pts;
-  activeStreamInfo.currentDuration = getDuration(avFrame);
+  streamInfo.currentPts = avFrame->pts;
+  streamInfo.currentDuration = getDuration(avFrame);

-  return AVFrameStream(std::move(avFrame), frameStreamIndex);
+  return AVFrameStream(std::move(avFrame), activeStreamIndex_);
 }

 VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(

Review comment on the new AVFrameStream return line: We don't really need this
@@ -1096,8 +1053,8 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtNoDemux(
   setCursorPtsInSeconds(seconds);
   AVFrameStream avFrameStream =
-      decodeAVFrame([seconds, this](int frameStreamIndex, AVFrame* avFrame) {
-        StreamInfo& streamInfo = streamInfos_[frameStreamIndex];
+      decodeAVFrame([seconds, this](AVFrame* avFrame) {
+        StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
         double frameStartTime = ptsToSeconds(avFrame->pts, streamInfo.timeBase);
         double frameEndTime = ptsToSeconds(
             avFrame->pts + getDuration(avFrame), streamInfo.timeBase);
@@ -1496,11 +1453,10 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {

 VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemuxInternal(
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  AVFrameStream avFrameStream =
-      decodeAVFrame([this](int frameStreamIndex, AVFrame* avFrame) {
-        StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
-        return avFrame->pts >= activeStreamInfo.discardFramesBeforePts;
-      });
+  AVFrameStream avFrameStream = decodeAVFrame([this](AVFrame* avFrame) {
+    StreamInfo& activeStreamInfo = streamInfos_[activeStreamIndex_];
+    return avFrame->pts >= activeStreamInfo.discardFramesBeforePts;
+  });
   return convertAVFrameToFrameOutput(avFrameStream, preAllocatedOutputTensor);
 }
Review comment:
BTW, this is the "demux" part now. And I think we should be calling av_read_frame within its own while loop, for as long as the received packet isn't of the target stream.

Reply:
Yeah, what we're currently doing is inefficient, because if the packet is not the right stream, we first have to make a call to avcodec_receive_frame() that we know will fail. We're still doing the correct thing (I think), it's just inefficient.
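
For illustration, a minimal sketch of that suggested demux loop, written with plain FFmpeg calls rather than torchcodec's own wrappers; the function name demuxAndSendNextPacket and its parameters are made up for this example, and the real decodeAVFrame() loop would still need to receive frames and update decode stats afterwards:

// Hypothetical sketch: read packets in their own loop until one belongs to the
// single active stream, and only then send it to that stream's decoder. This
// avoids calling avcodec_receive_frame() on packets we know we will discard.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}

// activeStreamIndex plays the role of activeStreamIndex_ in the diff above.
// Returns the avcodec_send_packet() status, or a negative av_read_frame() error.
int demuxAndSendNextPacket(
    AVFormatContext* formatContext,
    AVCodecContext* codecContext,
    int activeStreamIndex) {
  AVPacket* packet = av_packet_alloc();
  int status = 0;
  do {
    av_packet_unref(packet);  // drop any packet from a non-target stream
    status = av_read_frame(formatContext, packet);
    if (status == AVERROR_EOF) {
      // End of file: drain the decoder by sending a nullptr packet.
      status = avcodec_send_packet(codecContext, /*avpkt=*/nullptr);
      av_packet_free(&packet);
      return status;
    }
    if (status < 0) {
      av_packet_free(&packet);
      return status;  // real read error; let the caller handle it
    }
  } while (packet->stream_index != activeStreamIndex);

  // The packet is for the active stream: hand it to the decoder. The decoder
  // keeps its own reference, so we can free our packet afterwards.
  status = avcodec_send_packet(codecContext, packet);
  av_packet_free(&packet);
  return status;
}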