@@ -188,6 +188,7 @@ void VideoDecoder::initializeDecoder() {
188
188
// fps is numFrames / duration where
189
189
// - duration = numSamplesTotal / sampleRate and
190
190
// - numSamplesTotal = numSamplesPerFrame * numFrames
191
+ // so fps = numFrames * sampleRate / (numSamplesPerFrame * numFrames)
191
192
streamMetadata.averageFps =
192
193
static_cast <double >(sampleRate) / numSamplesPerFrame;
193
194
}
@@ -477,7 +478,8 @@ void VideoDecoder::addStream(
477
478
.value_or (avCodec));
478
479
}
479
480
480
- // TODO: For audio, we raise if seek_mode="approximate" and if the number of
481
+ // TODO_FRAME_SIZE_APPROXIMATE_MODE
482
+ // For audio, we raise if seek_mode="approximate" and if the number of
481
483
// samples per frame is unknown (frame_size field of codec params). But that's
482
484
// quite limiting. Ultimately, the most common type of call will be to decode
483
485
// an entire file from start to end (possibly with some offsets for start and
@@ -577,7 +579,7 @@ void VideoDecoder::addVideoStream(
577
579
void VideoDecoder::addAudioStream (int streamIndex) {
578
580
addStream (streamIndex, AVMEDIA_TYPE_AUDIO);
579
581
580
- // See correspodning TODO in makeFrameBatchOutput
582
+ // See TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
581
583
auto & streamInfo = streamInfos_[activeStreamIndex_];
582
584
TORCH_CHECK (
583
585
streamInfo.codecContext ->frame_size > 0 ,
@@ -1020,9 +1022,9 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
1020
1022
// fix for this is to let `getFramePlayedAt` convert the pts to an index,
1021
1023
// just like the rest of the APIs.
1022
1024
//
1023
- // TODO HOW DO WE FIX THIS??
1024
-
1025
- // A few notes:
1025
+ // TODO HOW DO WE ADDRESS THIS??
1026
+ //
1027
+ // A few more notes:
1026
1028
// - This offset trick does work for the first frame at pts=0: we'll seek to
1027
1029
// -1, and this leads to a first packet with pts=-1024 to be sent to the
1028
1030
// decoder (on our test data), leading to frame 0 to be correctly decoded.
@@ -1057,7 +1059,6 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
1057
1059
desiredPts,
1058
1060
desiredPts,
1059
1061
0 );
1060
-
1061
1062
if (ffmepgStatus < 0 ) {
1062
1063
throw std::runtime_error (
1063
1064
" Could not seek file to pts=" + std::to_string (desiredPts) + " : " +
@@ -1470,11 +1471,12 @@ VideoDecoder::FrameBatchOutput VideoDecoder::makeFrameBatchOutput(
1470
1471
containerMetadata_.allStreamMetadata [activeStreamIndex_];
1471
1472
return FrameBatchOutput (numFrames, videoStreamOptions, streamMetadata);
1472
1473
} else {
1474
+ // TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
1473
1475
// We asserted that frame_size is non-zero when we added the stream, but it
1474
1476
// may not always be the case.
1475
1477
// When it's 0, we can't pre-allocate the output tensor as we don't know the
1476
- // number of samples per channel, and it may be non-constant.
1477
- // TODO: handle this .
1478
+ // number of samples per channel, and it may be non-constant. We'll have to
1479
+ // find a way to make the batch-APIs work without pre-allocation.
1478
1480
int64_t numSamples = streamInfo.codecContext ->frame_size ;
1479
1481
int64_t numChannels = getNumChannels (streamInfo.codecContext );
1480
1482
return FrameBatchOutput (numFrames, numChannels, numSamples);
0 commit comments