@@ -1436,6 +1436,11 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
1436
1436
convertedAVFrame->format = static_cast <int >(desiredSampleFormat);
1437
1437
convertedAVFrame->sample_rate = desiredSampleRate;
1438
1438
if (sourceSampleRate != desiredSampleRate) {
1439
+ // Note that this is an upper bound on the number of output samples.
1440
+ // `swr_convert()` will likely not fill convertedAVFrame with that many
1441
+ // samples; it will buffer the last few ones because those require future
1442
+ // samples. That's also why we reset nb_samples after the call to
1443
+ // `swr_convert()`.
1439
1444
convertedAVFrame->nb_samples = av_rescale_rnd (
1440
1445
swr_get_delay (streamInfo.swrContext .get (), sourceSampleRate) +
1441
1446
avFrame->nb_samples ,
@@ -1452,16 +1457,20 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
1452
1457
" Could not allocate frame buffers for sample format conversion: " ,
1453
1458
getFFMPEGErrorStringFromErrorCode (status));
1454
1459
1455
- auto numSampleConverted = swr_convert (
1460
+ auto numConvertedSamples = swr_convert (
1456
1461
streamInfo.swrContext .get (),
1457
1462
convertedAVFrame->data ,
1458
1463
convertedAVFrame->nb_samples ,
1459
1464
static_cast <const uint8_t **>(const_cast <const uint8_t **>(avFrame->data )),
1460
1465
avFrame->nb_samples );
1461
1466
TORCH_CHECK (
1462
- numSampleConverted > 0 ,
1467
+ numConvertedSamples > 0 ,
1463
1468
" Error in swr_convert: " ,
1464
- getFFMPEGErrorStringFromErrorCode (numSampleConverted));
1469
+ getFFMPEGErrorStringFromErrorCode (numConvertedSamples));
1470
+
1471
+ // See comment above about nb_samples
1472
+ convertedAVFrame->nb_samples = numConvertedSamples;
1473
+ // TODO: need to flush properly to retrieve the last few samples.
1465
1474
1466
1475
return convertedAVFrame;
1467
1476
}
0 commit comments