Skip to content

Commit a89287d

Browse files
committed
some comments
1 parent e44b444 commit a89287d

File tree

5 files changed

+15
-12
lines changed

5 files changed

+15
-12
lines changed

src/torchcodec/_frame.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class Frame(Iterable):
4141
def __post_init__(self):
4242
# This is called after __init__() when a Frame is created. We can run
4343
# input validation checks here.
44+
4445
if not self.data.ndim == 3:
4546
raise ValueError(f"data must be 3-dimensional, got {self.data.shape = }")
4647
self.pts_seconds = float(self.pts_seconds)

src/torchcodec/decoders/_core/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ set(CMAKE_CXX_STANDARD 17)
44
set(CMAKE_CXX_STANDARD_REQUIRED ON)
55

66
find_package(Torch REQUIRED)
7-
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
8-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall ${TORCH_CXX_FLAGS}")
7+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
98
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
109

1110
function(make_torchcodec_library library_name ffmpeg_target)

src/torchcodec/decoders/_core/FFMPEGCommon.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ int getNumChannels(const AVFrame* avFrame) {
7070
}
7171

7272
int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
73-
// TODO not sure about the bounds of the versions here
73+
// Not sure about the exactness of the version bounds, but as long as this
74+
// compiles we're fine.
7475
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
7576
(IBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
7677
return avCodecContext->ch_layout.nb_channels;

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ void VideoDecoder::initializeDecoder() {
188188
// fps is numFrames / duration where
189189
// - duration = numSamplesTotal / sampleRate and
190190
// - numSamplesTotal = numSamplesPerFrame * numFrames
191+
// so fps = numFrames * sampleRate / (numSamplesPerFrame * numFrames)
191192
streamMetadata.averageFps =
192193
static_cast<double>(sampleRate) / numSamplesPerFrame;
193194
}
@@ -477,7 +478,8 @@ void VideoDecoder::addStream(
477478
.value_or(avCodec));
478479
}
479480

480-
// TODO: For audio, we raise if seek_mode="approximate" and if the number of
481+
// TODO_FRAME_SIZE_APPROXIMATE_MODE
482+
// For audio, we raise if seek_mode="approximate" and if the number of
481483
// samples per frame is unknown (frame_size field of codec params). But that's
482484
// quite limiting. Ultimately, the most common type of call will be to decode
483485
// an entire file from start to end (possibly with some offsets for start and
@@ -577,7 +579,7 @@ void VideoDecoder::addVideoStream(
577579
void VideoDecoder::addAudioStream(int streamIndex) {
578580
addStream(streamIndex, AVMEDIA_TYPE_AUDIO);
579581

580-
// See correspodning TODO in makeFrameBatchOutput
582+
// See TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
581583
auto& streamInfo = streamInfos_[activeStreamIndex_];
582584
TORCH_CHECK(
583585
streamInfo.codecContext->frame_size > 0,
@@ -1020,9 +1022,9 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
10201022
// fix for this is to let `getFramePlayedAt` convert the pts to an index,
10211023
// just like the rest of the APIs.
10221024
//
1023-
// TODO HOW DO WE FIX THIS??
1024-
1025-
// A few notes:
1025+
// TODO HOW DO WE ADDRESS THIS??
1026+
//
1027+
// A few more notes:
10261028
// - This offset trick does work for the first frame at pts=0: we'll seek to
10271029
// -1, and this leads to a first packet with pts=-1024 to be sent to the
10281030
// decoder (on our test data), leading to frame 0 to be correctly decoded.
@@ -1057,7 +1059,6 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
10571059
desiredPts,
10581060
desiredPts,
10591061
0);
1060-
10611062
if (ffmepgStatus < 0) {
10621063
throw std::runtime_error(
10631064
"Could not seek file to pts=" + std::to_string(desiredPts) + ": " +
@@ -1470,11 +1471,12 @@ VideoDecoder::FrameBatchOutput VideoDecoder::makeFrameBatchOutput(
14701471
containerMetadata_.allStreamMetadata[activeStreamIndex_];
14711472
return FrameBatchOutput(numFrames, videoStreamOptions, streamMetadata);
14721473
} else {
1474+
// TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
14731475
// We asserted that frame_size is non-zero when we added the stream, but it
14741476
// may not always be the case.
14751477
// When it's 0, we can't pre-allocate the output tensor as we don't know the
1476-
// number of samples per channel, and it may be non-constant.
1477-
// TODO: handle this.
1478+
// number of samples per channel, and it may be non-constant. We'll have to
1479+
// find a way to make the batch-APIs work without pre-allocation.
14781480
int64_t numSamples = streamInfo.codecContext->frame_size;
14791481
int64_t numChannels = getNumChannels(streamInfo.codecContext);
14801482
return FrameBatchOutput(numFrames, numChannels, numSamples);

test/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def cpu_and_cuda():
2525

2626
def assert_frames_equal(*args, **kwargs):
2727
frame = args[0]
28-
# This heuristic will work until we start returningu int8 audio frames...
28+
# This heuristic will work until we start returning uint8 audio frames...
2929
if frame.dtype == torch.uint8:
3030
return assert_video_frames_equal(*args, **kwargs)
3131
else:

0 commit comments

Comments
 (0)