some comments

NicolasHug · NicolasHug · commit a89287d327af · 2025-02-18T14:41:13.000Z
diff --git a/src/torchcodec/_frame.py b/src/torchcodec/_frame.py
@@ -41,6 +41,7 @@ class Frame(Iterable):
     def __post_init__(self):
         # This is called after __init__() when a Frame is created. We can run
         # input validation checks here.
+
         if not self.data.ndim == 3:
             raise ValueError(f"data must be 3-dimensional, got {self.data.shape = }")
         self.pts_seconds = float(self.pts_seconds)
diff --git a/src/torchcodec/decoders/_core/CMakeLists.txt b/src/torchcodec/decoders/_core/CMakeLists.txt
@@ -4,8 +4,7 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 find_package(Torch REQUIRED)
-# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall ${TORCH_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
 find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
 
 function(make_torchcodec_library library_name ffmpeg_target)
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -70,7 +70,8 @@ int getNumChannels(const AVFrame* avFrame) {
 }
 
 int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
-// TODO not sure about the bounds of the versions here
+// Not sure about the exactness of the version bounds, but as long as this
+// compiles we're fine.
 #if LIBAVFILTER_VERSION_MAJOR > 8 || \
     (IBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
   return avCodecContext->ch_layout.nb_channels;
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -188,6 +188,7 @@ void VideoDecoder::initializeDecoder() {
         // fps is numFrames / duration where
         // - duration = numSamplesTotal / sampleRate and
         // - numSamplesTotal = numSamplesPerFrame * numFrames
+        // so fps = numFrames * sampleRate / (numSamplesPerFrame * numFrames)
         streamMetadata.averageFps =
             static_cast<double>(sampleRate) / numSamplesPerFrame;
       }
@@ -477,7 +478,8 @@ void VideoDecoder::addStream(
             .value_or(avCodec));
   }
 
-  // TODO: For audio, we raise if seek_mode="approximate" and if the number of
+  // TODO_FRAME_SIZE_APPROXIMATE_MODE
+  // For audio, we raise if seek_mode="approximate" and if the number of
   // samples per frame is unknown (frame_size field of codec params). But that's
   // quite limiting. Ultimately, the most common type of call will be to decode
   // an entire file from start to end (possibly with some offsets for start and
@@ -577,7 +579,7 @@ void VideoDecoder::addVideoStream(
 void VideoDecoder::addAudioStream(int streamIndex) {
   addStream(streamIndex, AVMEDIA_TYPE_AUDIO);
 
-  // See correspodning TODO in makeFrameBatchOutput
+  // See TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
   auto& streamInfo = streamInfos_[activeStreamIndex_];
   TORCH_CHECK(
       streamInfo.codecContext->frame_size > 0,
@@ -1020,9 +1022,9 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
     //   fix for this is to let `getFramePlayedAt` convert the pts to an index,
     //   just like the rest of the APIs.
     //
-    // TODO HOW DO WE FIX THIS??
-
-    // A few notes:
+    // TODO HOW DO WE ADDRESS THIS??
+    //
+    // A few more notes:
     // - This offset trick does work for the first frame at pts=0: we'll seek to
     //   -1, and this leads to a first packet with pts=-1024 to be sent to the
     //   decoder (on our test data), leading to frame 0 to be correctly decoded.
@@ -1057,7 +1059,6 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
       desiredPts,
       desiredPts,
       0);
-
   if (ffmepgStatus < 0) {
     throw std::runtime_error(
         "Could not seek file to pts=" + std::to_string(desiredPts) + ": " +
@@ -1470,11 +1471,12 @@ VideoDecoder::FrameBatchOutput VideoDecoder::makeFrameBatchOutput(
         containerMetadata_.allStreamMetadata[activeStreamIndex_];
     return FrameBatchOutput(numFrames, videoStreamOptions, streamMetadata);
   } else {
+    // TODO_FRAME_SIZE_BATCH_TENSOR_ALLOCATION
     // We asserted that frame_size is non-zero when we added the stream, but it
     // may not always be the case.
     // When it's 0, we can't pre-allocate the output tensor as we don't know the
-    // number of samples per channel, and it may be non-constant.
-    // TODO: handle this.
+    // number of samples per channel, and it may be non-constant. We'll have to
+    // find a way to make the batch-APIs work without pre-allocation.
     int64_t numSamples = streamInfo.codecContext->frame_size;
     int64_t numChannels = getNumChannels(streamInfo.codecContext);
     return FrameBatchOutput(numFrames, numChannels, numSamples);
diff --git a/test/utils.py b/test/utils.py
@@ -25,7 +25,7 @@ def cpu_and_cuda():
 
 def assert_frames_equal(*args, **kwargs):
     frame = args[0]
-    # This heuristic will work until we start returningu int8 audio frames...
+    # This heuristic will work until we start returning uint8 audio frames...
     if frame.dtype == torch.uint8:
         return assert_video_frames_equal(*args, **kwargs)
     else:

Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,8 @@ int getNumChannels(const AVFrame* avFrame) {`
`70`	`70`	`}`
`71`	`71`
`72`	`72`	`int getNumChannels(const UniqueAVCodecContext& avCodecContext) {`
`73`		`-// TODO not sure about the bounds of the versions here`
	`73`	`+// Not sure about the exactness of the version bounds, but as long as this`
	`74`	`+// compiles we're fine.`
`74`	`75`	`#if LIBAVFILTER_VERSION_MAJOR > 8 \|\| \`
`75`	`76`	`(IBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)`
`76`	`77`	`return avCodecContext->ch_layout.nb_channels;`