Return a contiguous tensor from validateWf() and add a regression test.

Encoder.cpp: validateWf() now returns wf.contiguous() instead of wf, and the
TODO about checking the contiguity of the input waveform is removed.
test_ops.py: test_contiguity encodes two equal waveforms whose strides are
(num_samples, 1) and (1, 2) and asserts that both encoded tensors are equal.

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 114e8600..1e0e75c3 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -13,10 +13,8 @@ torch::Tensor validateWf(torch::Tensor wf) {
       wf.dtype() == torch::kFloat32,
       "waveform must have float32 dtype, got ",
       wf.dtype());
-  // TODO-ENCODING check contiguity of the input wf to ensure that it is indeed
-  // planar (fltp).
   TORCH_CHECK(wf.dim() == 2, "waveform must have 2 dimensions, got ", wf.dim());
-  return wf;
+  return wf.contiguous();
 }
 
 void validateSampleRate(const AVCodec& avCodec, int sampleRate) {
diff --git a/test/test_ops.py b/test/test_ops.py
index d7244b39..ddca330a 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1267,6 +1267,39 @@ def test_encode_to_tensor_long_output(self):
 
         torch.testing.assert_close(self.decode(encoded_tensor), samples)
 
+    def test_contiguity(self):
+        # Ensure that 2 waveforms with the same values are encoded in the same
+        # way, regardless of their memory layout. Here we encode 2 equal
+        # waveforms, one is row-aligned while the other is column-aligned.
+
+        num_samples = 10_000  # per channel
+        contiguous_samples = torch.rand(2, num_samples).contiguous()
+        assert contiguous_samples.stride() == (num_samples, 1)
+
+        encoded_from_contiguous = encode_audio_to_tensor(
+            wf=contiguous_samples,
+            sample_rate=16_000,
+            format="flac",
+            bit_rate=44_000,
+        )
+        non_contiguous_samples = contiguous_samples.T.contiguous().T
+        assert non_contiguous_samples.stride() == (1, 2)
+
+        torch.testing.assert_close(
+            contiguous_samples, non_contiguous_samples, rtol=0, atol=0
+        )
+
+        encoded_from_non_contiguous = encode_audio_to_tensor(
+            wf=non_contiguous_samples,
+            sample_rate=16_000,
+            format="flac",
+            bit_rate=44_000,
+        )
+
+        torch.testing.assert_close(
+            encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0
+        )
+
 
 if __name__ == "__main__":
     pytest.main()