Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/torchcodec/_core/AVIOTensorContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,8 @@ AVIOToTensorContext::AVIOToTensorContext()
}

torch::Tensor AVIOToTensorContext::getOutputTensor() {
throw std::runtime_error(
"AVIOToTensorContext::getOutputTensor is not implemented yet.");
// return tensorContext_.data.narrow(
// /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos);
return tensorContext_.data.narrow(
/*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos);
}

} // namespace facebook::torchcodec
25 changes: 11 additions & 14 deletions src/torchcodec/_core/SingleStreamDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1030,15 +1030,15 @@ AudioFramesOutput SingleStreamDecoder::getFramesPlayedInRangeAudio(
frames.push_back(*lastSamples);
}

// TORCH_CHECK(
// frames.size() > 0 && firstFramePtsSeconds.has_value(),
// "No audio frames were decoded. ",
// "This is probably because start_seconds is too high(",
// startSeconds,
// "),",
// "or because stop_seconds(",
// stopSecondsOptional,
// ") is too low.");
TORCH_CHECK(
frames.size() > 0 && firstFramePtsSeconds.has_value(),
"No audio frames were decoded. ",
"This is probably because start_seconds is too high(",
startSeconds,
"),",
"or because stop_seconds(",
stopSecondsOptional,
") is too low.");

return AudioFramesOutput{torch::cat(frames, 1), *firstFramePtsSeconds};
}
Expand Down Expand Up @@ -1419,11 +1419,8 @@ std::optional<torch::Tensor> SingleStreamDecoder::maybeFlushSwrBuffers() {
auto actualNumRemainingSamples = swr_convert(
swrContext_.get(), outputBuffers.data(), numRemainingSamples, nullptr, 0);
Copy link

Copilot AI Apr 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

swr_convert() can return a negative value on error. Passing that directly as the length to lastSamples.narrow(...) will throw an unrelated/cryptic error (or potentially create an invalid slice). Add an explicit check for actualNumRemainingSamples < 0 and surface a clear failure (e.g., TORCH_CHECK(actualNumRemainingSamples >= 0, ...)) before narrowing; optionally also handle the == 0 case explicitly.

Suggested change
swrContext_.get(), outputBuffers.data(), numRemainingSamples, nullptr, 0);
swrContext_.get(), outputBuffers.data(), numRemainingSamples, nullptr, 0);
TORCH_CHECK(
actualNumRemainingSamples >= 0,
"swr_convert() failed while flushing buffered audio samples: ",
actualNumRemainingSamples);
if (actualNumRemainingSamples == 0) {
return std::nullopt;
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

先别管这些了,你看看 diff 减少的同时,test_paddle 有没有覆盖到


throw std::runtime_error(
"SingleStreamDecoder::maybeFlushSwrBuffers is not implemented yet.");

// return lastSamples.narrow(
// /*dim=*/1, /*start=*/0, /*length=*/actualNumRemainingSamples);
return lastSamples.narrow(
/*dim=*/1, /*start=*/0, /*length=*/actualNumRemainingSamples);
}

// --------------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions src/torchcodec/_core/custom_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
#include <string>
#include "c10/core/SymIntArrayRef.h"
#include "c10/util/Exception.h"
#include "torch/library.h"
#include "src/torchcodec/_core/AVIOFileLikeContext.h"
#include "src/torchcodec/_core/AVIOTensorContext.h"
#include "src/torchcodec/_core/Encoder.h"
#include "src/torchcodec/_core/SingleStreamDecoder.h"
#include "src/torchcodec/_core/ValidationUtils.h"
#include "torch/library.h"

namespace facebook::torchcodec {

Expand Down Expand Up @@ -118,7 +118,7 @@ OpsFrameOutput makeOpsFrameOutput(FrameOutput& frame) {
// frame.data,
// torch::tensor(frame.ptsSeconds, torch::dtype(torch::kFloat64)),
// torch::tensor(frame.durationSeconds, torch::dtype(torch::kFloat64)));
return std::make_tuple(
return std::make_tuple(
frame.data,
torch::full({}, frame.ptsSeconds, torch::kFloat64),
torch::full({}, frame.durationSeconds, torch::kFloat64));
Expand Down Expand Up @@ -920,15 +920,15 @@ void scan_all_streams_to_update_metadata(at::Tensor& decoder) {
videoDecoder->scanFileAndUpdateMetadataAndIndex();
}

TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
TORCH_LIBRARY_IMPL(torchcodec_ns, BackendSelect, m) {
m.impl("create_from_file", &create_from_file);
m.impl("create_from_tensor", &create_from_tensor);
m.impl("_create_from_file_like", &_create_from_file_like);
Comment on lines +923 to 926
Copy link

Copilot AI Apr 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registering create_from_tensor under BackendSelect is risky: the op takes a Tensor argument, and its implementation assumes host-accessible memory (there is no video_tensor.is_cpu() check, and the tensor is passed into AVIOFromTensorContext, which reads it via data_ptr()/memcpy). With this dispatch key change, CUDA tensors may no longer fail dispatch and could reach this CPU-only code path. Consider registering create_from_tensor under CPU (keeping only the factory ops that take no Tensor arguments, such as create_from_file and _get_json_ffmpeg_library_versions, under BackendSelect), or, if BackendSelect is intentional, add a strict CPU device check for video_tensor.

Copilot uses AI. Check for mistakes.
m.impl(
"_get_json_ffmpeg_library_versions", &_get_json_ffmpeg_library_versions);
// }
}

// TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
m.impl("encode_audio_to_file", &encode_audio_to_file);
m.impl("encode_audio_to_tensor", &encode_audio_to_tensor);
m.impl("_encode_audio_to_file_like", &_encode_audio_to_file_like);
Expand Down
12 changes: 3 additions & 9 deletions src/torchcodec/decoders/_video_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __init__(
# if isinstance(device, torch_device):
# device = str(device)
import paddle

if isinstance(device, paddle.base.core.Place):
if device.is_cpu_place():
return "cpu"
Expand All @@ -158,12 +159,11 @@ def __init__(

core.add_video_stream(
self._decoder,
num_threads=num_ffmpeg_threads,
dimension_order=dimension_order,
stream_index=stream_index,
dimension_order=dimension_order,
num_threads=num_ffmpeg_threads,
device=device,
device_variant=device_variant,
transform_specs="",
custom_frame_mappings=custom_frame_mappings_data,
)

Expand Down Expand Up @@ -265,9 +265,6 @@ def get_frames_at(self, indices: Union[torch.Tensor, list[int]]) -> FrameBatch:
FrameBatch: The frames at the given indices.
"""

if isinstance(indices, list):
indices = torch.tensor(indices, dtype=torch.int64).cpu()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这能删吗?你在 CPU 机器上可能是对的,在 GPU 机器上可能就跑到 GPU 上了,这得确认下,CI 测不出来

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

奇怪,我在 3.3.1 都复现不了了,难道是哪个 PR 修复了?当初也没定位到是哪一层的转换与 PyTorch 不一致导致 list[int] 会被默认转换为 GPU Tensor,现在看来不会了


data, pts_seconds, duration_seconds = core.get_frames_at_indices(
self._decoder, frame_indices=indices
)
Expand Down Expand Up @@ -347,9 +344,6 @@ def get_frames_played_at(
FrameBatch: The frames that are played at ``seconds``.
"""

if isinstance(seconds, list):
seconds = torch.tensor(seconds, dtype=torch.float32).cpu()

data, pts_seconds, duration_seconds = core.get_frames_by_pts(
self._decoder, timestamps=seconds
)
Expand Down
2 changes: 1 addition & 1 deletion src/torchcodec/samplers/_index_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def _generic_index_based_sampler(

if kind == "random":
clip_start_indices = torch.randint(
sampling_range_start, sampling_range_end, (num_clips,)
low=sampling_range_start, high=sampling_range_end, size=(num_clips,)
)
else:
# Note [num clips larger than sampling range]
Expand Down
Loading